xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600Instructions.td (revision 3f5d875a27318a909f23a2b7463c4b2d963085df)
1//===-- R600Instructions.td - R600 Instruction defs  -------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// TableGen definitions for instructions which are available on R600 family
10// GPUs.
11//
12//===----------------------------------------------------------------------===//
13
14include "R600InstrFormats.td"
15
// Base for R600 instructions built directly on AMDGPUInst. Mixes in
// PredicateControl, restricts the record to isR600toCayman and places it in
// the "R600" namespace.
16// FIXME: Should not be arbitrarily split from other R600 inst classes.
17class R600WrapperInst <dag outs, dag ins, string asm = "", list<dag> pattern = []> :
18  AMDGPUInst<outs, ins, asm, pattern>, PredicateControl {
19  let SubtargetPredicate = isR600toCayman;
20  let Namespace = "R600";
21}
22
23
// Thin wrapper around InstR600 that always passes NullALU as the itinerary
// argument; it adds no fields of its own.
24class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern = []> :
25    InstR600 <outs, ins, asm, pattern, NullALU> {
26
27}
28
// Pointer operand made of an R600_TReg32_X base register plus an immediate
// index; printed with printMemOperand.
29def MEMxi : Operand<iPTR> {
30  let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index);
31  let PrintMethod = "printMemOperand";
32}
33
// Pointer operand made of a base register and an index register, both
// R600_Reg32.
34def MEMrr : Operand<iPTR> {
35  let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index);
36}
37
38// Operands for non-registers
39
// i32 operand with a default value (Default, 0 unless overridden) whose
// print method is selected by PM.
40class InstFlag<string PM = "printOperand", int Default = 0>
41    : OperandWithDefaultOps <i32, (ops (i32 Default))> {
42  let PrintMethod = PM;
43}
44
45// src_sel for ALU src operands, see also ALU_CONST, ALU_PARAM registers
// Defaults to -1.
46def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))>;
// Defaults to 0; printed with printBankSwizzle.
47def BANK_SWIZZLE : OperandWithDefaultOps <i32, (ops (i32 0))> {
48  let PrintMethod = "printBankSwizzle";
49}
50
51def LITERAL : InstFlag<"printLiteral">;
52
// Per-instruction modifier flags. Every flag defaults to 0 except WRITE,
// which defaults to 1.
53def WRITE : InstFlag <"printWrite", 1>;
54def OMOD : InstFlag <"printOMOD">;
55def REL : InstFlag <"printRel">;
56def CLAMP : InstFlag <"printClamp">;
57def NEG : InstFlag <"printNeg">;
58def ABS : InstFlag <"printAbs">;
59def UEM : InstFlag <"printUpdateExecMask">;
60def UP : InstFlag <"printUpdatePred">;
61
62// XXX: The r600g finalizer in Mesa expects last to be one in most cases.
63// Once we start using the packetizer in this backend we should have this
64// default to 0.
65def LAST : InstFlag<"printLast", 1>;
// i32 operand printed with printRSel; used in this file for the export
// swizzles (sw_*) and the texture source/destination selects.
66def RSel : Operand<i32> {
67  let PrintMethod = "printRSel";
68}
// i32 operand printed with printCT; used in this file for the texture
// COORD_TYPE_* operands.
69def CT: Operand<i32> {
70  let PrintMethod = "printCT";
71}
72
// Pointer operand made of an R600_Reg32 base register plus an immediate index.
73def FRAMEri : Operand<iPTR> {
74  let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
75}
76
// Addressing-mode complex patterns. The second template argument is the
// number of operands produced; the string names the selection function.
77def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
78def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
79def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
80def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
81
82
// Predicate operand over R600_Predicate, with PRED_SEL_OFF supplied as the
// default value.
83def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
84                                     (ops PRED_SEL_OFF)>;
85
// Return instruction selected from AMDGPUendpgm. It takes a variable number
// of operands and is flagged as a terminator/return that uses the custom
// inserter.
86let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
87    usesCustomInserter = 1, Namespace = "R600" in {
88  def RETURN : ILFormat<(outs), (ins variable_ops),
89    "RETURN", [(AMDGPUendpgm)]
90  >;
91}
92
93let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
94
95// Class for instructions with only one source register.
96// If you add new ins to this instruction, make sure they are listed before
97// $literal, because the backend currently assumes that the last operand is
98// a literal.  Also be sure to update the enum R600Op1OperandIndex::ROI in
99// R600Defines.h, R600InstrInfo::buildDefaultInstruction(),
100// and R600InstrInfo::getOperandIdx().
// NOTE(review): $bank_swizzle is listed after $literal below, so the
// "last operand is a literal" statement above looks out of date - confirm.
101class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
102                InstrItinClass itin = AnyALU> :
103    InstR600 <(outs R600_Reg32:$dst),
104              (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
105                   R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
106                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
107                   BANK_SWIZZLE:$bank_swizzle),
108              !strconcat("  ", opName,
109                   "$clamp $last $dst$write$dst_rel$omod, "
110                   "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
111                   "$pred_sel $bank_swizzle"),
112              pattern,
113              itin>,
114    R600ALU_Word0,
115    R600ALU_Word1_OP2 <inst> {
116
  // One-source instructions reuse the OP2 encoding word, so every src1 field
  // and both update bits are tied to 0.
117  let src1 = 0;
118  let src1_rel = 0;
119  let src1_neg = 0;
120  let src1_abs = 0;
121  let update_exec_mask = 0;
122  let update_pred = 0;
123  let HasNativeOperands = 1;
124  let Op1 = 1;
125  let ALUInst = 1;
  // $literal is excluded from the generated encoding.
126  let DisableEncoding = "$literal";
127  let UseNamedOperandTable = 1;
128
129  let Inst{31-0}  = Word0;
130  let Inst{63-32} = Word1;
131}
132
// R600_1OP with the standard selection pattern $dst = node($src0).
133class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
134                    InstrItinClass itin = AnyALU> :
135    R600_1OP <inst, opName,
136              [(set R600_Reg32:$dst, (node R600_Reg32:$src0))], itin
137>;
138
// Class for instructions with two source registers. Unlike R600_1OP, the
// update_exec_mask and update_pred flags are real operands here.
139// If you add or change the operands for R600_2OP instructions, you must
140// also update the R600Op2OperandIndex::ROI enum in R600Defines.h,
141// R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
142class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
143                InstrItinClass itin = AnyALU> :
144  InstR600 <(outs R600_Reg32:$dst),
145          (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
146               OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
147               R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
148               R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
149               LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
150               BANK_SWIZZLE:$bank_swizzle),
151          !strconcat("  ", opName,
152                "$clamp $last $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
153                "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
154                "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
155                "$pred_sel $bank_swizzle"),
156          pattern,
157          itin>,
158    R600ALU_Word0,
159    R600ALU_Word1_OP2 <inst> {
160
161  let HasNativeOperands = 1;
162  let Op2 = 1;
163  let ALUInst = 1;
  // $literal is excluded from the generated encoding.
164  let DisableEncoding = "$literal";
165  let UseNamedOperandTable = 1;
166
167  let Inst{31-0}  = Word0;
168  let Inst{63-32} = Word1;
169}
170
// R600_2OP with the standard selection pattern $dst = node($src0, $src1).
// node defaults to null_frag for instructions declared without a node.
171class R600_2OP_Helper <bits<11> inst, string opName,
172                       SDPatternOperator node = null_frag,
173                       InstrItinClass itin = AnyALU> :
174    R600_2OP <inst, opName,
175              [(set R600_Reg32:$dst, (node R600_Reg32:$src0,
176                                           R600_Reg32:$src1))], itin
177>;
178
// Class for instructions with three source registers. The opcode is 5 bits
// wide and the operand list has no write/omod/abs modifiers.
179// If you add or change the operands for R600_3OP instructions, you must
180// also update the R600Op3OperandIndex::ROI enum in R600Defines.h,
181// R600InstrInfo::buildDefaultInstruction(), and
182// R600InstrInfo::getOperandIdx().
// NOTE(review): "$pred_sel" and "$bank_swizzle" are concatenated below
// without a separating space, unlike R600_1OP/R600_2OP - confirm intended.
183class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
184                InstrItinClass itin = AnyALU> :
185  InstR600 <(outs R600_Reg32:$dst),
186          (ins REL:$dst_rel, CLAMP:$clamp,
187               R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
188               R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
189               R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
190               LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
191               BANK_SWIZZLE:$bank_swizzle),
192          !strconcat("  ", opName, "$clamp $last $dst$dst_rel, "
193                             "$src0_neg$src0$src0_rel, "
194                             "$src1_neg$src1$src1_rel, "
195                             "$src2_neg$src2$src2_rel, "
196                             "$pred_sel"
197                             "$bank_swizzle"),
198          pattern,
199          itin>,
200    R600ALU_Word0,
201    R600ALU_Word1_OP3<inst>{
202
203  let HasNativeOperands = 1;
  // $literal is excluded from the generated encoding.
204  let DisableEncoding = "$literal";
205  let Op3 = 1;
206  let UseNamedOperandTable = 1;
207  let ALUInst = 1;
208
209  let Inst{31-0}  = Word0;
210  let Inst{63-32} = Word1;
211}
212
213} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
214
// Base class for RAT export instructions. cfinst, ratinst, ratid and mask are
// copied into cf_inst, rat_inst, rat_id and comp_mask; the remaining encoding
// fields are fixed to the constants below.
215class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
216                 dag outs, dag ins, string asm, list<dag> pattern> :
217    InstR600ISA <outs, ins, asm, pattern>,
218    CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF  {
219
220  let rat_id = ratid;
221  let rat_inst = ratinst;
222  let rim         = 0;
223  // XXX: Have a separate instruction for non-indexed writes.
224  let type        = 1;
225  let rw_rel      = 0;
226  let elem_size   = 0;
227
228  let array_size  = 0;
229  let comp_mask   = mask;
230  let burst_count = 0;
231  let vpm         = 0;
232  let cf_inst = cfinst;
233  let mark        = 0;
234  let barrier     = 1;
235
236  let Inst{31-0} = Word0;
237  let Inst{63-32} = Word1;
238  let IsExport = 1;
239
240}
241
// Base class for vertex-fetch reads. Takes a MEMxi source address and an
// 8-bit buffer id; only Word1 is encoded here (see the notes below for the
// parts that are left to R600CodeEmitter).
242class VTX_READ <string name, dag outs, list<dag> pattern>
243    : InstR600ISA <outs, (ins MEMxi:$src_gpr, i8imm:$buffer_id), !strconcat("  ", name, ", #$buffer_id"), pattern>,
244      VTX_WORD1_GPR {
245
246  // Static fields
247  let DST_REL = 0;
248  // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
249  // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
250  // however, based on my testing if USE_CONST_FIELDS is set, then all
251  // these fields need to be set to 0.
252  let USE_CONST_FIELDS = 0;
253  let NUM_FORMAT_ALL = 1;
254  let FORMAT_COMP_ALL = 0;
255  let SRF_MODE_ALL = 0;
256
257  let Inst{63-32} = Word1;
258  // LLVM can only encode 64-bit instructions, so these fields are manually
259  // encoded in R600CodeEmitter
260  //
261  // bits<16> OFFSET;
262  // bits<2>  ENDIAN_SWAP = 0;
263  // bits<1>  CONST_BUF_NO_STRIDE = 0;
264  // bits<1>  MEGA_FETCH = 0;
265  // bits<1>  ALT_CONST = 0;
266  // bits<2>  BUFFER_INDEX_MODE = 0;
267
268  // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
269  // is done in R600CodeEmitter)
270  //
271  // Inst{79-64} = OFFSET;
272  // Inst{81-80} = ENDIAN_SWAP;
273  // Inst{82}    = CONST_BUF_NO_STRIDE;
274  // Inst{83}    = MEGA_FETCH;
275  // Inst{84}    = ALT_CONST;
276  // Inst{86-85} = BUFFER_INDEX_MODE;
277  // Inst{95-87} = 0; Reserved
278
279  // VTX_WORD3 (Padding)
280  //
281  // Inst{127-96} = 0;
282
283  let VTXInst = 1;
284}
285
286// Legacy.
// Global-address-space atomic_cmp_swap whose result value (result 0) has no
// uses.
287def atomic_cmp_swap_global_noret : PatFrag<
288  (ops node:$ptr, node:$cmp, node:$value),
289  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
290  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
291
// Global-address-space atomic_cmp_swap whose result value (result 0) is used.
292def atomic_cmp_swap_global_ret : PatFrag<
293  (ops node:$ptr, node:$cmp, node:$value),
294  (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
295  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
296
// AMDGPUstore_mskor restricted to the global address space.
297def mskor_global : PatFrag<(ops node:$val, node:$ptr),
298                            (AMDGPUstore_mskor node:$val, node:$ptr), [{
299  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
300}]>;
301
302// FIXME: These are deprecated
// Accepts a load matched by ld_node when it is zero-extending or
// any-extending.
303class AZExtLoadBase <SDPatternOperator ld_node>
304    : PatFrag<(ops node:$ptr), (ld_node node:$ptr), [{
305  const ISD::LoadExtType ExtKind = cast<LoadSDNode>(N)->getExtensionType();
306  const bool IsZeroExt = ExtKind == ISD::ZEXTLOAD;
307  return IsZeroExt || ExtKind == ISD::EXTLOAD;
308}]>;
309
// Zero-/any-extending unindexed load.
310def az_extload : AZExtLoadBase <unindexedload>;
311
// Variants of az_extload narrowed by the memory type being loaded.
312def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
313  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
314}]>;
315
316def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
317  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
318}]>;
319
320def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
321  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
322}]>;
323
// Same fragments with AddressSpaces set to LoadAddress_local.AddrSpaces.
324let AddressSpaces = LoadAddress_local.AddrSpaces in {
325def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr)>;
326def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr)>;
327}
328
// Accepts a load_type load when isConstantLoad(N, 0) holds or the load is
// from PARAM_I_ADDRESS.
329class LoadParamFrag <PatFrag load_type> : PatFrag <
330  (ops node:$ptr), (load_type node:$ptr),
331  [{ LoadSDNode *Ld = cast<LoadSDNode>(N);
332     return isConstantLoad(Ld, 0) || Ld->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS; }]
333>;
334
335def vtx_id3_az_extloadi8 : LoadParamFrag<az_extloadi8>;
336def vtx_id3_az_extloadi16 : LoadParamFrag<az_extloadi16>;
337def vtx_id3_load : LoadParamFrag<load>;
338
// Accepts a load from GLOBAL_ADDRESS, or from CONSTANT_ADDRESS when the
// underlying object of the accessed value is not a GlobalValue.
339class LoadVtxId1 <PatFrag load> : PatFrag <
340  (ops node:$ptr), (load node:$ptr), [{
341  const MemSDNode *Mem = cast<MemSDNode>(N);
342  const unsigned AS = Mem->getAddressSpace();
343  if (AS == AMDGPUAS::GLOBAL_ADDRESS) return true;
344  if (AS != AMDGPUAS::CONSTANT_ADDRESS) return false;
345  return !isa<GlobalValue>(getUnderlyingObject(Mem->getMemOperand()->getValue()));
346}]>;
347
348def vtx_id1_az_extloadi8 : LoadVtxId1 <az_extloadi8>;
349def vtx_id1_az_extloadi16 : LoadVtxId1 <az_extloadi16>;
350def vtx_id1_load : LoadVtxId1 <load>;
351
// Accepts a load from CONSTANT_ADDRESS whose accessed value has a GlobalValue
// as its underlying object.
352class LoadVtxId2 <PatFrag load> : PatFrag <
353  (ops node:$ptr), (load node:$ptr), [{
354  const MemSDNode *Mem = cast<MemSDNode>(N);
355  if (Mem->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) return false;
356  const Value *Base = getUnderlyingObject(Mem->getMemOperand()->getValue());
357  return isa<GlobalValue>(Base);
358}]>;
359
360def vtx_id2_az_extloadi8 : LoadVtxId2 <az_extloadi8>;
361def vtx_id2_az_extloadi16 : LoadVtxId2 <az_extloadi16>;
362def vtx_id2_load : LoadVtxId2 <load>;
363
364//===----------------------------------------------------------------------===//
365// R600 SDNodes
366//===----------------------------------------------------------------------===//
367
368let Namespace = "R600" in {
369
// Interpolation pseudo producing a pair of results in the X and Y channel
// register classes. The asm string references both results as operands
// ($dst0 and $dst1).
370def INTERP_PAIR_XY :  AMDGPUShaderInst <
371  (outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
372  (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
373  "INTERP_PAIR_XY $src0 $src1 $src2 : $dst0 $dst1",
374  []>;
375
// Same as INTERP_PAIR_XY, but the results use the Z and W channel register
// classes.
376def INTERP_PAIR_ZW :  AMDGPUShaderInst <
377  (outs R600_TReg32_Z:$dst0, R600_TReg32_W:$dst1),
378  (ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
379  "INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 $dst1",
380  []>;
381
382}
383
// Variadic node with one integer result; operand 1 is a pointer.
384def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
385  SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
386  [SDNPVariadic]
387>;
388
// One floating-point result computed from eight f32 operands.
389def DOT4 : SDNode<"AMDGPUISD::DOT4",
390  SDTypeProfile<1, 8, [SDTCisFP<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32>,
391      SDTCisVT<3, f32>, SDTCisVT<4, f32>, SDTCisVT<5, f32>,
392      SDTCisVT<6, f32>, SDTCisVT<7, f32>, SDTCisVT<8, f32>]>,
393  []
394>;
395
// Unary floating-point nodes (one FP result, one FP operand).
396def COS_HW : SDNode<"AMDGPUISD::COS_HW",
397  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
398>;
399
400def SIN_HW : SDNode<"AMDGPUISD::SIN_HW",
401  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>
402>;
403
// Texture fetch: one floating-point result and 19 operands.
404def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>;
405
406def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>;
407
// Selects a TEXTURE_FETCH node whose first operand equals TextureOp to inst.
// The source vector and the 17 immediate operands are forwarded to inst in
// the same order; TextureOp itself is consumed by the match.
408multiclass TexPattern<bits<32> TextureOp, Instruction inst, ValueType vt = v4f32> {
409def : R600Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR,
410          (i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw),
411          (i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz),
412          (i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z),
413          (i32 imm:$DST_SEL_W),
414          (i32 imm:$RESOURCE_ID), (i32 imm:$SAMPLER_ID),
415          (i32 imm:$COORD_TYPE_X), (i32 imm:$COORD_TYPE_Y), (i32 imm:$COORD_TYPE_Z),
416          (i32 imm:$COORD_TYPE_W)),
417          (inst R600_Reg128:$SRC_GPR,
418          imm:$srcx, imm:$srcy, imm:$srcz, imm:$srcw,
419          imm:$offsetx, imm:$offsety, imm:$offsetz,
420          imm:$DST_SEL_X, imm:$DST_SEL_Y, imm:$DST_SEL_Z,
421          imm:$DST_SEL_W,
422          imm:$RESOURCE_ID, imm:$SAMPLER_ID,
423          imm:$COORD_TYPE_X, imm:$COORD_TYPE_Y, imm:$COORD_TYPE_Z,
424          imm:$COORD_TYPE_W)>;
425}
426
427//===----------------------------------------------------------------------===//
428// Interpolation Instructions
429//===----------------------------------------------------------------------===//
430
431let Namespace = "R600" in {
432
// Shader instruction producing a 128-bit register from an immediate operand.
// It has no selection pattern and prints as "INTERP_LOAD".
433def INTERP_VEC_LOAD :  AMDGPUShaderInst <
434  (outs R600_Reg128:$dst),
435  (ins i32imm:$src0),
436  "INTERP_LOAD $src0 : $dst">;
437
438}
439
// Native interpolation ALU instructions. None has a selection pattern;
// INTERP_XY and INTERP_ZW fix bank_swizzle to 5.
440def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
441  let bank_swizzle = 5;
442}
443
444def INTERP_ZW : R600_2OP <0xD7, "INTERP_ZW", []> {
445  let bank_swizzle = 5;
446}
447
448def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
449
450//===----------------------------------------------------------------------===//
451// Export Instructions
452//===----------------------------------------------------------------------===//
453
// First encoding word shared by the export instructions.
454class ExportWord0 {
455  field bits<32> Word0;
456
457  bits<13> arraybase;
458  bits<2> type;
459  bits<7> gpr;
460  bits<2> elem_size;
461
462  let Word0{12-0} = arraybase;
463  let Word0{14-13} = type;
464  let Word0{21-15} = gpr;
465  let Word0{22} = 0; // RW_REL
466  let Word0{29-23} = 0; // INDEX_GPR
467  let Word0{31-30} = elem_size;
468}
469
// Second encoding word for swizzled exports. Only the four swizzle selects
// are placed here; eop and inst are declared but not assigned to Word1 in
// this class.
470class ExportSwzWord1 {
471  field bits<32> Word1;
472
473  bits<3> sw_x;
474  bits<3> sw_y;
475  bits<3> sw_z;
476  bits<3> sw_w;
477  bits<1> eop;
478  bits<8> inst;
479
480  let Word1{2-0} = sw_x;
481  let Word1{5-3} = sw_y;
482  let Word1{8-6} = sw_z;
483  let Word1{11-9} = sw_w;
484}
485
// Second encoding word for buffer exports. Only arraySize and compMask are
// placed here; eop and inst are declared but not assigned to Word1 in this
// class.
486class ExportBufWord1 {
487  field bits<32> Word1;
488
489  bits<12> arraySize;
490  bits<4> compMask;
491  bits<1> eop;
492  bits<8> inst;
493
494  let Word1{11-0} = arraySize;
495  let Word1{15-12} = compMask;
496}
497
// Selects R600_EXPORT to ExportInst. The node carries ($base, $type) while
// the instruction is given ($type, $base); cf_inst and a constant 0 are
// passed as the last two operands.
498multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
499  def : R600Pat<(R600_EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
500    (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
501        (ExportInst R600_Reg128:$src, imm:$type, imm:$base,
502        imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
503  >;
504
505}
506
// Selects int_r600_store_stream_output to ExportInst. The third intrinsic
// operand (0..3) chooses which of buf0inst..buf3inst is passed as the
// instruction opcode operand; the remaining operands are identical across
// the four patterns (type 0, array size 4095, trailing 0).
507multiclass SteamOutputExportPattern<Instruction ExportInst,
508    bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
509// Stream0
510  def : R600Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src),
511      (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
512      (ExportInst R600_Reg128:$src, 0, imm:$arraybase,
513      4095, imm:$mask, buf0inst, 0)>;
514// Stream1
515  def : R600Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src),
516      (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
517      (ExportInst $src, 0, imm:$arraybase,
518      4095, imm:$mask, buf1inst, 0)>;
519// Stream2
520  def : R600Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src),
521      (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
522      (ExportInst $src, 0, imm:$arraybase,
523      4095, imm:$mask, buf2inst, 0)>;
524// Stream3
525  def : R600Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src),
526      (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
527      (ExportInst $src, 0, imm:$arraybase,
528      4095, imm:$mask, buf3inst, 0)>;
529}
530
531// Export instructions should not be duplicated by the TailDuplication pass
532// (which assumes that duplicable instructions are affected by exec mask)
533let usesCustomInserter = 1, isNotDuplicable = 1 in {
534
// Swizzled export of a 128-bit register. elem_size is fixed to 3; the asm
// string prints the register followed by the four swizzle selects.
535class ExportSwzInst : InstR600ISA<(
536    outs),
537    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
538    RSel:$sw_x, RSel:$sw_y, RSel:$sw_z, RSel:$sw_w, i32imm:$inst,
539    i32imm:$eop),
540    !strconcat("EXPORT", " $gpr.$sw_x$sw_y$sw_z$sw_w"),
541    []>, ExportWord0, ExportSwzWord1 {
542  let elem_size = 3;
543  let Inst{31-0} = Word0;
544  let Inst{63-32} = Word1;
545  let IsExport = 1;
546}
547
548} // End usesCustomInserter = 1, isNotDuplicable = 1
549
// Buffer export of a 128-bit register. elem_size is fixed to 0.
// NOTE(review): this class sits outside the
// usesCustomInserter/isNotDuplicable let-block that wraps ExportSwzInst -
// confirm that is intended.
550class ExportBufInst : InstR600ISA<(
551    outs),
552    (ins R600_Reg128:$gpr, i32imm:$type, i32imm:$arraybase,
553    i32imm:$arraySize, i32imm:$compMask, i32imm:$inst, i32imm:$eop),
554    !strconcat("EXPORT", " $gpr"),
555    []>, ExportWord0, ExportBufWord1 {
556  let elem_size = 0;
557  let Inst{31-0} = Word0;
558  let Inst{63-32} = Word1;
559  let IsExport = 1;
560}
561
562//===----------------------------------------------------------------------===//
563// Control Flow Instructions
564//===----------------------------------------------------------------------===//
565
566
// Flag operand (default 0) printed with printKCache.
567def KCACHE : InstFlag<"printKCache">;
568
// Base class for the CF_ALU* clause instructions. inst becomes CF_INST;
// ALT_CONST and WHOLE_QUAD_MODE are fixed to 0 and BARRIER to 1. The asm
// string prints the count, the address and the two kcache modes.
569class ALU_CLAUSE<bits<4> inst, string OpName> : R600WrapperInst <(outs),
570(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
571KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
572i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
573i32imm:$COUNT, i32imm:$Enabled),
574!strconcat(OpName, " $COUNT, @$ADDR, "
575"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
576[] >, CF_ALU_WORD0, CF_ALU_WORD1 {
577  field bits<64> Inst;
578
579  let CF_INST = inst;
580  let ALT_CONST = 0;
581  let WHOLE_QUAD_MODE = 0;
582  let BARRIER = 1;
583  let isCodeGenOnly = 1;
584  let UseNamedOperandTable = 1;
585
586  let Inst{31-0} = Word0;
587  let Inst{63-32} = Word1;
588}
589
// First control-flow word for R600: the whole 32-bit word is ADDR.
590class CF_WORD0_R600 {
591  field bits<32> Word0;
592
593  bits<32> ADDR;
594
595  let Word0 = ADDR;
596}
597
// Control-flow clause built on the R600 CF words. The 4-bit CNT value is
// split across COUNT (bits 2-0) and COUNT_3 (bit 3); the other Word1 fields
// are fixed below.
598class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : R600WrapperInst <(outs),
599ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
600  field bits<64> Inst;
601  bits<4> CNT;
602
603  let CF_INST = inst;
604  let BARRIER = 1;
605  let CF_CONST = 0;
606  let VALID_PIXEL_MODE = 0;
607  let COND = 0;
608  let COUNT = CNT{2-0};
609  let CALL_COUNT = 0;
610  let COUNT_3 = CNT{3};
611  let END_OF_PROGRAM = 0;
612  let WHOLE_QUAD_MODE = 0;
613
614  let Inst{31-0} = Word0;
615  let Inst{63-32} = Word1;
616}
617
// Control-flow clause built on the EG CF words. It takes an 8-bit opcode and
// fixes the Word1 fields listed below.
618class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : R600WrapperInst <(outs),
619ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
620  field bits<64> Inst;
621
622  let CF_INST = inst;
623  let BARRIER = 1;
624  let JUMPTABLE_SEL = 0;
625  let CF_CONST = 0;
626  let VALID_PIXEL_MODE = 0;
627  let COND = 0;
628  let END_OF_PROGRAM = 0;
629
630  let Inst{31-0} = Word0;
631  let Inst{63-32} = Word1;
632}
633
// ALU clause instructions; the first argument is the 4-bit CF_INST opcode.
634def CF_ALU : ALU_CLAUSE<8, "ALU">;
635def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
636def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">;
637def CF_ALU_CONTINUE : ALU_CLAUSE<13, "ALU_CONTINUE">;
638def CF_ALU_BREAK : ALU_CLAUSE<14, "ALU_BREAK">;
639def CF_ALU_ELSE_AFTER : ALU_CLAUSE<15, "ALU_ELSE_AFTER">;
640
// Codegen-only marker that prints where a fetch clause starts.
641def FETCH_CLAUSE : R600WrapperInst <(outs),
642(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
643  field bits<8> Inst;
644  bits<8> num;
645  let Inst = num;
646  let isCodeGenOnly = 1;
647}
648
// Codegen-only marker that prints where an ALU clause starts. This record
// shares its name with the ALU_CLAUSE class used for the CF_ALU* defs.
649def ALU_CLAUSE : R600WrapperInst <(outs),
650(ins i32imm:$addr), "ALU clause starting at $addr:", [] > {
651  field bits<8> Inst;
652  bits<8> num;
653  let Inst = num;
654  let isCodeGenOnly = 1;
655}
656
// Codegen-only instruction carrying two 32-bit literals: literal1 in the low
// word and literal2 in the high word of the 64-bit encoding.
657def LITERALS : R600WrapperInst <(outs),
658(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > {
659  let isCodeGenOnly = 1;
660
661  field bits<64> Inst;
662  bits<32> literal1;
663  bits<32> literal2;
664
665  let Inst{31-0} = literal1;
666  let Inst{63-32} = literal2;
667}
668
// Operand-less instruction with a 64-bit encoding field that is left
// unassigned here.
669def PAD : R600WrapperInst <(outs), (ins), "PAD", [] > {
670  field bits<64> Inst;
671}
672
673//===----------------------------------------------------------------------===//
674// Common Instructions R600, R700, Evergreen, Cayman
675//===----------------------------------------------------------------------===//
676
677let isCodeGenOnly = 1, isPseudo = 1 in {
678
679let Namespace = "R600", usesCustomInserter = 1  in {
680
// Pseudo for f32 fabs over register class rc; uses the custom inserter.
681class FABS <RegisterClass rc> : AMDGPUShaderInst <
682  (outs rc:$dst),
683  (ins rc:$src0),
684  "FABS $dst, $src0",
685  [(set f32:$dst, (fabs f32:$src0))]
686>;
687
// Pseudo for f32 fneg over register class rc; uses the custom inserter.
688class FNEG <RegisterClass rc> : AMDGPUShaderInst <
689  (outs rc:$dst),
690  (ins rc:$src0),
691  "FNEG $dst, $src0",
692  [(set f32:$dst, (fneg f32:$src0))]
693>;
694
695} // usesCustomInserter = 1
696
// Defines a RegisterLoad/RegisterStore pseudo pair selected from
// AMDGPUregister_load / AMDGPUregister_store. $chan is matched as a target
// immediate and is not part of the printed asm string.
697multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
698                    ComplexPattern addrPat> {
699let UseNamedOperandTable = 1 in {
700
701  def RegisterLoad : AMDGPUShaderInst <
702    (outs dstClass:$dst),
703    (ins addrClass:$addr, i32imm:$chan),
704    "RegisterLoad $dst, $addr",
705    [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
706  > {
707    let isRegisterLoad = 1;
708  }
709
710  def RegisterStore : AMDGPUShaderInst <
711    (outs),
712    (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
713    "RegisterStore $val, $addr",
714    [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
715  > {
716    let isRegisterStore = 1;
717  }
718}
719}
720
721} // End isCodeGenOnly = 1, isPseudo = 1
722
723
// Two-operand floating-point ALU instructions. MUL is declared without a
// selection node, so it uses R600_2OP_Helper's null_frag default.
724def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
725// Non-IEEE MUL: 0 * anything = 0
726def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE">;
727def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
728// TODO: Do these actually match the regular fmin/fmax behavior?
729def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>;
730def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>;
731// According to https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050%28v=vs.85%29.aspx
732// DX10 min/max returns the other operand if one is NaN,
733// this matches http://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic
734def MAX_DX10 : R600_2OP_Helper <0x5, "MAX_DX10", fmaxnum>;
735def MIN_DX10 : R600_2OP_Helper <0x6, "MIN_DX10", fminnum>;
736
737// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
738// so some of the instruction names don't match the asm string.
739// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
// The four compares below produce an f32 result: FP_ONE when the condition
// holds, FP_ZERO otherwise.
740def SETE : R600_2OP <
741  0x08, "SETE",
742  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))]
743>;
744
745def SGT : R600_2OP <
746  0x09, "SETGT",
747  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))]
748>;
749
750def SGE : R600_2OP <
751  0xA, "SETGE",
752  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))]
753>;
754
755def SNE : R600_2OP <
756  0xB, "SETNE",
757  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE_NE))]
758>;
759
// The _DX10 variants compare f32 inputs but produce an i32 result: -1 when
// the condition holds, 0 otherwise.
760def SETE_DX10 : R600_2OP <
761  0xC, "SETE_DX10",
762  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))]
763>;
764
765def SETGT_DX10 : R600_2OP <
766  0xD, "SETGT_DX10",
767  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))]
768>;
769
770def SETGE_DX10 : R600_2OP <
771  0xE, "SETGE_DX10",
772  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))]
773>;
774
775// FIXME: This should probably be COND_ONE
776def SETNE_DX10 : R600_2OP <
777  0xF, "SETNE_DX10",
778  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE_NE))]
779>;
780
// One-operand floating-point instructions, each selected from the node given
// as the last argument. MOV has no selection pattern.
781// FIXME: Need combine for AMDGPUfract
782def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
783def TRUNC : R600_1OP_Helper <0x11, "TRUNC", ftrunc>;
784def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
785def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
786def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
787
788def MOV : R600_1OP <0x19, "MOV", []>;
789
790
791// This is a hack to get rid of DUMMY_CHAIN nodes.
792// Most DUMMY_CHAINs should be eliminated during legalization, but undef
793// values can let some of them sneak through to selection.
// The pseudo has no operands and is selected from R600dummy_chain.
794let isPseudo = 1, isCodeGenOnly = 1 in {
795def DUMMY_CHAIN : R600WrapperInst <
796  (outs),
797  (ins),
798  "DUMMY_CHAIN",
799  [(R600dummy_chain)]
800>;
801} // end let isPseudo = 1, isCodeGenOnly = 1
802
803
804let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
805
// Pseudo that materializes an immediate of operand type immType into an
// R600_Reg32. It has an empty asm string and no pattern of its own; the
// R600Pat records below select it. The Namespace assignment repeats the
// value already set by R600WrapperInst.
806class MOV_IMM <Operand immType> : R600WrapperInst <
807  (outs R600_Reg32:$dst),
808  (ins immType:$imm),
809  "",
810  []
811> {
812  let Namespace = "R600";
813}
814
815} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
816
// Integer immediates.
817def MOV_IMM_I32 : MOV_IMM<i32imm>;
818def : R600Pat <
819  (imm:$val),
820  (MOV_IMM_I32 imm:$val)
821>;
822
// Target global addresses wrapped in AMDGPUconstdata_ptr.
823def MOV_IMM_GLOBAL_ADDR : MOV_IMM<i32imm>;
824def : R600Pat <
825  (AMDGPUconstdata_ptr tglobaladdr:$addr),
826  (MOV_IMM_GLOBAL_ADDR tglobaladdr:$addr)
827>;
828
829
// Floating-point immediates.
830def MOV_IMM_F32 : MOV_IMM<f32imm>;
831def : R600Pat <
832  (fpimm:$val),
833  (MOV_IMM_F32  fpimm:$val)
834>;
835
// Predicate-setting compares; none has a selection pattern.
836def PRED_SETE : R600_2OP <0x20, "PRED_SETE", []>;
837def PRED_SETGT : R600_2OP <0x21, "PRED_SETGT", []>;
838def PRED_SETGE : R600_2OP <0x22, "PRED_SETGE", []>;
839def PRED_SETNE : R600_2OP <0x23, "PRED_SETNE", []>;
840
// KILLGT overrides the enclosing hasSideEffects = 0 default.
841let hasSideEffects = 1 in {
842
843def KILLGT : R600_2OP <0x2D, "KILLGT", []>;
844
845} // end hasSideEffects
846
// Integer ALU instructions, each selected from the node given as the last
// argument. MAX_INT/MIN_INT use the signed nodes and MAX_UINT/MIN_UINT the
// unsigned ones.
847def AND_INT : R600_2OP_Helper <0x30, "AND_INT", and>;
848def OR_INT : R600_2OP_Helper <0x31, "OR_INT", or>;
849def XOR_INT : R600_2OP_Helper <0x32, "XOR_INT", xor>;
850def NOT_INT : R600_1OP_Helper <0x33, "NOT_INT", not>;
851def ADD_INT : R600_2OP_Helper <0x34, "ADD_INT", add>;
852def SUB_INT : R600_2OP_Helper <0x35, "SUB_INT", sub>;
853def MAX_INT : R600_2OP_Helper <0x36, "MAX_INT", smax>;
854def MIN_INT : R600_2OP_Helper <0x37, "MIN_INT", smin>;
855def MAX_UINT : R600_2OP_Helper <0x38, "MAX_UINT", umax>;
856def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", umin>;
857
// Integer compares producing an i32 result: -1 when the condition holds,
// 0 otherwise. The _UINT variants use the unsigned condition codes.
858def SETE_INT : R600_2OP <
859  0x3A, "SETE_INT",
860  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))]
861>;
862
863def SETGT_INT : R600_2OP <
864  0x3B, "SETGT_INT",
865  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))]
866>;
867
868def SETGE_INT : R600_2OP <
869  0x3C, "SETGE_INT",
870  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))]
871>;
872
873def SETNE_INT : R600_2OP <
874  0x3D, "SETNE_INT",
875  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))]
876>;
877
878def SETGT_UINT : R600_2OP <
879  0x3E, "SETGT_UINT",
880  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))]
881>;
882
883def SETGE_UINT : R600_2OP <
884  0x3F, "SETGE_UINT",
885  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))]
886>;
887
// Integer predicate-setting compares; none has a selection pattern. Each
// record prints under its own mnemonic so that opcodes 0x43 and 0x44 can be
// told apart in the assembly output.
888def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
889def PRED_SETGT_INT : R600_2OP <0x43, "PRED_SETGT_INT", []>;
890def PRED_SETGE_INT : R600_2OP <0x44, "PRED_SETGE_INT", []>;
891def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
892
// Integer conditional moves: $src0 is compared against 0 with the given
// condition code, yielding $src1 when it holds and $src2 otherwise.
893def CNDE_INT : R600_3OP <
894  0x1C, "CNDE_INT",
895  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))]
896>;
897
898def CNDGE_INT : R600_3OP <
899  0x1E, "CNDGE_INT",
900  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGE))]
901>;
902
903def CNDGT_INT : R600_3OP <
904  0x1D, "CNDGT_INT",
905  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGT))]
906>;
907
908//===----------------------------------------------------------------------===//
909// Texture instructions
910//===----------------------------------------------------------------------===//
911
912let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
913
// Base class for texture instructions. Reads and writes a 128-bit register
// and takes per-channel source/destination selects, offsets, resource and
// sampler ids and coordinate types. Only Word0 and Word1 are assigned to
// Inst here, although TEX_WORD2 is also inherited.
914class R600_TEX <bits<11> inst, string opName> :
915  InstR600 <(outs R600_Reg128:$DST_GPR),
916          (ins R600_Reg128:$SRC_GPR,
917          RSel:$srcx, RSel:$srcy, RSel:$srcz, RSel:$srcw,
918          i32imm:$offsetx, i32imm:$offsety, i32imm:$offsetz,
919          RSel:$DST_SEL_X, RSel:$DST_SEL_Y, RSel:$DST_SEL_Z, RSel:$DST_SEL_W,
920          i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID,
921          CT:$COORD_TYPE_X, CT:$COORD_TYPE_Y, CT:$COORD_TYPE_Z,
922          CT:$COORD_TYPE_W),
923          !strconcat("  ", opName,
924          " $DST_GPR.$DST_SEL_X$DST_SEL_Y$DST_SEL_Z$DST_SEL_W, "
925          "$SRC_GPR.$srcx$srcy$srcz$srcw "
926          "RID:$RESOURCE_ID SID:$SAMPLER_ID "
927          "CT:$COORD_TYPE_X$COORD_TYPE_Y$COORD_TYPE_Z$COORD_TYPE_W"),
928          [],
929          NullALU>, TEX_WORD0, TEX_WORD1, TEX_WORD2 {
930  let Inst{31-0} = Word0;
931  let Inst{63-32} = Word1;
932
  // Only the low five bits of the 11-bit inst parameter are used.
933  let TEX_INST = inst{4-0};
934  let SRC_REL = 0;
935  let DST_REL = 0;
936  let LOD_BIAS = 0;
937
938  let INST_MOD = 0;
939  let FETCH_WHOLE_QUAD = 0;
940  let ALT_CONST = 0;
941  let SAMPLER_INDEX_MODE = 0;
942  let RESOURCE_INDEX_MODE = 0;
943
944  let TEXInst = 1;
945}
946
947} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
948
949
950
// Concrete texture instructions built on R600_TEX. The first argument is the
// opcode and the second the mnemonic. TEX_LD and TEX_LDPTR share opcode 0x03
// and differ only in INST_MOD (R600_TEX default 0, overridden to 1 below).
951def TEX_SAMPLE : R600_TEX <0x10, "TEX_SAMPLE">;
952def TEX_SAMPLE_C : R600_TEX <0x18, "TEX_SAMPLE_C">;
953def TEX_SAMPLE_L : R600_TEX <0x11, "TEX_SAMPLE_L">;
954def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">;
955def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">;
956def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">;
957def TEX_LD : R600_TEX <0x03, "TEX_LD">;
958def TEX_LDPTR : R600_TEX <0x03, "TEX_LDPTR"> {
959  let INST_MOD = 1;
960}
961def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">;
962def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">;
963def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">;
964def TEX_SET_GRADIENTS_H : R600_TEX <0x0B, "TEX_SET_GRADIENTS_H">;
965def TEX_SET_GRADIENTS_V : R600_TEX <0x0C, "TEX_SET_GRADIENTS_V">;
966def TEX_SAMPLE_G : R600_TEX <0x14, "TEX_SAMPLE_G">;
967def TEX_SAMPLE_C_G : R600_TEX <0x1C, "TEX_SAMPLE_C_G">;
968
// Attach selection patterns to the texture instructions via TexPattern. The
// first argument is an integer id (0-10 here), the second the instruction.
// TEX_LD, TEX_GET_TEXTURE_RESINFO and TEX_LDPTR pass v4i32 as an explicit
// third argument; the others rely on TexPattern's default.
// NOTE(review): the meaning of the integer id is defined by TexPattern, which
// is not visible here - confirm against its definition before renumbering.
969defm : TexPattern<0, TEX_SAMPLE>;
970defm : TexPattern<1, TEX_SAMPLE_C>;
971defm : TexPattern<2, TEX_SAMPLE_L>;
972defm : TexPattern<3, TEX_SAMPLE_C_L>;
973defm : TexPattern<4, TEX_SAMPLE_LB>;
974defm : TexPattern<5, TEX_SAMPLE_C_LB>;
975defm : TexPattern<6, TEX_LD, v4i32>;
976defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>;
977defm : TexPattern<8, TEX_GET_GRADIENTS_H>;
978defm : TexPattern<9, TEX_GET_GRADIENTS_V>;
979defm : TexPattern<10, TEX_LDPTR, v4i32>;
980
981//===----------------------------------------------------------------------===//
982// Helper classes for common instructions
983//===----------------------------------------------------------------------===//
984
// Three-operand helper classes. Each takes the 5-bit opcode as `inst` so that
// the opcode can be supplied separately wherever the class is instantiated.

// MUL_LIT: no selection pattern.
985class MUL_LIT_Common <bits<5> inst> : R600_3OP <
986  inst, "MUL_LIT",
987  []
988>;
989
// MULADD: no selection pattern.
990class MULADD_Common <bits<5> inst> : R600_3OP <
991  inst, "MULADD",
992  []
993>;
994
// MULADD_IEEE: selected for f32 any_fmad.
995class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
996  inst, "MULADD_IEEE",
997  [(set f32:$dst, (any_fmad f32:$src0, f32:$src1, f32:$src2))]
998>;
999
// FMA: selected for f32 fma, only when the FMA predicate holds. VecALU is
// passed as an extra template argument rather than via `let Itinerary`.
1000class FMA_Common <bits<5> inst> : R600_3OP <
1001  inst, "FMA",
1002  [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU
1003>
1004{
1005  let OtherPredicates = [FMA];
1006}
1007
// Floating-point conditional selects: compare f32 $src0 against FP_ZERO and
// yield $src1 when the condition holds, otherwise $src2.
// CNDE uses COND_OEQ and, unlike CNDGT/CNDGE below, does not set Itinerary.
1008class CNDE_Common <bits<5> inst> : R600_3OP <
1009  inst, "CNDE",
1010  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
1011>;
1012
// CNDGT uses COND_OGT.
1013class CNDGT_Common <bits<5> inst> : R600_3OP <
1014  inst, "CNDGT",
1015  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))]
1016> {
1017  let Itinerary = VecALU;
1018}
1019
// CNDGE uses COND_OGE.
1020class CNDGE_Common <bits<5> inst> : R600_3OP <
1021  inst, "CNDGE",
1022  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))]
1023> {
1024  let Itinerary = VecALU;
1025}
1026
1027
1028let isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600"  in {
// Pseudo instruction class with a single R600_Reg32 result and one complete
// set of two-source ALU operands for each of the four slots X, Y, Z and W,
// followed by two literal operands. Operand names carry the slot as a suffix
// (e.g. $src0_X). The assembly string is empty and the selection pattern is
// supplied by the instantiating def.
1029class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins
1030// Slot X
1031   UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X,
1032   OMOD:$omod_X, REL:$dst_rel_X, CLAMP:$clamp_X,
1033   R600_TReg32_X:$src0_X, NEG:$src0_neg_X, REL:$src0_rel_X, ABS:$src0_abs_X, SEL:$src0_sel_X,
1034   R600_TReg32_X:$src1_X, NEG:$src1_neg_X, REL:$src1_rel_X, ABS:$src1_abs_X, SEL:$src1_sel_X,
1035   R600_Pred:$pred_sel_X,
1036// Slot Y
1037   UEM:$update_exec_mask_Y, UP:$update_pred_Y, WRITE:$write_Y,
1038   OMOD:$omod_Y, REL:$dst_rel_Y, CLAMP:$clamp_Y,
1039   R600_TReg32_Y:$src0_Y, NEG:$src0_neg_Y, REL:$src0_rel_Y, ABS:$src0_abs_Y, SEL:$src0_sel_Y,
1040   R600_TReg32_Y:$src1_Y, NEG:$src1_neg_Y, REL:$src1_rel_Y, ABS:$src1_abs_Y, SEL:$src1_sel_Y,
1041   R600_Pred:$pred_sel_Y,
1042// Slot Z
1043   UEM:$update_exec_mask_Z, UP:$update_pred_Z, WRITE:$write_Z,
1044   OMOD:$omod_Z, REL:$dst_rel_Z, CLAMP:$clamp_Z,
1045   R600_TReg32_Z:$src0_Z, NEG:$src0_neg_Z, REL:$src0_rel_Z, ABS:$src0_abs_Z, SEL:$src0_sel_Z,
1046   R600_TReg32_Z:$src1_Z, NEG:$src1_neg_Z, REL:$src1_rel_Z, ABS:$src1_abs_Z, SEL:$src1_sel_Z,
1047   R600_Pred:$pred_sel_Z,
1048// Slot W
1049   UEM:$update_exec_mask_W, UP:$update_pred_W, WRITE:$write_W,
1050   OMOD:$omod_W, REL:$dst_rel_W, CLAMP:$clamp_W,
1051   R600_TReg32_W:$src0_W, NEG:$src0_neg_W, REL:$src0_rel_W, ABS:$src0_abs_W, SEL:$src0_sel_W,
1052   R600_TReg32_W:$src1_W, NEG:$src1_neg_W, REL:$src1_rel_W, ABS:$src1_abs_W, SEL:$src1_sel_W,
1053   R600_Pred:$pred_sel_W,
1054   LITERAL:$literal0, LITERAL:$literal1),
1055  "",
1056  pattern,
1057  AnyALU> {
1058
  // Request a named-operand table for instructions of this class.
1059  let UseNamedOperandTable = 1;
1060
1061}
1062} // End isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600"
1063
// DOT_4 is the R600_VEC2OP pseudo selected for the DOT4 node; the pattern
// supplies one (src0, src1) pair per slot in X, Y, Z, W order.
1064def DOT_4 : R600_VEC2OP<[(set R600_Reg32:$dst, (DOT4
1065  R600_TReg32_X:$src0_X, R600_TReg32_X:$src1_X,
1066  R600_TReg32_Y:$src0_Y, R600_TReg32_Y:$src1_Y,
1067  R600_TReg32_Z:$src0_Z, R600_TReg32_Z:$src1_Z,
1068  R600_TReg32_W:$src0_W, R600_TReg32_W:$src1_W))]>;
1069
1070
// Two-operand DOT4 instruction class parameterised by opcode; it has no
// selection pattern of its own.
1071class DOT4_Common <bits<11> inst> : R600_2OP <inst, "DOT4", []>;
1072
1073
1074let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
// Defines two records per instantiation:
//   <name>_pseudo - pseudo instruction on R600_Reg128 values, selected for
//                   int_r600_cube on v4f32.
//   <name>_real   - the two-operand CUBE instruction with opcode `inst` and
//                   no selection pattern.
1075multiclass CUBE_Common <bits<11> inst> {
1076
1077  def _pseudo : InstR600 <
1078    (outs R600_Reg128:$dst),
1079    (ins R600_Reg128:$src0),
1080    "CUBE $dst $src0",
1081    [(set v4f32:$dst, (int_r600_cube v4f32:$src0))],
1082    VecALU
1083  > {
1084    let isPseudo = 1;
1085    let UseNamedOperandTable = 1;
1086  }
1087
1088  def _real : R600_2OP <inst, "CUBE", []>;
1089}
1090} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
1091
// One-operand helper classes parameterised by an 11-bit opcode. Those built
// on R600_1OP_Helper pass the SelectionDAG node they are selected for as the
// third argument. All classes that set an itinerary here use TransALU.

// EXP_IEEE: selected for fexp2.
1092class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
1093  inst, "EXP_IEEE", fexp2
1094> {
1095  let Itinerary = TransALU;
1096}
1097
// FLT_TO_INT: selected for fp_to_sint.
1098class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
1099  inst, "FLT_TO_INT", fp_to_sint
1100> {
1101  let Itinerary = TransALU;
1102}
1103
// INT_TO_FLT: selected for sint_to_fp.
1104class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
1105  inst, "INT_TO_FLT", sint_to_fp
1106> {
1107  let Itinerary = TransALU;
1108}
1109
// FLT_TO_UINT: selected for fp_to_uint.
1110class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
1111  inst, "FLT_TO_UINT", fp_to_uint
1112> {
1113  let Itinerary = TransALU;
1114}
1115
// UINT_TO_FLT: selected for uint_to_fp.
1116class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
1117  inst, "UINT_TO_FLT", uint_to_fp
1118> {
1119  let Itinerary = TransALU;
1120}
1121
// LOG_CLAMPED: no selection pattern, and no itinerary override.
1122class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
1123  inst, "LOG_CLAMPED", []
1124>;
1125
// LOG_IEEE: selected for flog2.
1126class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
1127  inst, "LOG_IEEE", flog2
1128> {
1129  let Itinerary = TransALU;
1130}
1131
// Two-operand shift and multiply helper classes parameterised by opcode.
// Shifts: LSHL -> shl, LSHR -> srl, ASHR -> sra.
1132class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
1133class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
1134class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
// MULHI_INT: selected for mulhs.
1135class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
1136  inst, "MULHI_INT", mulhs> {
1137  let Itinerary = TransALU;
1138}
1139
// MULHI_INT24: selected for AMDGPUmulhi_i24; uses VecALU, not TransALU.
1140class MULHI_INT24_Common <bits<11> inst> : R600_2OP_Helper <
1141  inst, "MULHI_INT24", AMDGPUmulhi_i24> {
1142  let Itinerary = VecALU;
1143}
1144
// MULHI_UINT: selected for mulhu.
// NOTE(review): the mnemonic string is "MULHI", unlike the sibling
// "MULHI_INT" / "MULHI_UINT24" strings. Confirm whether "MULHI_UINT" was
// intended before changing it, since this alters printed assembly.
1145class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
1146  inst, "MULHI", mulhu> {
1147  let Itinerary = TransALU;
1148}
1149
// MULHI_UINT24: selected for AMDGPUmulhi_u24; uses VecALU.
1150class MULHI_UINT24_Common <bits<11> inst> : R600_2OP_Helper <
1151  inst, "MULHI_UINT24", AMDGPUmulhi_u24> {
1152  let Itinerary = VecALU;
1153}
1154
// MULLO_INT: selected for mul.
1155class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
1156  inst, "MULLO_INT", mul> {
1157  let Itinerary = TransALU;
1158}
// MULLO_UINT: no selection pattern.
1159class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> {
1160  let Itinerary = TransALU;
1161}
1162
// Reciprocal and reciprocal-square-root helper classes, all on TransALU.

// RECIP_CLAMPED: no selection pattern.
1163class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
1164  inst, "RECIP_CLAMPED", []
1165> {
1166  let Itinerary = TransALU;
1167}
1168
// RECIP_IEEE: selected for f32 AMDGPUrcp.
1169class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
1170  inst, "RECIP_IEEE", [(set f32:$dst, (AMDGPUrcp f32:$src0))]
1171> {
1172  let Itinerary = TransALU;
1173}
1174
// RECIP_UINT: selected for AMDGPUurecip.
1175class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
1176  inst, "RECIP_UINT", AMDGPUurecip
1177> {
1178  let Itinerary = TransALU;
1179}
1180
1181// Clamped to maximum.
// RECIPSQRT_CLAMPED: selected for AMDGPUrsq_clamp.
1182class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
1183  inst, "RECIPSQRT_CLAMPED", AMDGPUrsq_clamp
1184> {
1185  let Itinerary = TransALU;
1186}
1187
// RECIPSQRT_IEEE: selected for AMDGPUrsq.
1188class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
1189  inst, "RECIPSQRT_IEEE", AMDGPUrsq> {
1190  let Itinerary = TransALU;
1191}
1192
1193// TODO: There is also RECIPSQRT_FF which clamps to zero.
1194
// SIN: selected for the f32 SIN_HW node; marks the instruction with Trig = 1
// and runs on TransALU.
1195class SIN_Common <bits<11> inst> : R600_1OP <
1196  inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{
1197  let Trig = 1;
1198  let Itinerary = TransALU;
1199}
1200
// COS: selected for the f32 COS_HW node; same flags as SIN_Common.
1201class COS_Common <bits<11> inst> : R600_1OP <
1202  inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> {
1203  let Trig = 1;
1204  let Itinerary = TransALU;
1205}
1206
// FABS / FNEG instantiated for the R600_Reg32 register class.
1207def FABS_R600 : FABS<R600_Reg32>;
1208def FNEG_R600 : FNEG<R600_Reg32>;
1209
1210//===----------------------------------------------------------------------===//
1211// Helper patterns for complex intrinsics
1212//===----------------------------------------------------------------------===//
1213
1214// FIXME: Should be predicated on unsafe fp math.
// Division patterns for a given reciprocal instruction:
//   - f32 fdiv src0, src1 is selected as MUL_IEEE src0, (recip_ieee src1);
//   - RcpPat is instantiated for recip_ieee on f32.
1215multiclass DIV_Common <InstR600 recip_ieee> {
1216def : R600Pat<
1217  (fdiv f32:$src0, f32:$src1),
1218  (MUL_IEEE $src0, (recip_ieee $src1))
1219>;
1220
1221def : RcpPat<recip_ieee, f32>;
1222}
1223
// f32 fsqrt is selected as RecipInst applied to RsqInst of the source,
// i.e. sqrt(x) is formed as the reciprocal of the reciprocal square root.
1224class SqrtPat<Instruction RsqInst, Instruction RecipInst> : R600Pat <
1225  (fsqrt f32:$src),
1226  (RecipInst (RsqInst $src))
1227>;
1228
1229//===----------------------------------------------------------------------===//
1230// R600 / R700 Instructions
1231//===----------------------------------------------------------------------===//
1232
// Everything in this block is guarded by the isR600 predicate: opcode
// assignments for the *_Common helper classes, the derived fdiv / pow / sqrt
// patterns, the export instructions, and the control-flow (CF) instructions.
1233let Predicates = [isR600] in {
1234
  // ALU instructions: each def binds a *_Common class to its opcode.
1235  def MUL_LIT_r600 : MUL_LIT_Common<0x0C>;
1236  def MULADD_r600 : MULADD_Common<0x10>;
1237  def MULADD_IEEE_r600 : MULADD_IEEE_Common<0x14>;
1238  def CNDE_r600 : CNDE_Common<0x18>;
1239  def CNDGT_r600 : CNDGT_Common<0x19>;
1240  def CNDGE_r600 : CNDGE_Common<0x1A>;
1241  def DOT4_r600 : DOT4_Common<0x50>;
1242  defm CUBE_r600 : CUBE_Common<0x52>;
1243  def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>;
1244  def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>;
1245  def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>;
1246  def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>;
1247  def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>;
1248  def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>;
1249  def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>;
1250  def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>;
1251  def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>;
1252  def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>;
1253  def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>;
1254  def SIN_r600 : SIN_Common<0x6E>;
1255  def COS_r600 : COS_Common<0x6F>;
1256  def ASHR_r600 : ASHR_Common<0x70>;
1257  def LSHR_r600 : LSHR_Common<0x71>;
1258  def LSHL_r600 : LSHL_Common<0x72>;
1259  def MULLO_INT_r600 : MULLO_INT_Common<0x73>;
1260  def MULHI_INT_r600 : MULHI_INT_Common<0x74>;
1261  def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>;
1262  def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>;
1263  def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
1264
  // Derived patterns built from the instructions defined above.
1265  defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
1266  def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
1267
1268  def : SqrtPat<RECIPSQRT_IEEE_r600, RECIP_IEEE_r600>;
1269
  // Export instructions: both defs fill Word1 bits 31-17 identically and
  // differ only in their base class and in the pattern attached afterwards.
1270  def R600_ExportSwz : ExportSwzInst {
1271    let Word1{20-17} = 0; // BURST_COUNT
1272    let Word1{21} = eop;
1273    let Word1{22} = 0; // VALID_PIXEL_MODE
1274    let Word1{30-23} = inst;
1275    let Word1{31} = 1; // BARRIER
1276  }
1277  defm : ExportPattern<R600_ExportSwz, 39>;
1278
1279  def R600_ExportBuf : ExportBufInst {
1280    let Word1{20-17} = 0; // BURST_COUNT
1281    let Word1{21} = eop;
1282    let Word1{22} = 0; // VALID_PIXEL_MODE
1283    let Word1{30-23} = inst;
1284    let Word1{31} = 1; // BARRIER
1285  }
1286  defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
1287
  // Control-flow instructions. The first CF_CLAUSE_R600 argument is the
  // instruction number. ADDR, CNT and POP_COUNT are either taken from the
  // operand list or, when not listed there, fixed to 0 in the body.
1288  def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$CNT),
1289  "TEX $CNT @$ADDR"> {
1290    let POP_COUNT = 0;
1291  }
1292  def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$CNT),
1293  "VTX $CNT @$ADDR"> {
1294    let POP_COUNT = 0;
1295  }
1296  def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
1297  "LOOP_START_DX10 @$ADDR"> {
1298    let POP_COUNT = 0;
1299    let CNT = 0;
1300  }
1301  def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
1302    let POP_COUNT = 0;
1303    let CNT = 0;
1304  }
1305  def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
1306  "LOOP_BREAK @$ADDR"> {
1307    let POP_COUNT = 0;
1308    let CNT = 0;
1309  }
1310  def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
1311  "CONTINUE @$ADDR"> {
1312    let POP_COUNT = 0;
1313    let CNT = 0;
1314  }
1315  def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
1316  "JUMP @$ADDR POP:$POP_COUNT"> {
1317    let CNT = 0;
1318  }
1319  def CF_PUSH_ELSE_R600 : CF_CLAUSE_R600<12, (ins i32imm:$ADDR),
1320  "PUSH_ELSE @$ADDR"> {
1321    let CNT = 0;
1322    let POP_COUNT = 0; // FIXME?
1323  }
1324  def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
1325  "ELSE @$ADDR POP:$POP_COUNT"> {
1326    let CNT = 0;
1327  }
1328  def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
1329    let ADDR = 0;
1330    let CNT = 0;
1331    let POP_COUNT = 0;
1332  }
1333  def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
1334  "POP @$ADDR POP:$POP_COUNT"> {
1335    let CNT = 0;
1336  }
  // CF_END is the only CF instruction here that sets END_OF_PROGRAM.
1337  def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
1338    let CNT = 0;
1339    let POP_COUNT = 0;
1340    let ADDR = 0;
1341    let END_OF_PROGRAM = 1;
1342  }
1343
1344} // End Predicates = [isR600]
1345
1346
1347//===----------------------------------------------------------------------===//
1348// Register loads and stores - for indirect addressing
1349//===----------------------------------------------------------------------===//
1350
// Instantiate RegisterLoadStore for R600_Reg32 with the FRAMEri operand and
// the ADDRIndirect addressing pattern. The patterns below refer to the
// resulting R600_RegisterLoad and R600_RegisterStore records.
1351let Namespace = "R600" in {
1352defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
1353}
1354
1355// Hardcode channel to 0
1356// NOTE: LSHR is not available here. LSHR is a per-family instruction.
// i32 private loads and stores through an ADDRIndirect address are selected
// as R600_RegisterLoad / R600_RegisterStore with a constant 0 last operand.
1357def : R600Pat <
1358  (i32 (load_private ADDRIndirect:$addr) ),
1359  (R600_RegisterLoad FRAMEri:$addr, (i32 0))
1360>;
1361def : R600Pat <
1362  (store_private i32:$val, ADDRIndirect:$addr),
1363  (R600_RegisterStore i32:$val, FRAMEri:$addr, (i32 0))
1364>;
1365
1366
1367//===----------------------------------------------------------------------===//
1368// Pseudo instructions
1369//===----------------------------------------------------------------------===//
1370
// Pseudo instructions. Nesting in this block: `isPseudo = 1` is the outer
// scope; `usesCustomInserter = 1` is opened inside it and closed first.
1371let isPseudo = 1 in {
1372
// PRED_X: produces a predicate bit from a register and two immediates.
// FlagOperandIdx = 3 is the index of $flags counting $dst as operand 0.
1373def PRED_X : InstR600 <
1374  (outs R600_Predicate_Bit:$dst),
1375  (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
1376  "", [], NullALU> {
1377  let FlagOperandIdx = 3;
1378}
1379
1380let isTerminator = 1, isBranch = 1 in {
// Branch to $target qualified by predicate bit $p.
1381def JUMP_COND : InstR600 <
1382          (outs),
1383          (ins brtarget:$target, R600_Predicate_Bit:$p),
1384          "JUMP $target ($p)",
1385          [], AnyALU
1386  >;
1387
// Unconditional branch; marked predicable and as a barrier.
1388def JUMP : InstR600 <
1389          (outs),
1390          (ins brtarget:$target),
1391          "JUMP $target",
1392          [], AnyALU
1393  >
1394{
1395  let isPredicable = 1;
1396  let isBarrier = 1;
1397}
1398
1399}  // End isTerminator = 1, isBranch = 1
1400
1401let usesCustomInserter = 1 in {
1402
1403let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
1404
// MASK_WRITE: no outputs and no pattern; declared with side effects.
1405def MASK_WRITE : InstR600 <
1406    (outs),
1407    (ins R600_Reg32:$src),
1408    "MASK_WRITE $src",
1409    [],
1410    NullALU
1411>;
1412
1413} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
1414
1415
// TXD / TXD_SHADOW: texture pseudos with three R600_Reg128 sources plus
// resource id, sampler id and texture target immediates. No pattern; both
// set usesCustomInserter (from the enclosing let) and TEXInst.
1416def TXD: InstR600 <
1417  (outs R600_Reg128:$dst),
1418  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
1419       i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
1420  "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget", [],
1421  NullALU > {
1422  let TEXInst = 1;
1423}
1424
1425def TXD_SHADOW: InstR600 <
1426  (outs R600_Reg128:$dst),
1427  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
1428       i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
1429  "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
1430  [], NullALU> {
1431  let TEXInst = 1;
1432}
1433} // End usesCustomInserter = 1
1434} // End isPseudo = 1
1435
1436
1437//===----------------------------------------------------------------------===//
1438// Constant Buffer Addressing Support
1439//===----------------------------------------------------------------------===//
1440
1441let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600"  in {
// CONST_COPY: pseudo that produces an R600_Reg32 from an immediate $src.
// Selected for CONST_ADDRESS with an ADDRGA_CONST_OFFSET address; it is then
// handled by the custom inserter (usesCustomInserter = 1 above).
1442def CONST_COPY : Instruction {
1443  let OutOperandList = (outs R600_Reg32:$dst);
1444  let InOperandList = (ins i32imm:$src);
1445  let Pattern =
1446      [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))];
1447  let AsmString = "CONST_COPY";
1448  let hasSideEffects = 0;
1449  let isAsCheapAsAMove = 1;
1450  let Itinerary = NullALU;
1451}
1452} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600"
1453
// TEX_VTX_CONSTBUF: vertex-fetch instruction selected for CONST_ADDRESS with
// an ADDRGA_VAR_OFFSET pointer and an immediate buffer id, producing v4i32.
// Only Word0 and Word1 are encoded through Inst; see the notes below for the
// remaining words.
1454def TEX_VTX_CONSTBUF :
1455  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$buffer_id), "VTX_READ_eg $dst, $ptr",
1456      [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$buffer_id)))]>,
1457  VTX_WORD1_GPR, VTX_WORD0_eg {
1458
1459  let VC_INST = 0;
1460  let FETCH_TYPE = 2;
1461  let FETCH_WHOLE_QUAD = 0;
1462  let SRC_REL = 0;
1463  let SRC_SEL_X = 0;
1464  let DST_REL = 0;
1465  let USE_CONST_FIELDS = 0;
1466  let NUM_FORMAT_ALL = 2;
1467  let FORMAT_COMP_ALL = 1;
1468  let SRF_MODE_ALL = 1;
1469  let MEGA_FETCH_COUNT = 16;
  // Destination selects 0,1,2,3 for X,Y,Z,W.
1470  let DST_SEL_X        = 0;
1471  let DST_SEL_Y        = 1;
1472  let DST_SEL_Z        = 2;
1473  let DST_SEL_W        = 3;
1474  let DATA_FORMAT      = 35;
1475
1476  let Inst{31-0} = Word0;
1477  let Inst{63-32} = Word1;
1478
1479// LLVM can only encode 64-bit instructions, so these fields are manually
1480// encoded in R600CodeEmitter
1481//
1482// bits<16> OFFSET;
1483// bits<2>  ENDIAN_SWAP = 0;
1484// bits<1>  CONST_BUF_NO_STRIDE = 0;
1485// bits<1>  MEGA_FETCH = 0;
1486// bits<1>  ALT_CONST = 0;
1487// bits<2>  BUFFER_INDEX_MODE = 0;
1488
1489
1490
1491// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
1492// is done in R600CodeEmitter)
1493//
1494// Inst{79-64} = OFFSET;
1495// Inst{81-80} = ENDIAN_SWAP;
1496// Inst{82}    = CONST_BUF_NO_STRIDE;
1497// Inst{83}    = MEGA_FETCH;
1498// Inst{84}    = ALT_CONST;
1499// Inst{86-85} = BUFFER_INDEX_MODE;
1500// Inst{95-87} = 0; Reserved
1501
1502// VTX_WORD3 (Padding)
1503//
1504// Inst{127-96} = 0;
1505  let VTXInst = 1;
1506}
1507
// TEX_VTX_TEXBUF: vertex-fetch instruction with the same operands as
// TEX_VTX_CONSTBUF but no selection pattern. It differs in the field values:
// USE_CONST_FIELDS = 1, NUM_FORMAT_ALL = 0, FORMAT_COMP_ALL = 0 and
// DATA_FORMAT = 0. Only Word0 and Word1 are encoded through Inst.
1508def TEX_VTX_TEXBUF:
1509  InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$buffer_id), "TEX_VTX_EXPLICIT_READ $dst, $ptr">,
1510VTX_WORD1_GPR, VTX_WORD0_eg {
1511
1512let VC_INST = 0;
1513let FETCH_TYPE = 2;
1514let FETCH_WHOLE_QUAD = 0;
1515let SRC_REL = 0;
1516let SRC_SEL_X = 0;
1517let DST_REL = 0;
1518let USE_CONST_FIELDS = 1;
1519let NUM_FORMAT_ALL = 0;
1520let FORMAT_COMP_ALL = 0;
1521let SRF_MODE_ALL = 1;
1522let MEGA_FETCH_COUNT = 16;
// Destination selects 0,1,2,3 for X,Y,Z,W.
1523let DST_SEL_X        = 0;
1524let DST_SEL_Y        = 1;
1525let DST_SEL_Z        = 2;
1526let DST_SEL_W        = 3;
1527let DATA_FORMAT      = 0;
1528
1529let Inst{31-0} = Word0;
1530let Inst{63-32} = Word1;
1531
1532// LLVM can only encode 64-bit instructions, so these fields are manually
1533// encoded in R600CodeEmitter
1534//
1535// bits<16> OFFSET;
1536// bits<2>  ENDIAN_SWAP = 0;
1537// bits<1>  CONST_BUF_NO_STRIDE = 0;
1538// bits<1>  MEGA_FETCH = 0;
1539// bits<1>  ALT_CONST = 0;
1540// bits<2>  BUFFER_INDEX_MODE = 0;
1541
1542
1543
1544// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
1545// is done in R600CodeEmitter)
1546//
1547// Inst{79-64} = OFFSET;
1548// Inst{81-80} = ENDIAN_SWAP;
1549// Inst{82}    = CONST_BUF_NO_STRIDE;
1550// Inst{83}    = MEGA_FETCH;
1551// Inst{84}    = ALT_CONST;
1552// Inst{86-85} = BUFFER_INDEX_MODE;
1553// Inst{95-87} = 0; Reserved
1554
1555// VTX_WORD3 (Padding)
1556//
1557// Inst{127-96} = 0;
1558  let VTXInst = 1;
1559}
1560
1561//===---------------------------------------------------------------------===//
1562// Flow and Program control Instructions
1563//===---------------------------------------------------------------------===//
1564
// Defines _i32 and _f32 pseudo conditional branches selected for node `Op`
// with a basic-block target and a condition value in register class `rci`
// (i32) or `rcf` (f32).
1565multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
1566    def _i32 : ILFormat<(outs),
1567  (ins brtarget:$target, rci:$src0),
1568        "; i32 Pseudo branch instruction",
1569  [(Op bb:$target, (i32 rci:$src0))]>;
1570    def _f32 : ILFormat<(outs),
1571  (ins brtarget:$target, rcf:$src0),
1572        "; f32 Pseudo branch instruction",
1573  [(Op bb:$target, (f32 rcf:$src0))]>;
1574}
1575
1576// Only scalar types should generate flow control
// Defines _i32 and _f32 variants taking one R600_Reg32 operand; the assembly
// string is `name` followed by " $src". No selection patterns.
1577multiclass BranchInstr<string name> {
1578  def _i32 : ILFormat<(outs), (ins R600_Reg32:$src),
1579      !strconcat(name, " $src"), []>;
1580  def _f32 : ILFormat<(outs), (ins R600_Reg32:$src),
1581      !strconcat(name, " $src"), []>;
1582}
1583// Only scalar types should generate flow control
// Two-operand counterpart of BranchInstr: `name` followed by " $src0, $src1".
1584multiclass BranchInstr2<string name> {
1585  def _i32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1),
1586      !strconcat(name, " $src0, $src1"), []>;
1587  def _f32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1),
1588      !strconcat(name, " $src0, $src1"), []>;
1589}
1590
1591//===---------------------------------------------------------------------===//
1592// Custom Inserter for Branches and returns, this eventually will be a
1593// separate pass
1594//===---------------------------------------------------------------------===//
// Pseudo branches handled by the custom inserter. BRANCH is selected for
// `br`; BRANCH_COND_i32 / BRANCH_COND_f32 are selected for IL_brcond with the
// condition in R600_Reg32 for both the integer and float variants.
1595let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1,
1596    Namespace = "R600" in {
1597  def BRANCH : ILFormat<(outs), (ins brtarget:$target),
1598      "; Pseudo unconditional branch instruction",
1599      [(br bb:$target)]>;
1600  defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;
1601}
1602
1603//===----------------------------------------------------------------------===//
1604// Branch Instructions
1605//===----------------------------------------------------------------------===//
1606
// IF_PREDICATE_SET takes one register operand and, unlike the defs below, is
// declared outside the isTerminator block.
1607def IF_PREDICATE_SET  : ILFormat<(outs), (ins R600_Reg32:$src),
1608  "IF_PREDICATE_SET $src", []>;
1609
// Structured control-flow markers. All are terminators with no selection
// pattern; the plain defs take no operands, the BranchInstr defms take one
// register operand and the BranchInstr2 defms take two.
1610let isTerminator=1 in {
1611  def BREAK       : ILFormat< (outs), (ins),
1612      "BREAK", []>;
1613  def CONTINUE    : ILFormat< (outs), (ins),
1614      "CONTINUE", []>;
1615  def DEFAULT     : ILFormat< (outs), (ins),
1616      "DEFAULT", []>;
1617  def ELSE        : ILFormat< (outs), (ins),
1618      "ELSE", []>;
1619  def ENDSWITCH   : ILFormat< (outs), (ins),
1620      "ENDSWITCH", []>;
1621  def ENDMAIN     : ILFormat< (outs), (ins),
1622      "ENDMAIN", []>;
1623  def END         : ILFormat< (outs), (ins),
1624      "END", []>;
1625  def ENDFUNC     : ILFormat< (outs), (ins),
1626      "ENDFUNC", []>;
1627  def ENDIF       : ILFormat< (outs), (ins),
1628      "ENDIF", []>;
  // Record name WHILELOOP prints as "WHILE".
1629  def WHILELOOP   : ILFormat< (outs), (ins),
1630      "WHILE", []>;
1631  def ENDLOOP     : ILFormat< (outs), (ins),
1632      "ENDLOOP", []>;
1633  def FUNC        : ILFormat< (outs), (ins),
1634      "FUNC", []>;
  // Record name RETDYN prints as "RET_DYN".
1635  def RETDYN      : ILFormat< (outs), (ins),
1636      "RET_DYN", []>;
1637  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
1638  defm IF_LOGICALNZ  : BranchInstr<"IF_LOGICALNZ">;
1639  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
1640  defm IF_LOGICALZ   : BranchInstr<"IF_LOGICALZ">;
1641  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
1642  defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
1643  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
1644  defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
1645  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
1646  defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
1647  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
1648  defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
1649  defm IFC         : BranchInstr2<"IFC">;
1650  defm BREAKC      : BranchInstr2<"BREAKC">;
1651  defm CONTINUEC   : BranchInstr2<"CONTINUEC">;
1652} // End isTerminator = 1
1653
1654//===----------------------------------------------------------------------===//
1655// Indirect addressing pseudo instructions
1656//===----------------------------------------------------------------------===//
1657
1658let isPseudo = 1 in {
1659
// Pseudo that reads one R600_Reg32 element out of a vector register of class
// `vec_rc`, with the element chosen by a register $index. No asm string and
// no pattern on the class itself (see ExtractVerticalPat below).
1660class ExtractVertical <RegisterClass vec_rc> : InstR600 <
1661  (outs R600_Reg32:$dst),
1662  (ins vec_rc:$vec, R600_Reg32:$index), "",
1663  [],
1664  AnyALU
1665>;
1666
1667let Constraints = "$dst = $vec" in {
1668
// Pseudo that writes $value into the element of $vec chosen by register
// $index. The result is tied to the input vector by the constraint above.
1669class InsertVertical <RegisterClass vec_rc> : InstR600 <
1670  (outs vec_rc:$dst),
1671  (ins vec_rc:$vec, R600_Reg32:$value, R600_Reg32:$index), "",
1672  [],
1673  AnyALU
1674>;
1675
1676} // End Constraints = "$dst = $vec"
1677
1678} // End isPseudo = 1
1679
// Instantiations for the R600_Reg64Vertical (V2) and R600_Reg128Vertical (V4)
// register classes.
1680def R600_EXTRACT_ELT_V2 : ExtractVertical <R600_Reg64Vertical>;
1681def R600_EXTRACT_ELT_V4 : ExtractVertical <R600_Reg128Vertical>;
1682
1683def R600_INSERT_ELT_V2 : InsertVertical <R600_Reg64Vertical>;
1684def R600_INSERT_ELT_V4 : InsertVertical <R600_Reg128Vertical>;
1685
// Selects extractelt with an i32 register index as `inst $vec, $index`.
1686class ExtractVerticalPat <Instruction inst, ValueType vec_ty,
1687                          ValueType scalar_ty> : R600Pat <
1688  (scalar_ty (extractelt vec_ty:$vec, i32:$index)),
1689  (inst $vec, $index)
1690>;
1691
1692def : ExtractVerticalPat <R600_EXTRACT_ELT_V2, v2i32, i32>;
1693def : ExtractVerticalPat <R600_EXTRACT_ELT_V2, v2f32, f32>;
1694def : ExtractVerticalPat <R600_EXTRACT_ELT_V4, v4i32, i32>;
1695def : ExtractVerticalPat <R600_EXTRACT_ELT_V4, v4f32, f32>;
1696
// Selects insertelt with an i32 register index as `inst $vec, $value, $index`.
1697class InsertVerticalPat <Instruction inst, ValueType vec_ty,
1698                         ValueType scalar_ty> : R600Pat <
1699  (vec_ty (insertelt vec_ty:$vec, scalar_ty:$value, i32:$index)),
1700  (inst $vec, $value, $index)
1701>;
1702
1703def : InsertVerticalPat <R600_INSERT_ELT_V2, v2i32, i32>;
1704def : InsertVerticalPat <R600_INSERT_ELT_V2, v2f32, f32>;
1705def : InsertVerticalPat <R600_INSERT_ELT_V4, v4i32, i32>;
1706def : InsertVerticalPat <R600_INSERT_ELT_V4, v4f32, f32>;
1707
1708//===----------------------------------------------------------------------===//
1709// ISel Patterns
1710//===----------------------------------------------------------------------===//
1711
1712let SubtargetPredicate = isR600toCayman in {
1713
1714// CND*_INT Patterns for f32 True / False values
1715
// Selects an i32-compare-against-0 selectcc whose true/false values are f32
// as the integer conditional-select instruction `cnd`.
1716class CND_INT_f32 <InstR600 cnd, CondCode cc> : R600Pat <
1717  (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
1718  (cnd $src0, $src1, $src2)
1719>;
1720
1721def : CND_INT_f32 <CNDE_INT,  SETEQ>;
1722def : CND_INT_f32 <CNDGT_INT, SETGT>;
1723def : CND_INT_f32 <CNDGE_INT, SETGE>;
1724
1725// CNDGE_INT extra pattern
// A signed "src0 > -1" select is selected as CNDGE_INT, which compares
// against 0: for integers, x > -1 is the same condition as x >= 0.
1726def : R600Pat <
1727  (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_SGT),
1728  (CNDGE_INT $src0, $src1, $src2)
1729>;
1730
1731// KIL Patterns
// int_r600_kill of $src0 is selected as MASK_WRITE (KILLGT 0.0, $src0).
1732def KIL : R600Pat <
1733  (int_r600_kill f32:$src0),
1734  (MASK_WRITE (KILLGT (f32 ZERO), $src0))
1735>;
1736
// Constant-index element extract/insert: index N maps to subregister subN.
1737def : Extract_Element <f32, v4f32, 0, sub0>;
1738def : Extract_Element <f32, v4f32, 1, sub1>;
1739def : Extract_Element <f32, v4f32, 2, sub2>;
1740def : Extract_Element <f32, v4f32, 3, sub3>;
1741
1742def : Insert_Element <f32, v4f32, 0, sub0>;
1743def : Insert_Element <f32, v4f32, 1, sub1>;
1744def : Insert_Element <f32, v4f32, 2, sub2>;
1745def : Insert_Element <f32, v4f32, 3, sub3>;
1746
1747def : Extract_Element <i32, v4i32, 0, sub0>;
1748def : Extract_Element <i32, v4i32, 1, sub1>;
1749def : Extract_Element <i32, v4i32, 2, sub2>;
1750def : Extract_Element <i32, v4i32, 3, sub3>;
1751
1752def : Insert_Element <i32, v4i32, 0, sub0>;
1753def : Insert_Element <i32, v4i32, 1, sub1>;
1754def : Insert_Element <i32, v4i32, 2, sub2>;
1755def : Insert_Element <i32, v4i32, 3, sub3>;
1756
1757def : Extract_Element <f32, v2f32, 0, sub0>;
1758def : Extract_Element <f32, v2f32, 1, sub1>;
1759
1760def : Insert_Element <f32, v2f32, 0, sub0>;
1761def : Insert_Element <f32, v2f32, 1, sub1>;
1762
1763def : Extract_Element <i32, v2i32, 0, sub0>;
1764def : Extract_Element <i32, v2i32, 1, sub1>;
1765
1766def : Insert_Element <i32, v2i32, 0, sub0>;
1767def : Insert_Element <i32, v2i32, 1, sub1>;
1768
1769// bitconvert patterns
1770
// Each int/float pair of equal width is listed in both directions, using the
// register class that holds values of that width.
1771def : BitConvert <i32, f32, R600_Reg32>;
1772def : BitConvert <f32, i32, R600_Reg32>;
1773def : BitConvert <v2f32, v2i32, R600_Reg64>;
1774def : BitConvert <v2i32, v2f32, R600_Reg64>;
1775def : BitConvert <v4f32, v4i32, R600_Reg128>;
1776def : BitConvert <v4i32, v4f32, R600_Reg128>;
1777
1778// DWORDADDR pattern
1779def : DwordAddrPat  <i32, R600_Reg32>;
1780
1781} // End SubtargetPredicate = isR600toCayman
1782
// Instruction mapping over records derived from R600_LDS_1A1D. Instructions
// sharing the same BaseOp form a row and DisableEncoding distinguishes the
// columns: the key column is DisableEncoding = "$dst" and the value column is
// the empty string (written as two adjacent empty string literals).
1783def getLDSNoRetOp : InstrMapping {
1784  let FilterClass = "R600_LDS_1A1D";
1785  let RowFields = ["BaseOp"];
1786  let ColFields = ["DisableEncoding"];
1787  let KeyCol = ["$dst"];
1788  let ValueCols = [[""""]];
1789}
1790