/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 13b9eb15179de69e3c6f7ed714b0499b0abf4394) */
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_ring.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 4096

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

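/*
 * Helpers to position a field value inside the GB_TILE_MODE* and
 * GB_MACROTILE_MODE* registers.  The shift offsets are taken from the
 * GB_TILE_MODE0/GB_MACROTILE_MODE0 definitions; the same field layout
 * applies to all of the tile mode registers.
 */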
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");

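/*
 * Per-VMID GDS register offsets: for each of the 16 VMIDs, the GDS
 * memory base and size registers plus the matching GWS and OA
 * allocation registers.  Indexed by VMID.
 */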
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

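/*
 * The "golden" tables below are {register, AND mask, OR value} triplets
 * consumed by amdgpu_device_program_register_sequence().  Roughly, each
 * triplet is applied like this (a sketch of the helper's behavior; see
 * amdgpu_device.c for the authoritative implementation):
 *
 *	if (and_mask == 0xffffffff)
 *		tmp = or_mask;
 *	else
 *		tmp = (RREG32(reg) & ~and_mask) | (or_mask & and_mask);
 *	WREG32(reg, tmp);
 */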
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

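/*
 * Human-readable decode of the SQ_EDC_INFO SOURCE field, used when
 * reporting SQ ECC/EDC interrupts.
 */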
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L

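/*
 * Program the per-ASIC "golden" register settings.  The Polaris10 branch
 * also adjusts the ACLK divider and, for a handful of specific board
 * SKUs (matched by subsystem IDs), issues two I2C writes as what appears
 * to be a board-level workaround.
 */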
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	uint32_t data;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		data = RREG32_SMC(ixCG_ACLK_CNTL);
		data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
		data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
		WREG32_SMC(ixCG_ACLK_CNTL, data);
		if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

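/*
 * Basic ring liveness test: seed SCRATCH_REG0 with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll the
 * register until the new value lands or the timeout expires.
 */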
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(mmSCRATCH_REG0);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

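/*
 * Indirect buffer test: seed a writeback slot with 0xCAFEDEAD, submit a
 * small IB whose WRITE_DATA packet stores 0xDEADBEEF to that slot, and
 * wait on the fence to confirm the IB actually executed.
 */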
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

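/*
 * Fetch and validate the PFP, ME, CE, RLC, MEC (and, where supported,
 * MEC2) microcode images.  Polaris parts try the "_2" firmware variants
 * first and fall back to the original file names if those are absent.
 */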
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released in feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else {
		adev->virt.chained_ib_support = false;
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
	info->fw = adev->gfx.pfp_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
	info->fw = adev->gfx.me_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
	info->fw = adev->gfx.ce_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
	info->fw = adev->gfx.rlc_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
	info->fw = adev->gfx.mec_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	/* we also need to account for the JT (jump table) */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

	if (amdgpu_sriov_vf(adev)) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
	}

	if (adev->gfx.mec2_fw) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
		info->fw = adev->gfx.mec2_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

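/*
 * Build the clear state buffer (CSB): a PM4 stream that wraps the
 * SECT_CONTEXT register defaults from vi_cs_data in PREAMBLE_CNTL/
 * CONTEXT_CONTROL packets, programs the raster config, and finishes
 * with a CLEAR_STATE packet.
 */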
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_CARRIZO)
		return 5;
	else
		return 4;
}

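/*
 * RLC setup: allocate the clear state buffer described by vi_cs_data
 * and, on Carrizo/Stoney, the CP jump table plus GDS backup area, then
 * seed the SPM VMID with the default 0xf.
 */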
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

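/*
 * Allocate one GFX8_MEC_HPD_SIZE chunk of EOP storage per enabled
 * compute ring for the MEC, preferring VRAM with a GTT fallback, and
 * zero-fill it.
 */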
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM |
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

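/*
 * Hand-assembled GCN3 compute shaders used by the EDC/ECC init pass to
 * write every VGPR/SGPR.  The trailing instruction words should decode
 * to s_barrier (0xbf8a0000) and s_endpgm (0xbf810000).
 */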
1362 static const u32 vgpr_init_compute_shader[] =
1363 {
1364 	0x7e000209, 0x7e020208,
1365 	0x7e040207, 0x7e060206,
1366 	0x7e080205, 0x7e0a0204,
1367 	0x7e0c0203, 0x7e0e0202,
1368 	0x7e100201, 0x7e120200,
1369 	0x7e140209, 0x7e160208,
1370 	0x7e180207, 0x7e1a0206,
1371 	0x7e1c0205, 0x7e1e0204,
1372 	0x7e200203, 0x7e220202,
1373 	0x7e240201, 0x7e260200,
1374 	0x7e280209, 0x7e2a0208,
1375 	0x7e2c0207, 0x7e2e0206,
1376 	0x7e300205, 0x7e320204,
1377 	0x7e340203, 0x7e360202,
1378 	0x7e380201, 0x7e3a0200,
1379 	0x7e3c0209, 0x7e3e0208,
1380 	0x7e400207, 0x7e420206,
1381 	0x7e440205, 0x7e460204,
1382 	0x7e480203, 0x7e4a0202,
1383 	0x7e4c0201, 0x7e4e0200,
1384 	0x7e500209, 0x7e520208,
1385 	0x7e540207, 0x7e560206,
1386 	0x7e580205, 0x7e5a0204,
1387 	0x7e5c0203, 0x7e5e0202,
1388 	0x7e600201, 0x7e620200,
1389 	0x7e640209, 0x7e660208,
1390 	0x7e680207, 0x7e6a0206,
1391 	0x7e6c0205, 0x7e6e0204,
1392 	0x7e700203, 0x7e720202,
1393 	0x7e740201, 0x7e760200,
1394 	0x7e780209, 0x7e7a0208,
1395 	0x7e7c0207, 0x7e7e0206,
1396 	0xbf8a0000, 0xbf810000,
1397 };
1398 
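/*
 * Companion hand-assembled shader that exercises the SGPR file for the
 * same workaround, likewise ending in s_barrier and s_endpgm.
 */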
1399 static const u32 sgpr_init_compute_shader[] =
1400 {
1401 	0xbe8a0100, 0xbe8c0102,
1402 	0xbe8e0104, 0xbe900106,
1403 	0xbe920108, 0xbe940100,
1404 	0xbe960102, 0xbe980104,
1405 	0xbe9a0106, 0xbe9c0108,
1406 	0xbe9e0100, 0xbea00102,
1407 	0xbea20104, 0xbea40106,
1408 	0xbea60108, 0xbea80100,
1409 	0xbeaa0102, 0xbeac0104,
1410 	0xbeae0106, 0xbeb00108,
1411 	0xbeb20100, 0xbeb40102,
1412 	0xbeb60104, 0xbeb80106,
1413 	0xbeba0108, 0xbebc0100,
1414 	0xbebe0102, 0xbec00104,
1415 	0xbec20106, 0xbec40108,
1416 	0xbec60100, 0xbec80102,
1417 	0xbee60004, 0xbee70005,
1418 	0xbeea0006, 0xbeeb0007,
1419 	0xbee80008, 0xbee90009,
1420 	0xbefc0000, 0xbf8a0000,
1421 	0xbf810000, 0x00000000,
1422 };
1423 
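/*
 * Register/value pairs programmed via PACKET3_SET_SH_REG ahead of each
 * EDC workaround dispatch below.
 */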
1424 static const u32 vgpr_init_regs[] =
1425 {
1426 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1427 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1428 	mmCOMPUTE_NUM_THREAD_X, 256*4,
1429 	mmCOMPUTE_NUM_THREAD_Y, 1,
1430 	mmCOMPUTE_NUM_THREAD_Z, 1,
1431 	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1432 	mmCOMPUTE_PGM_RSRC2, 20,
1433 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1434 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1435 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1436 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1437 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1438 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1439 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1440 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1441 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1442 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1443 };
1444 
1445 static const u32 sgpr1_init_regs[] =
1446 {
1447 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1448 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1449 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1450 	mmCOMPUTE_NUM_THREAD_Y, 1,
1451 	mmCOMPUTE_NUM_THREAD_Z, 1,
1452 	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1453 	mmCOMPUTE_PGM_RSRC2, 20,
1454 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1455 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1456 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1457 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1458 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1459 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1460 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1461 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1462 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1463 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1464 };
1465 
1466 static const u32 sgpr2_init_regs[] =
1467 {
1468 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1469 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1470 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1471 	mmCOMPUTE_NUM_THREAD_Y, 1,
1472 	mmCOMPUTE_NUM_THREAD_Z, 1,
1473 	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1474 	mmCOMPUTE_PGM_RSRC2, 20,
1475 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1476 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1477 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1478 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1479 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1480 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1481 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1482 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1483 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1484 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1485 };
1486 
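/*
 * SEC/DED EDC error counters; reading them back at the end of the
 * workaround clears the counts.
 */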
1487 static const u32 sec_ded_counter_registers[] =
1488 {
1489 	mmCPC_EDC_ATC_CNT,
1490 	mmCPC_EDC_SCRATCH_CNT,
1491 	mmCPC_EDC_UCODE_CNT,
1492 	mmCPF_EDC_ATC_CNT,
1493 	mmCPF_EDC_ROQ_CNT,
1494 	mmCPF_EDC_TAG_CNT,
1495 	mmCPG_EDC_ATC_CNT,
1496 	mmCPG_EDC_DMA_CNT,
1497 	mmCPG_EDC_TAG_CNT,
1498 	mmDC_EDC_CSINVOC_CNT,
1499 	mmDC_EDC_RESTORE_CNT,
1500 	mmDC_EDC_STATE_CNT,
1501 	mmGDS_EDC_CNT,
1502 	mmGDS_EDC_GRBM_CNT,
1503 	mmGDS_EDC_OA_DED,
1504 	mmSPI_EDC_CNT,
1505 	mmSQC_ATC_EDC_GATCL1_CNT,
1506 	mmSQC_EDC_CNT,
1507 	mmSQ_EDC_DED_CNT,
1508 	mmSQ_EDC_INFO,
1509 	mmSQ_EDC_SEC_CNT,
1510 	mmTCC_EDC_CNT,
1511 	mmTCP_ATC_EDC_GATCL1_CNT,
1512 	mmTCP_EDC_CNT,
1513 	mmTD_EDC_CNT
1514 };
1515 
1516 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1517 {
1518 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1519 	struct amdgpu_ib ib;
1520 	struct dma_fence *f = NULL;
1521 	int r, i;
1522 	u32 tmp;
1523 	unsigned total_size, vgpr_offset, sgpr_offset;
1524 	u64 gpu_addr;
1525 
1526 	/* only supported on CZ */
1527 	if (adev->asic_type != CHIP_CARRIZO)
1528 		return 0;
1529 
1530 	/* bail if the compute ring is not ready */
1531 	if (!ring->sched.ready)
1532 		return 0;
1533 
1534 	tmp = RREG32(mmGB_EDC_MODE);
1535 	WREG32(mmGB_EDC_MODE, 0);
1536 
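	/*
	 * IB size in bytes: each register pair takes 3 dwords (SET_SH_REG
	 * header, offset, value), plus 4 dwords for COMPUTE_PGM_LO/HI,
	 * 5 for DISPATCH_DIRECT and 2 for the CS partial flush EVENT_WRITE.
	 */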
1537 	total_size =
1538 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1539 	total_size +=
1540 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1541 	total_size +=
1542 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1543 	total_size = ALIGN(total_size, 256);
1544 	vgpr_offset = total_size;
1545 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1546 	sgpr_offset = total_size;
1547 	total_size += sizeof(sgpr_init_compute_shader);
1548 
1549 	/* allocate an indirect buffer to put the commands in */
1550 	memset(&ib, 0, sizeof(ib));
1551 	r = amdgpu_ib_get(adev, NULL, total_size,
1552 			  AMDGPU_IB_POOL_DIRECT, &ib);
1553 	if (r) {
1554 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1555 		return r;
1556 	}
1557 
1558 	/* load the compute shaders */
1559 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1560 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1561 
1562 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1563 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1564 
1565 	/* init the ib length to 0 */
1566 	ib.length_dw = 0;
1567 
1568 	/* VGPR */
1569 	/* write the register state for the compute dispatch */
1570 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1571 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1572 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1573 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1574 	}
1575 	/* write the shader start address (in 256-byte units, hence >> 8): mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1576 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1577 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1578 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1579 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1580 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1581 
1582 	/* write dispatch packet */
1583 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1584 	ib.ptr[ib.length_dw++] = 8; /* x */
1585 	ib.ptr[ib.length_dw++] = 1; /* y */
1586 	ib.ptr[ib.length_dw++] = 1; /* z */
1587 	ib.ptr[ib.length_dw++] =
1588 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1589 
1590 	/* write CS partial flush packet */
1591 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1592 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1593 
1594 	/* SGPR1 */
1595 	/* write the register state for the compute dispatch */
1596 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1597 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1598 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1599 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1600 	}
1601 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1602 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1603 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1604 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1605 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1606 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1607 
1608 	/* write dispatch packet */
1609 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1610 	ib.ptr[ib.length_dw++] = 8; /* x */
1611 	ib.ptr[ib.length_dw++] = 1; /* y */
1612 	ib.ptr[ib.length_dw++] = 1; /* z */
1613 	ib.ptr[ib.length_dw++] =
1614 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1615 
1616 	/* write CS partial flush packet */
1617 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1618 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1619 
1620 	/* SGPR2 */
1621 	/* write the register state for the compute dispatch */
1622 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1623 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1624 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1625 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1626 	}
1627 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1628 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1629 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1630 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1631 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1632 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1633 
1634 	/* write dispatch packet */
1635 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1636 	ib.ptr[ib.length_dw++] = 8; /* x */
1637 	ib.ptr[ib.length_dw++] = 1; /* y */
1638 	ib.ptr[ib.length_dw++] = 1; /* z */
1639 	ib.ptr[ib.length_dw++] =
1640 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1641 
1642 	/* write CS partial flush packet */
1643 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1644 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1645 
1646 	/* schedule the ib on the ring */
1647 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1648 	if (r) {
1649 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1650 		goto fail;
1651 	}
1652 
1653 	/* wait for the GPU to finish processing the IB */
1654 	r = dma_fence_wait(f, false);
1655 	if (r) {
1656 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1657 		goto fail;
1658 	}
1659 
1660 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1661 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1662 	WREG32(mmGB_EDC_MODE, tmp);
1663 
1664 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1665 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1666 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1667 
1669 	/* read back registers to clear the counters */
1670 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1671 		RREG32(sec_ded_counter_registers[i]);
1672 
1673 fail:
1674 	amdgpu_ib_free(adev, &ib, NULL);
1675 	dma_fence_put(f);
1676 
1677 	return r;
1678 }
1679 
1680 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1681 {
1682 	u32 gb_addr_config;
1683 	u32 mc_arb_ramcfg;
1684 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1685 	u32 tmp;
1686 	int ret;
1687 
1688 	switch (adev->asic_type) {
1689 	case CHIP_TOPAZ:
1690 		adev->gfx.config.max_shader_engines = 1;
1691 		adev->gfx.config.max_tile_pipes = 2;
1692 		adev->gfx.config.max_cu_per_sh = 6;
1693 		adev->gfx.config.max_sh_per_se = 1;
1694 		adev->gfx.config.max_backends_per_se = 2;
1695 		adev->gfx.config.max_texture_channel_caches = 2;
1696 		adev->gfx.config.max_gprs = 256;
1697 		adev->gfx.config.max_gs_threads = 32;
1698 		adev->gfx.config.max_hw_contexts = 8;
1699 
1700 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1701 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1702 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1703 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1704 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1705 		break;
1706 	case CHIP_FIJI:
1707 		adev->gfx.config.max_shader_engines = 4;
1708 		adev->gfx.config.max_tile_pipes = 16;
1709 		adev->gfx.config.max_cu_per_sh = 16;
1710 		adev->gfx.config.max_sh_per_se = 1;
1711 		adev->gfx.config.max_backends_per_se = 4;
1712 		adev->gfx.config.max_texture_channel_caches = 16;
1713 		adev->gfx.config.max_gprs = 256;
1714 		adev->gfx.config.max_gs_threads = 32;
1715 		adev->gfx.config.max_hw_contexts = 8;
1716 
1717 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1718 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1719 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1720 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1721 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1722 		break;
1723 	case CHIP_POLARIS11:
1724 	case CHIP_POLARIS12:
1725 		ret = amdgpu_atombios_get_gfx_info(adev);
1726 		if (ret)
1727 			return ret;
1728 		adev->gfx.config.max_gprs = 256;
1729 		adev->gfx.config.max_gs_threads = 32;
1730 		adev->gfx.config.max_hw_contexts = 8;
1731 
1732 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1733 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1734 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1735 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1736 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1737 		break;
1738 	case CHIP_POLARIS10:
1739 	case CHIP_VEGAM:
1740 		ret = amdgpu_atombios_get_gfx_info(adev);
1741 		if (ret)
1742 			return ret;
1743 		adev->gfx.config.max_gprs = 256;
1744 		adev->gfx.config.max_gs_threads = 32;
1745 		adev->gfx.config.max_hw_contexts = 8;
1746 
1747 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1748 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1749 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1750 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1751 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1752 		break;
1753 	case CHIP_TONGA:
1754 		adev->gfx.config.max_shader_engines = 4;
1755 		adev->gfx.config.max_tile_pipes = 8;
1756 		adev->gfx.config.max_cu_per_sh = 8;
1757 		adev->gfx.config.max_sh_per_se = 1;
1758 		adev->gfx.config.max_backends_per_se = 2;
1759 		adev->gfx.config.max_texture_channel_caches = 8;
1760 		adev->gfx.config.max_gprs = 256;
1761 		adev->gfx.config.max_gs_threads = 32;
1762 		adev->gfx.config.max_hw_contexts = 8;
1763 
1764 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1765 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1766 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1767 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1768 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1769 		break;
1770 	case CHIP_CARRIZO:
1771 		adev->gfx.config.max_shader_engines = 1;
1772 		adev->gfx.config.max_tile_pipes = 2;
1773 		adev->gfx.config.max_sh_per_se = 1;
1774 		adev->gfx.config.max_backends_per_se = 2;
1775 		adev->gfx.config.max_cu_per_sh = 8;
1776 		adev->gfx.config.max_texture_channel_caches = 2;
1777 		adev->gfx.config.max_gprs = 256;
1778 		adev->gfx.config.max_gs_threads = 32;
1779 		adev->gfx.config.max_hw_contexts = 8;
1780 
1781 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1782 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1783 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1784 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1785 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1786 		break;
1787 	case CHIP_STONEY:
1788 		adev->gfx.config.max_shader_engines = 1;
1789 		adev->gfx.config.max_tile_pipes = 2;
1790 		adev->gfx.config.max_sh_per_se = 1;
1791 		adev->gfx.config.max_backends_per_se = 1;
1792 		adev->gfx.config.max_cu_per_sh = 3;
1793 		adev->gfx.config.max_texture_channel_caches = 2;
1794 		adev->gfx.config.max_gprs = 256;
1795 		adev->gfx.config.max_gs_threads = 16;
1796 		adev->gfx.config.max_hw_contexts = 8;
1797 
1798 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1799 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1800 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1801 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1802 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1803 		break;
1804 	default:
1805 		adev->gfx.config.max_shader_engines = 2;
1806 		adev->gfx.config.max_tile_pipes = 4;
1807 		adev->gfx.config.max_cu_per_sh = 2;
1808 		adev->gfx.config.max_sh_per_se = 1;
1809 		adev->gfx.config.max_backends_per_se = 2;
1810 		adev->gfx.config.max_texture_channel_caches = 4;
1811 		adev->gfx.config.max_gprs = 256;
1812 		adev->gfx.config.max_gs_threads = 32;
1813 		adev->gfx.config.max_hw_contexts = 8;
1814 
1815 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1816 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1817 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1818 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1819 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1820 		break;
1821 	}
1822 
1823 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1824 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1825 
1826 	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1827 				MC_ARB_RAMCFG, NOOFBANK);
1828 	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1829 				MC_ARB_RAMCFG, NOOFRANKS);
1830 
1831 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1832 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1833 	if (adev->flags & AMD_IS_APU) {
1834 		/* Get memory bank mapping mode. */
1835 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1836 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1837 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1838 
1839 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1840 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1841 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1842 
1843 		/* Validate settings in case only one DIMM is installed. */
1844 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1845 			dimm00_addr_map = 0;
1846 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1847 			dimm01_addr_map = 0;
1848 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1849 			dimm10_addr_map = 0;
1850 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1851 			dimm11_addr_map = 0;
1852 
1853 		/* If the DIMM address map is 8GB, the row size should be 2KB; otherwise 1KB. */
1854 		/* If ROW size(DIMM1) != ROW size(DIMM0), use the larger row size. */
1855 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1856 			adev->gfx.config.mem_row_size_in_kb = 2;
1857 		else
1858 			adev->gfx.config.mem_row_size_in_kb = 1;
1859 	} else {
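		/* row size = 4 bytes * 2^(8 + NOOFCOLS) columns, in KB, capped at 4 */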
1860 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1861 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1862 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1863 			adev->gfx.config.mem_row_size_in_kb = 4;
1864 	}
1865 
1866 	adev->gfx.config.shader_engine_tile_size = 32;
1867 	adev->gfx.config.num_gpus = 1;
1868 	adev->gfx.config.multi_gpu_tile_size = 64;
1869 
1870 	/* fix up row size */
1871 	switch (adev->gfx.config.mem_row_size_in_kb) {
1872 	case 1:
1873 	default:
1874 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1875 		break;
1876 	case 2:
1877 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1878 		break;
1879 	case 4:
1880 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1881 		break;
1882 	}
1883 	adev->gfx.config.gb_addr_config = gb_addr_config;
1884 
1885 	return 0;
1886 }
1887 
1888 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1889 					int mec, int pipe, int queue)
1890 {
1891 	int r;
1892 	unsigned irq_type;
1893 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1894 	unsigned int hw_prio;
1895 
1898 	/* mec0 is me1 */
1899 	ring->me = mec + 1;
1900 	ring->pipe = pipe;
1901 	ring->queue = queue;
1902 
1903 	ring->ring_obj = NULL;
1904 	ring->use_doorbell = true;
1905 	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
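	/* each ring gets its own GFX8_MEC_HPD_SIZE slice of the shared EOP buffer */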
1906 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1907 				+ (ring_id * GFX8_MEC_HPD_SIZE);
1908 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1909 
1910 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1911 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1912 		+ ring->pipe;
1913 
1914 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1915 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1916 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1917 	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1918 			     hw_prio, NULL);
1919 	if (r)
1920 		return r;
1921 
1923 	return 0;
1924 }
1925 
1926 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1927 
1928 static int gfx_v8_0_sw_init(void *handle)
1929 {
1930 	int i, j, k, r, ring_id;
1931 	struct amdgpu_ring *ring;
1932 	struct amdgpu_kiq *kiq;
1933 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1934 
1935 	switch (adev->asic_type) {
1936 	case CHIP_TONGA:
1937 	case CHIP_CARRIZO:
1938 	case CHIP_FIJI:
1939 	case CHIP_POLARIS10:
1940 	case CHIP_POLARIS11:
1941 	case CHIP_POLARIS12:
1942 	case CHIP_VEGAM:
1943 		adev->gfx.mec.num_mec = 2;
1944 		break;
1945 	case CHIP_TOPAZ:
1946 	case CHIP_STONEY:
1947 	default:
1948 		adev->gfx.mec.num_mec = 1;
1949 		break;
1950 	}
1951 
1952 	adev->gfx.mec.num_pipe_per_mec = 4;
1953 	adev->gfx.mec.num_queue_per_pipe = 8;
1954 
1955 	/* EOP Event */
1956 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1957 	if (r)
1958 		return r;
1959 
1960 	/* Privileged reg */
1961 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1962 			      &adev->gfx.priv_reg_irq);
1963 	if (r)
1964 		return r;
1965 
1966 	/* Privileged inst */
1967 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1968 			      &adev->gfx.priv_inst_irq);
1969 	if (r)
1970 		return r;
1971 
1972 	/* Add CP EDC/ECC irq */
1973 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1974 			      &adev->gfx.cp_ecc_error_irq);
1975 	if (r)
1976 		return r;
1977 
1978 	/* SQ interrupts. */
1979 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1980 			      &adev->gfx.sq_irq);
1981 	if (r) {
1982 		DRM_ERROR("amdgpu_irq_add_id() for SQ failed: %d\n", r);
1983 		return r;
1984 	}
1985 
1986 	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1987 
1988 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1989 
1990 	r = gfx_v8_0_init_microcode(adev);
1991 	if (r) {
1992 		DRM_ERROR("Failed to load gfx firmware!\n");
1993 		return r;
1994 	}
1995 
1996 	r = adev->gfx.rlc.funcs->init(adev);
1997 	if (r) {
1998 		DRM_ERROR("Failed to init rlc BOs!\n");
1999 		return r;
2000 	}
2001 
2002 	r = gfx_v8_0_mec_init(adev);
2003 	if (r) {
2004 		DRM_ERROR("Failed to init MEC BOs!\n");
2005 		return r;
2006 	}
2007 
2008 	/* set up the gfx ring */
2009 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2010 		ring = &adev->gfx.gfx_ring[i];
2011 		ring->ring_obj = NULL;
2012 		sprintf(ring->name, "gfx");
2013 		/* no gfx doorbells on iceland */
2014 		if (adev->asic_type != CHIP_TOPAZ) {
2015 			ring->use_doorbell = true;
2016 			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2017 		}
2018 
2019 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2020 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2021 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2022 		if (r)
2023 			return r;
2024 	}
2025 
2027 	/* set up the compute queues - allocate horizontally across pipes */
2028 	ring_id = 0;
2029 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2030 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2031 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2032 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2033 					continue;
2034 
2035 				r = gfx_v8_0_compute_ring_init(adev,
2036 								ring_id,
2037 								i, k, j);
2038 				if (r)
2039 					return r;
2040 
2041 				ring_id++;
2042 			}
2043 		}
2044 	}
2045 
2046 	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2047 	if (r) {
2048 		DRM_ERROR("Failed to init KIQ BOs!\n");
2049 		return r;
2050 	}
2051 
2052 	kiq = &adev->gfx.kiq;
2053 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2054 	if (r)
2055 		return r;
2056 
2057 	/* create MQDs for all compute queues, as well as the KIQ for the SR-IOV case */
2058 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2059 	if (r)
2060 		return r;
2061 
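	/* 32 KB of constant engine (CE) RAM */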
2062 	adev->gfx.ce_ram_size = 0x8000;
2063 
2064 	r = gfx_v8_0_gpu_early_init(adev);
2065 	if (r)
2066 		return r;
2067 
2068 	return 0;
2069 }
2070 
2071 static int gfx_v8_0_sw_fini(void *handle)
2072 {
2073 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2074 	int i;
2075 
2076 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2077 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2078 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2079 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2080 
2081 	amdgpu_gfx_mqd_sw_fini(adev);
2082 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2083 	amdgpu_gfx_kiq_fini(adev);
2084 
2085 	gfx_v8_0_mec_fini(adev);
2086 	amdgpu_gfx_rlc_fini(adev);
2087 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2088 				&adev->gfx.rlc.clear_state_gpu_addr,
2089 				(void **)&adev->gfx.rlc.cs_ptr);
2090 	if ((adev->asic_type == CHIP_CARRIZO) ||
2091 	    (adev->asic_type == CHIP_STONEY)) {
2092 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2093 				&adev->gfx.rlc.cp_table_gpu_addr,
2094 				(void **)&adev->gfx.rlc.cp_table_ptr);
2095 	}
2096 	gfx_v8_0_free_microcode(adev);
2097 
2098 	return 0;
2099 }
2100 
2101 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2102 {
2103 	uint32_t *modearray, *mod2array;
2104 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2105 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2106 	u32 reg_offset;
2107 
2108 	modearray = adev->gfx.config.tile_mode_array;
2109 	mod2array = adev->gfx.config.macrotile_mode_array;
2110 
2111 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2112 		modearray[reg_offset] = 0;
2113 
2114 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2115 		mod2array[reg_offset] = 0;
2116 
2117 	switch (adev->asic_type) {
2118 	case CHIP_TOPAZ:
2119 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2120 				PIPE_CONFIG(ADDR_SURF_P2) |
2121 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2122 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2123 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124 				PIPE_CONFIG(ADDR_SURF_P2) |
2125 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2126 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2127 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2128 				PIPE_CONFIG(ADDR_SURF_P2) |
2129 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2130 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2131 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2132 				PIPE_CONFIG(ADDR_SURF_P2) |
2133 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2134 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2135 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2136 				PIPE_CONFIG(ADDR_SURF_P2) |
2137 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2138 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2139 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2140 				PIPE_CONFIG(ADDR_SURF_P2) |
2141 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2142 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2143 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2144 				PIPE_CONFIG(ADDR_SURF_P2) |
2145 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2146 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2147 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2148 				PIPE_CONFIG(ADDR_SURF_P2));
2149 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2150 				PIPE_CONFIG(ADDR_SURF_P2) |
2151 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2152 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2153 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2154 				 PIPE_CONFIG(ADDR_SURF_P2) |
2155 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2156 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2157 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2158 				 PIPE_CONFIG(ADDR_SURF_P2) |
2159 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2160 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2161 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2162 				 PIPE_CONFIG(ADDR_SURF_P2) |
2163 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2164 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2165 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2166 				 PIPE_CONFIG(ADDR_SURF_P2) |
2167 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2168 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2169 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2170 				 PIPE_CONFIG(ADDR_SURF_P2) |
2171 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2172 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2173 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2174 				 PIPE_CONFIG(ADDR_SURF_P2) |
2175 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2176 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2177 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2178 				 PIPE_CONFIG(ADDR_SURF_P2) |
2179 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2180 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2181 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2182 				 PIPE_CONFIG(ADDR_SURF_P2) |
2183 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2184 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2185 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2186 				 PIPE_CONFIG(ADDR_SURF_P2) |
2187 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2188 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2189 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2190 				 PIPE_CONFIG(ADDR_SURF_P2) |
2191 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2192 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2193 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2194 				 PIPE_CONFIG(ADDR_SURF_P2) |
2195 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2196 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2197 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2198 				 PIPE_CONFIG(ADDR_SURF_P2) |
2199 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2201 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2202 				 PIPE_CONFIG(ADDR_SURF_P2) |
2203 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2204 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2205 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2206 				 PIPE_CONFIG(ADDR_SURF_P2) |
2207 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2208 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2209 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2210 				 PIPE_CONFIG(ADDR_SURF_P2) |
2211 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2212 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2213 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2214 				 PIPE_CONFIG(ADDR_SURF_P2) |
2215 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2216 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2217 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2218 				 PIPE_CONFIG(ADDR_SURF_P2) |
2219 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2220 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2221 
2222 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2223 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2224 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2225 				NUM_BANKS(ADDR_SURF_8_BANK));
2226 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2227 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2228 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2229 				NUM_BANKS(ADDR_SURF_8_BANK));
2230 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2231 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2232 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2233 				NUM_BANKS(ADDR_SURF_8_BANK));
2234 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2235 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2236 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2237 				NUM_BANKS(ADDR_SURF_8_BANK));
2238 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2239 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2240 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2241 				NUM_BANKS(ADDR_SURF_8_BANK));
2242 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2243 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2244 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2245 				NUM_BANKS(ADDR_SURF_8_BANK));
2246 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2247 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2248 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2249 				NUM_BANKS(ADDR_SURF_8_BANK));
2250 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2251 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2252 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2253 				NUM_BANKS(ADDR_SURF_16_BANK));
2254 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2255 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2256 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2257 				NUM_BANKS(ADDR_SURF_16_BANK));
2258 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2259 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2261 				 NUM_BANKS(ADDR_SURF_16_BANK));
2262 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2263 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2264 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2265 				 NUM_BANKS(ADDR_SURF_16_BANK));
2266 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2268 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2269 				 NUM_BANKS(ADDR_SURF_16_BANK));
2270 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2273 				 NUM_BANKS(ADDR_SURF_16_BANK));
2274 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2276 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2277 				 NUM_BANKS(ADDR_SURF_8_BANK));
2278 
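		/* tile mode indices 7, 12, 17 and 23 are left unprogrammed on Topaz */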
2279 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2280 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2281 			    reg_offset != 23)
2282 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2283 
2284 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2285 			if (reg_offset != 7)
2286 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2287 
2288 		break;
2289 	case CHIP_FIJI:
2290 	case CHIP_VEGAM:
2291 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2293 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2294 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2295 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2296 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2297 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2298 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2299 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2300 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2301 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2302 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2303 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2304 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2305 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2306 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2307 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2309 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2310 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2311 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2312 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2314 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2315 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2316 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2318 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2319 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2320 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2321 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2322 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2324 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2325 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2326 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2328 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2329 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2332 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2333 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2334 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2336 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2337 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2338 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2339 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2340 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2341 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2342 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2344 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2345 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2346 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2348 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2349 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2350 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2352 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2353 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2354 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2355 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2356 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2357 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2358 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2359 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2360 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2361 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2362 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2364 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2365 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2366 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2368 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2369 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2370 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2372 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2373 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2374 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2376 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2377 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2378 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2380 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2381 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2382 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2383 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2384 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2385 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2386 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2389 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2390 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2392 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2394 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2398 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2400 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2401 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2404 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2406 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2408 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2409 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2410 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2411 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2412 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2413 
2414 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2415 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2416 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2417 				NUM_BANKS(ADDR_SURF_8_BANK));
2418 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2419 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2420 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2421 				NUM_BANKS(ADDR_SURF_8_BANK));
2422 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2423 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2424 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2425 				NUM_BANKS(ADDR_SURF_8_BANK));
2426 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2427 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2428 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2429 				NUM_BANKS(ADDR_SURF_8_BANK));
2430 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2432 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2433 				NUM_BANKS(ADDR_SURF_8_BANK));
2434 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2436 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2437 				NUM_BANKS(ADDR_SURF_8_BANK));
2438 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2440 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2441 				NUM_BANKS(ADDR_SURF_8_BANK));
2442 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2444 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445 				NUM_BANKS(ADDR_SURF_8_BANK));
2446 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449 				NUM_BANKS(ADDR_SURF_8_BANK));
2450 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2452 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2453 				 NUM_BANKS(ADDR_SURF_8_BANK));
2454 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2456 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2457 				 NUM_BANKS(ADDR_SURF_8_BANK));
2458 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2460 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2461 				 NUM_BANKS(ADDR_SURF_8_BANK));
2462 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2465 				 NUM_BANKS(ADDR_SURF_8_BANK));
2466 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469 				 NUM_BANKS(ADDR_SURF_4_BANK));
2470 
2471 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2472 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2473 
2474 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2475 			if (reg_offset != 7)
2476 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2477 
2478 		break;
2479 	case CHIP_TONGA:
2480 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2481 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2482 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2483 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2484 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2485 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2486 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2487 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2488 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2489 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2490 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2491 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2492 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2494 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2495 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2496 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2497 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2498 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2499 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2500 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2501 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2502 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2503 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2504 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2505 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2506 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2507 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2508 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2509 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2510 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2511 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2512 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2513 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2514 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2515 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2517 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2518 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2519 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2521 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2522 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2523 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2525 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2526 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2527 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2528 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2529 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2530 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2531 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2533 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2534 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2536 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2537 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2538 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2539 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2540 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2541 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2542 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2544 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2545 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2546 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2547 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2548 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2549 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2550 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2551 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2553 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2554 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2555 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2557 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2558 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2559 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2561 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2562 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2563 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2565 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2566 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2567 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2569 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2570 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2571 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2572 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2573 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2574 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2575 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2578 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2579 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2581 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2582 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2583 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2585 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2587 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2589 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2590 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2593 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2594 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2595 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2597 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2598 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2599 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2600 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2601 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2602 
2603 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2605 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2606 				NUM_BANKS(ADDR_SURF_16_BANK));
2607 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610 				NUM_BANKS(ADDR_SURF_16_BANK));
2611 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2614 				NUM_BANKS(ADDR_SURF_16_BANK));
2615 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2617 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2618 				NUM_BANKS(ADDR_SURF_16_BANK));
2619 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2621 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622 				NUM_BANKS(ADDR_SURF_16_BANK));
2623 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626 				NUM_BANKS(ADDR_SURF_16_BANK));
2627 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630 				NUM_BANKS(ADDR_SURF_16_BANK));
2631 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2633 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2634 				NUM_BANKS(ADDR_SURF_16_BANK));
2635 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638 				NUM_BANKS(ADDR_SURF_16_BANK));
2639 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2641 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2642 				 NUM_BANKS(ADDR_SURF_16_BANK));
2643 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2645 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646 				 NUM_BANKS(ADDR_SURF_16_BANK));
2647 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2650 				 NUM_BANKS(ADDR_SURF_8_BANK));
2651 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654 				 NUM_BANKS(ADDR_SURF_4_BANK));
2655 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658 				 NUM_BANKS(ADDR_SURF_4_BANK));
2659 
2660 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2661 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2662 
2663 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2664 			if (reg_offset != 7)
2665 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2666 
2667 		break;
2668 	case CHIP_POLARIS11:
2669 	case CHIP_POLARIS12:
2670 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2671 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2673 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2674 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2675 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2677 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2678 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2680 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2681 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2682 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2683 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2685 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2686 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2687 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2688 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2689 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2690 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2693 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2694 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2695 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2697 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2698 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2699 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2701 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2702 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2703 				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2704 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2705 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2707 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2708 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2711 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2714 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2715 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2716 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2717 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2719 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2720 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2723 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2724 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2727 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2728 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2729 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2731 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2732 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2733 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2735 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2736 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2737 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2738 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2739 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2740 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2741 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2743 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2744 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2745 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2747 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2748 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2749 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2751 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2752 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2753 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2755 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2756 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2757 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2759 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2760 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2761 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2762 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2763 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2764 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2765 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2767 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2768 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2769 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2771 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2772 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2773 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2775 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2776 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2777 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2779 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2780 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2783 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2784 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2785 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2787 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2788 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2789 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2791 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2792 
2793 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2794 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2795 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2796 				NUM_BANKS(ADDR_SURF_16_BANK));
2797 
2798 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2800 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2801 				NUM_BANKS(ADDR_SURF_16_BANK));
2802 
2803 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2804 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2805 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2806 				NUM_BANKS(ADDR_SURF_16_BANK));
2807 
2808 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2810 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811 				NUM_BANKS(ADDR_SURF_16_BANK));
2812 
2813 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2814 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2815 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2816 				NUM_BANKS(ADDR_SURF_16_BANK));
2817 
2818 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2819 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2820 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2821 				NUM_BANKS(ADDR_SURF_16_BANK));
2822 
2823 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2824 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2825 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2826 				NUM_BANKS(ADDR_SURF_16_BANK));
2827 
2828 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2829 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2830 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2831 				NUM_BANKS(ADDR_SURF_16_BANK));
2832 
2833 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2834 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2835 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2836 				NUM_BANKS(ADDR_SURF_16_BANK));
2837 
2838 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2840 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2841 				NUM_BANKS(ADDR_SURF_16_BANK));
2842 
2843 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2845 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846 				NUM_BANKS(ADDR_SURF_16_BANK));
2847 
2848 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851 				NUM_BANKS(ADDR_SURF_16_BANK));
2852 
2853 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2855 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2856 				NUM_BANKS(ADDR_SURF_8_BANK));
2857 
2858 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2860 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2861 				NUM_BANKS(ADDR_SURF_4_BANK));
2862 
2863 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2864 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2865 
2866 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2867 			if (reg_offset != 7)
2868 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2869 
2870 		break;
2871 	case CHIP_POLARIS10:
2872 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2874 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2875 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2876 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2877 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2878 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2879 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2880 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2882 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2883 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2884 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2886 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2887 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2888 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2889 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2890 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2891 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2892 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2893 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2894 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2895 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2896 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2897 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2898 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2899 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2900 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2901 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2902 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2903 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2905 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2906 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2907 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2908 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2909 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2910 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2912 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2913 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2914 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2915 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2916 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2917 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2918 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2919 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2921 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2922 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2923 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2924 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2925 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2926 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2927 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2928 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2929 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2930 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2931 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2932 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2933 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2934 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2935 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2937 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2938 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2940 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2941 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2942 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2943 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2944 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2945 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2946 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2947 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2948 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2949 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2950 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2951 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2953 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2954 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2955 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2957 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2958 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2959 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2961 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2962 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2963 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2964 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2965 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2966 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2967 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2968 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2970 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2971 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2973 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2974 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2975 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2977 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2979 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2981 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2982 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2983 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2985 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2986 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2987 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2988 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2989 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2990 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2991 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2992 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2993 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2994 
2995 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2997 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998 				NUM_BANKS(ADDR_SURF_16_BANK));
2999 
3000 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3001 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3002 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003 				NUM_BANKS(ADDR_SURF_16_BANK));
3004 
3005 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3006 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3007 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3008 				NUM_BANKS(ADDR_SURF_16_BANK));
3009 
3010 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3012 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3013 				NUM_BANKS(ADDR_SURF_16_BANK));
3014 
3015 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3018 				NUM_BANKS(ADDR_SURF_16_BANK));
3019 
3020 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3022 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3023 				NUM_BANKS(ADDR_SURF_16_BANK));
3024 
3025 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3027 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3028 				NUM_BANKS(ADDR_SURF_16_BANK));
3029 
3030 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3032 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3033 				NUM_BANKS(ADDR_SURF_16_BANK));
3034 
3035 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3037 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3038 				NUM_BANKS(ADDR_SURF_16_BANK));
3039 
3040 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3042 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3043 				NUM_BANKS(ADDR_SURF_16_BANK));
3044 
3045 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3047 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3048 				NUM_BANKS(ADDR_SURF_16_BANK));
3049 
3050 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3052 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3053 				NUM_BANKS(ADDR_SURF_8_BANK));
3054 
3055 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3057 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3058 				NUM_BANKS(ADDR_SURF_4_BANK));
3059 
3060 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3063 				NUM_BANKS(ADDR_SURF_4_BANK));
3064 
3065 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3066 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3067 
3068 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3069 			if (reg_offset != 7)
3070 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3071 
3072 		break;
3073 	case CHIP_STONEY:
3074 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3075 				PIPE_CONFIG(ADDR_SURF_P2) |
3076 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3077 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3078 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3079 				PIPE_CONFIG(ADDR_SURF_P2) |
3080 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3081 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3082 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3083 				PIPE_CONFIG(ADDR_SURF_P2) |
3084 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3085 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3086 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3087 				PIPE_CONFIG(ADDR_SURF_P2) |
3088 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3089 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3090 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3091 				PIPE_CONFIG(ADDR_SURF_P2) |
3092 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3093 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3094 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3095 				PIPE_CONFIG(ADDR_SURF_P2) |
3096 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3097 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3098 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3099 				PIPE_CONFIG(ADDR_SURF_P2) |
3100 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3101 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3102 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3103 				PIPE_CONFIG(ADDR_SURF_P2));
3104 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3105 				PIPE_CONFIG(ADDR_SURF_P2) |
3106 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3107 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3108 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3109 				 PIPE_CONFIG(ADDR_SURF_P2) |
3110 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3111 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3112 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3113 				 PIPE_CONFIG(ADDR_SURF_P2) |
3114 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3115 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3116 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3117 				 PIPE_CONFIG(ADDR_SURF_P2) |
3118 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3119 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3120 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3121 				 PIPE_CONFIG(ADDR_SURF_P2) |
3122 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3123 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3124 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3125 				 PIPE_CONFIG(ADDR_SURF_P2) |
3126 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3127 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3128 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3129 				 PIPE_CONFIG(ADDR_SURF_P2) |
3130 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3131 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3132 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3133 				 PIPE_CONFIG(ADDR_SURF_P2) |
3134 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3135 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3136 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3137 				 PIPE_CONFIG(ADDR_SURF_P2) |
3138 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3139 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3140 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3141 				 PIPE_CONFIG(ADDR_SURF_P2) |
3142 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3143 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3144 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3145 				 PIPE_CONFIG(ADDR_SURF_P2) |
3146 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3147 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3148 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3149 				 PIPE_CONFIG(ADDR_SURF_P2) |
3150 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3151 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3152 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3153 				 PIPE_CONFIG(ADDR_SURF_P2) |
3154 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3156 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3157 				 PIPE_CONFIG(ADDR_SURF_P2) |
3158 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3159 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3160 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3161 				 PIPE_CONFIG(ADDR_SURF_P2) |
3162 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3163 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3164 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3165 				 PIPE_CONFIG(ADDR_SURF_P2) |
3166 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3167 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3168 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3169 				 PIPE_CONFIG(ADDR_SURF_P2) |
3170 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3171 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3172 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3173 				 PIPE_CONFIG(ADDR_SURF_P2) |
3174 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3175 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3176 
3177 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3178 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3179 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3180 				NUM_BANKS(ADDR_SURF_8_BANK));
3181 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3182 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3183 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3184 				NUM_BANKS(ADDR_SURF_8_BANK));
3185 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3186 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3187 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3188 				NUM_BANKS(ADDR_SURF_8_BANK));
3189 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3190 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3191 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3192 				NUM_BANKS(ADDR_SURF_8_BANK));
3193 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3194 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3195 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3196 				NUM_BANKS(ADDR_SURF_8_BANK));
3197 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3198 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3199 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3200 				NUM_BANKS(ADDR_SURF_8_BANK));
3201 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3202 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3203 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3204 				NUM_BANKS(ADDR_SURF_8_BANK));
3205 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3206 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3207 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208 				NUM_BANKS(ADDR_SURF_16_BANK));
3209 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3210 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3211 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3212 				NUM_BANKS(ADDR_SURF_16_BANK));
3213 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3214 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3215 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3216 				 NUM_BANKS(ADDR_SURF_16_BANK));
3217 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3218 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3219 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3220 				 NUM_BANKS(ADDR_SURF_16_BANK));
3221 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3223 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3224 				 NUM_BANKS(ADDR_SURF_16_BANK));
3225 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3228 				 NUM_BANKS(ADDR_SURF_16_BANK));
3229 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3230 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3231 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3232 				 NUM_BANKS(ADDR_SURF_8_BANK));
3233 
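		/*
		 * Tile-mode indices 7, 12, 17 and 23 (and macrotile index 7)
		 * are deliberately never written on this ASIC; those table
		 * entries are left at their hardware defaults, presumably
		 * because they are reserved or unused here.
		 */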
3234 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3235 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3236 			    reg_offset != 23)
3237 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3238 
3239 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3240 			if (reg_offset != 7)
3241 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3242 
3243 		break;
3244 	default:
3245 		dev_warn(adev->dev,
3246 			 "Unknown chip type (%d) in gfx_v8_0_tiling_mode_table_init(), falling through to CHIP_CARRIZO\n",
3247 			 adev->asic_type);
3248 		fallthrough;
3249 
3250 	case CHIP_CARRIZO:
3251 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3252 				PIPE_CONFIG(ADDR_SURF_P2) |
3253 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3254 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3255 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3256 				PIPE_CONFIG(ADDR_SURF_P2) |
3257 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3258 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3259 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3260 				PIPE_CONFIG(ADDR_SURF_P2) |
3261 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3262 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3263 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3264 				PIPE_CONFIG(ADDR_SURF_P2) |
3265 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3266 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3267 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3268 				PIPE_CONFIG(ADDR_SURF_P2) |
3269 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3270 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3271 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3272 				PIPE_CONFIG(ADDR_SURF_P2) |
3273 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3274 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3275 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3276 				PIPE_CONFIG(ADDR_SURF_P2) |
3277 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3278 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3279 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3280 				PIPE_CONFIG(ADDR_SURF_P2));
3281 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3282 				PIPE_CONFIG(ADDR_SURF_P2) |
3283 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3284 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3285 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3286 				 PIPE_CONFIG(ADDR_SURF_P2) |
3287 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3288 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3289 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3290 				 PIPE_CONFIG(ADDR_SURF_P2) |
3291 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3292 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3293 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3294 				 PIPE_CONFIG(ADDR_SURF_P2) |
3295 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3296 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3297 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3298 				 PIPE_CONFIG(ADDR_SURF_P2) |
3299 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3300 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3301 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3302 				 PIPE_CONFIG(ADDR_SURF_P2) |
3303 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3304 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3305 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3306 				 PIPE_CONFIG(ADDR_SURF_P2) |
3307 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3308 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3309 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3310 				 PIPE_CONFIG(ADDR_SURF_P2) |
3311 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3312 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3313 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3314 				 PIPE_CONFIG(ADDR_SURF_P2) |
3315 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3316 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3317 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3318 				 PIPE_CONFIG(ADDR_SURF_P2) |
3319 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3320 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3321 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3322 				 PIPE_CONFIG(ADDR_SURF_P2) |
3323 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3324 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3325 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3326 				 PIPE_CONFIG(ADDR_SURF_P2) |
3327 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3328 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3329 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3330 				 PIPE_CONFIG(ADDR_SURF_P2) |
3331 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3332 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3333 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3334 				 PIPE_CONFIG(ADDR_SURF_P2) |
3335 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3336 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3337 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3338 				 PIPE_CONFIG(ADDR_SURF_P2) |
3339 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3340 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3341 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3342 				 PIPE_CONFIG(ADDR_SURF_P2) |
3343 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3344 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3345 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3346 				 PIPE_CONFIG(ADDR_SURF_P2) |
3347 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3348 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3349 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3350 				 PIPE_CONFIG(ADDR_SURF_P2) |
3351 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3352 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3353 
3354 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3355 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3356 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3357 				NUM_BANKS(ADDR_SURF_8_BANK));
3358 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3359 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3360 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361 				NUM_BANKS(ADDR_SURF_8_BANK));
3362 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3363 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3364 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3365 				NUM_BANKS(ADDR_SURF_8_BANK));
3366 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3367 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3368 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3369 				NUM_BANKS(ADDR_SURF_8_BANK));
3370 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3372 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3373 				NUM_BANKS(ADDR_SURF_8_BANK));
3374 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3375 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3376 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3377 				NUM_BANKS(ADDR_SURF_8_BANK));
3378 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3379 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3380 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3381 				NUM_BANKS(ADDR_SURF_8_BANK));
3382 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3383 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3384 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3385 				NUM_BANKS(ADDR_SURF_16_BANK));
3386 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3387 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3388 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3389 				NUM_BANKS(ADDR_SURF_16_BANK));
3390 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3391 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3392 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3393 				 NUM_BANKS(ADDR_SURF_16_BANK));
3394 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3395 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3396 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3397 				 NUM_BANKS(ADDR_SURF_16_BANK));
3398 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3399 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3400 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3401 				 NUM_BANKS(ADDR_SURF_16_BANK));
3402 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3403 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3404 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3405 				 NUM_BANKS(ADDR_SURF_16_BANK));
3406 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3407 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3408 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3409 				 NUM_BANKS(ADDR_SURF_8_BANK));
3410 
3411 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3412 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3413 			    reg_offset != 23)
3414 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3415 
3416 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3417 			if (reg_offset != 7)
3418 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3419 
3420 		break;
3421 	}
3422 }
3423 
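/*
 * Program GRBM_GFX_INDEX to steer subsequent register accesses at a single
 * shader engine (SE), shader array (SH) or instance, or broadcast them to
 * all of them.  Passing 0xffffffff for a parameter selects the corresponding
 * *_BROADCAST_WRITES mode; e.g. a typical call is
 * gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff) to restore
 * full broadcast after per-SE programming.
 */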
3424 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3425 				  u32 se_num, u32 sh_num, u32 instance)
3426 {
3427 	u32 data;
3428 
3429 	if (instance == 0xffffffff)
3430 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3431 	else
3432 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3433 
3434 	if (se_num == 0xffffffff)
3435 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3436 	else
3437 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3438 
3439 	if (sh_num == 0xffffffff)
3440 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3441 	else
3442 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3443 
3444 	WREG32(mmGRBM_GFX_INDEX, data);
3445 }
3446 
3447 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3448 				  u32 me, u32 pipe, u32 q, u32 vm)
3449 {
3450 	vi_srbm_select(adev, me, pipe, q, vm);
3451 }
3452 
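/*
 * Compute the bitmap of active render backends (RBs) for the currently
 * selected SE/SH.  The two disable registers are ORed so that both
 * fused-off (CC) and user-disabled (GC_USER) backends drop out.  For
 * example, with max_backends_per_se = 4 and max_sh_per_se = 1 the mask is
 * 0xF; a BACKEND_DISABLE value of 0x4 (RB2 harvested) then yields an
 * active bitmap of 0xB.
 */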
3453 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3454 {
3455 	u32 data, mask;
3456 
3457 	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3458 		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3459 
3460 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3461 
3462 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3463 					 adev->gfx.config.max_sh_per_se);
3464 
3465 	return (~data) & mask;
3466 }
3467 
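/* Per-ASIC golden values for the PA_SC_RASTER_CONFIG/RASTER_CONFIG_1 fields. */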
3468 static void
3469 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3470 {
3471 	switch (adev->asic_type) {
3472 	case CHIP_FIJI:
3473 	case CHIP_VEGAM:
3474 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3475 			  RB_XSEL2(1) | PKR_MAP(2) |
3476 			  PKR_XSEL(1) | PKR_YSEL(1) |
3477 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3478 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3479 			   SE_PAIR_YSEL(2);
3480 		break;
3481 	case CHIP_TONGA:
3482 	case CHIP_POLARIS10:
3483 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3484 			  SE_XSEL(1) | SE_YSEL(1);
3485 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3486 			   SE_PAIR_YSEL(2);
3487 		break;
3488 	case CHIP_TOPAZ:
3489 	case CHIP_CARRIZO:
3490 		*rconf |= RB_MAP_PKR0(2);
3491 		*rconf1 |= 0x0;
3492 		break;
3493 	case CHIP_POLARIS11:
3494 	case CHIP_POLARIS12:
3495 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3496 			  SE_XSEL(1) | SE_YSEL(1);
3497 		*rconf1 |= 0x0;
3498 		break;
3499 	case CHIP_STONEY:
3500 		*rconf |= 0x0;
3501 		*rconf1 |= 0x0;
3502 		break;
3503 	default:
3504 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3505 		break;
3506 	}
3507 }
3508 
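/*
 * On harvested parts some RBs are fused off, so the default raster
 * configuration may route packers or SE pairs at dead backends.  rb_mask
 * is sliced into per-SE chunks (se_mask[]) and, wherever a packer or SE
 * half has no live RB, the corresponding *_MAP field in
 * raster_config/raster_config_1 is repointed at the surviving side.
 */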
3509 static void
3510 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3511 					u32 raster_config, u32 raster_config_1,
3512 					unsigned rb_mask, unsigned num_rb)
3513 {
3514 	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3515 	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3516 	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3517 	unsigned rb_per_se = num_rb / num_se;
3518 	unsigned se_mask[4];
3519 	unsigned se;
3520 
3521 	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3522 	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3523 	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3524 	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3525 
3526 	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3527 	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3528 	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3529 
3530 	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3531 			     (!se_mask[2] && !se_mask[3]))) {
3532 		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3533 
3534 		if (!se_mask[0] && !se_mask[1]) {
3535 			raster_config_1 |=
3536 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3537 		} else {
3538 			raster_config_1 |=
3539 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3540 		}
3541 	}
3542 
3543 	for (se = 0; se < num_se; se++) {
3544 		unsigned raster_config_se = raster_config;
3545 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3546 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3547 		int idx = (se / 2) * 2;
3548 
3549 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3550 			raster_config_se &= ~SE_MAP_MASK;
3551 
3552 			if (!se_mask[idx]) {
3553 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3554 			} else {
3555 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3556 			}
3557 		}
3558 
3559 		pkr0_mask &= rb_mask;
3560 		pkr1_mask &= rb_mask;
3561 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3562 			raster_config_se &= ~PKR_MAP_MASK;
3563 
3564 			if (!pkr0_mask) {
3565 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3566 			} else {
3567 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3568 			}
3569 		}
3570 
3571 		if (rb_per_se >= 2) {
3572 			unsigned rb0_mask = 1 << (se * rb_per_se);
3573 			unsigned rb1_mask = rb0_mask << 1;
3574 
3575 			rb0_mask &= rb_mask;
3576 			rb1_mask &= rb_mask;
3577 			if (!rb0_mask || !rb1_mask) {
3578 				raster_config_se &= ~RB_MAP_PKR0_MASK;
3579 
3580 				if (!rb0_mask) {
3581 					raster_config_se |=
3582 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3583 				} else {
3584 					raster_config_se |=
3585 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3586 				}
3587 			}
3588 
3589 			if (rb_per_se > 2) {
3590 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3591 				rb1_mask = rb0_mask << 1;
3592 				rb0_mask &= rb_mask;
3593 				rb1_mask &= rb_mask;
3594 				if (!rb0_mask || !rb1_mask) {
3595 					raster_config_se &= ~RB_MAP_PKR1_MASK;
3596 
3597 					if (!rb0_mask) {
3598 						raster_config_se |=
3599 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3600 					} else {
3601 						raster_config_se |=
3602 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3603 					}
3604 				}
3605 			}
3606 		}
3607 
3608 		/* GRBM_GFX_INDEX has a different offset on VI */
3609 		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3610 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3611 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3612 	}
3613 
3614 	/* GRBM_GFX_INDEX has a different offset on VI */
3615 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3616 }
3617 
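/*
 * Enumerate the active RBs across all SE/SH pairs, program the raster
 * configuration (taking the harvested path when some backends are
 * disabled), and cache the per-SE/SH register values so userspace can
 * query them later.
 */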
3618 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3619 {
3620 	int i, j;
3621 	u32 data;
3622 	u32 raster_config = 0, raster_config_1 = 0;
3623 	u32 active_rbs = 0;
3624 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3625 					adev->gfx.config.max_sh_per_se;
3626 	unsigned num_rb_pipes;
3627 
3628 	mutex_lock(&adev->grbm_idx_mutex);
3629 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3630 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3631 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3632 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3633 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3634 					       rb_bitmap_width_per_sh);
3635 		}
3636 	}
3637 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3638 
3639 	adev->gfx.config.backend_enable_mask = active_rbs;
3640 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3641 
3642 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3643 			     adev->gfx.config.max_shader_engines, 16);
3644 
3645 	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3646 
3647 	if (!adev->gfx.config.backend_enable_mask ||
3648 			adev->gfx.config.num_rbs >= num_rb_pipes) {
3649 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3650 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3651 	} else {
3652 		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3653 							adev->gfx.config.backend_enable_mask,
3654 							num_rb_pipes);
3655 	}
3656 
3657 	/* cache the values for userspace */
3658 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3659 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3660 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3661 			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3662 				RREG32(mmCC_RB_BACKEND_DISABLE);
3663 			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3664 				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3665 			adev->gfx.config.rb_config[i][j].raster_config =
3666 				RREG32(mmPA_SC_RASTER_CONFIG);
3667 			adev->gfx.config.rb_config[i][j].raster_config_1 =
3668 				RREG32(mmPA_SC_RASTER_CONFIG_1);
3669 		}
3670 	}
3671 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3672 	mutex_unlock(&adev->grbm_idx_mutex);
3673 }
3674 
3675 #define DEFAULT_SH_MEM_BASES	(0x6000)
3676 /**
3677  * gfx_v8_0_init_compute_vmid - initialize the compute VMID SH_MEM registers
3678  *
3679  * @adev: amdgpu_device pointer
3680  *
3681  * Initialize compute vmid sh_mem registers
3682  *
3683  */
3684 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3685 {
3686 	int i;
3687 	uint32_t sh_mem_config;
3688 	uint32_t sh_mem_bases;
3689 
3690 	/*
3691 	 * Configure apertures:
3692 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3693 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3694 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3695 	 */
3696 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
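	/*
	 * This packs 0x6000 into both 16-bit halves of SH_MEM_BASES
	 * (0x60006000).  Per the GFX8 SH_MEM_BASES layout, each half
	 * supplies bits 63:48 of an aperture base, so 0x6000 places the
	 * apertures at 0x6000000000000000 as described above.
	 */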
3697 
3698 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3699 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3700 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3701 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3702 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3703 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3704 
3705 	mutex_lock(&adev->srbm_mutex);
3706 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3707 		vi_srbm_select(adev, 0, 0, 0, i);
3708 		/* CP and shaders */
3709 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3710 		WREG32(mmSH_MEM_APE1_BASE, 1);
3711 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3712 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3713 	}
3714 	vi_srbm_select(adev, 0, 0, 0, 0);
3715 	mutex_unlock(&adev->srbm_mutex);
3716 
3717 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
3718 	 * access. These should be enabled by FW for target VMIDs. */
3719 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3720 		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3721 		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3722 		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3723 		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3724 	}
3725 }
3726 
3727 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3728 {
3729 	int vmid;
3730 
3731 	/*
3732 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3733 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
3734 	 * the driver can enable them for graphics. VMID0 should maintain
3735 	 * access so that HWS firmware can save/restore entries.
3736 	 */
3737 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3738 		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3739 		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3740 		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3741 		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3742 	}
3743 }
3744 
3745 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3746 {
3747 	switch (adev->asic_type) {
3748 	default:
3749 		adev->gfx.config.double_offchip_lds_buf = 1;
3750 		break;
3751 	case CHIP_CARRIZO:
3752 	case CHIP_STONEY:
3753 		adev->gfx.config.double_offchip_lds_buf = 0;
3754 		break;
3755 	}
3756 }
3757 
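/*
 * One-time GRBM/SH_MEM constant programming: address config, tiling
 * tables, RB setup, per-VMID memory configuration (VMID 0 uses MTYPE_UC
 * throughout, the others get MTYPE_NC with the shared aperture base),
 * plus broadcast PA_SC FIFO sizing and SPI arbitration priority.
 */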
3758 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3759 {
3760 	u32 tmp, sh_static_mem_cfg;
3761 	int i;
3762 
3763 	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3764 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3765 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3766 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3767 
3768 	gfx_v8_0_tiling_mode_table_init(adev);
3769 	gfx_v8_0_setup_rb(adev);
3770 	gfx_v8_0_get_cu_info(adev);
3771 	gfx_v8_0_config_init(adev);
3772 
3773 	/* XXX SH_MEM regs */
3774 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3775 	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3776 				   SWIZZLE_ENABLE, 1);
3777 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3778 				   ELEMENT_SIZE, 1);
3779 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3780 				   INDEX_STRIDE, 3);
3781 	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3782 
3783 	mutex_lock(&adev->srbm_mutex);
3784 	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3785 		vi_srbm_select(adev, 0, 0, 0, i);
3786 		/* CP and shaders */
3787 		if (i == 0) {
3788 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3789 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3790 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3791 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3792 			WREG32(mmSH_MEM_CONFIG, tmp);
3793 			WREG32(mmSH_MEM_BASES, 0);
3794 		} else {
3795 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3796 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3797 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3798 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3799 			WREG32(mmSH_MEM_CONFIG, tmp);
3800 			tmp = adev->gmc.shared_aperture_start >> 48;
3801 			WREG32(mmSH_MEM_BASES, tmp);
3802 		}
3803 
3804 		WREG32(mmSH_MEM_APE1_BASE, 1);
3805 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3806 	}
3807 	vi_srbm_select(adev, 0, 0, 0, 0);
3808 	mutex_unlock(&adev->srbm_mutex);
3809 
3810 	gfx_v8_0_init_compute_vmid(adev);
3811 	gfx_v8_0_init_gds_vmid(adev);
3812 
3813 	mutex_lock(&adev->grbm_idx_mutex);
3814 	/*
3815 	 * make sure that the following register writes will be broadcast
3816 	 * to all the shaders
3817 	 */
3818 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3819 
3820 	WREG32(mmPA_SC_FIFO_SIZE,
3821 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3822 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3823 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3824 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3825 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3826 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3827 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3828 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3829 
3830 	tmp = RREG32(mmSPI_ARB_PRIORITY);
3831 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3832 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3833 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3834 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3835 	WREG32(mmSPI_ARB_PRIORITY, tmp);
3836 
3837 	mutex_unlock(&adev->grbm_idx_mutex);
3839 }
3840 
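/*
 * Busy-wait until the RLC serdes masters report idle: first the per-CU
 * masters for every SE/SH (selected via GRBM_GFX_INDEX), then the non-CU
 * masters (SE/GC/TC0/TC1), polling at 1 us granularity up to
 * adev->usec_timeout.
 */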
3841 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3842 {
3843 	u32 i, j, k;
3844 	u32 mask;
3845 
3846 	mutex_lock(&adev->grbm_idx_mutex);
3847 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3848 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3849 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3850 			for (k = 0; k < adev->usec_timeout; k++) {
3851 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3852 					break;
3853 				udelay(1);
3854 			}
3855 			if (k == adev->usec_timeout) {
3856 				gfx_v8_0_select_se_sh(adev, 0xffffffff,
3857 						      0xffffffff, 0xffffffff);
3858 				mutex_unlock(&adev->grbm_idx_mutex);
3859 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3860 				DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
3861 				return;
3862 			}
3863 		}
3864 	}
3865 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3866 	mutex_unlock(&adev->grbm_idx_mutex);
3867 
3868 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3869 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3870 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3871 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3872 	for (k = 0; k < adev->usec_timeout; k++) {
3873 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3874 			break;
3875 		udelay(1);
3876 	}
3877 }
3878 
3879 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3880 					       bool enable)
3881 {
3882 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3883 
3884 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3885 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3886 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3887 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3888 
3889 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3890 }
3891 
3892 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
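/*
 * Point the RLC at the clear-state indirect buffer (CSIB).  The GPU
 * address is split across the HI/LO registers; masking the low dword with
 * 0xfffffffc keeps it dword-aligned, as the hardware appears to require.
 */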
3893 {
3894 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3895 	/* csib */
3896 	WREG32(mmRLC_CSIB_ADDR_HI,
3897 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3898 	WREG32(mmRLC_CSIB_ADDR_LO,
3899 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3900 	WREG32(mmRLC_CSIB_LENGTH,
3901 			adev->gfx.rlc.clear_state_size);
3902 }
3903 
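/*
 * Walk the RLC register-list-format blob.  Entries are separated by a
 * 0xFFFFFFFF sentinel; within an entry the walker advances three dwords
 * at a time and treats the third dword of each step as an indirect
 * register index.  Distinct index values are gathered into
 * unique_indices[] and rewritten in place as compact slot numbers, and
 * the start offset of every entry is recorded in ind_start_offsets[].
 */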
3904 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3905 				int ind_offset,
3906 				int list_size,
3907 				int *unique_indices,
3908 				int *indices_count,
3909 				int max_indices,
3910 				int *ind_start_offsets,
3911 				int *offset_count,
3912 				int max_offset)
3913 {
3914 	int indices;
3915 	bool new_entry = true;
3916 
3917 	for (; ind_offset < list_size; ind_offset++) {
3918 
3919 		if (new_entry) {
3920 			new_entry = false;
3921 			ind_start_offsets[*offset_count] = ind_offset;
3922 			*offset_count = *offset_count + 1;
3923 			BUG_ON(*offset_count >= max_offset);
3924 		}
3925 
3926 		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3927 			new_entry = true;
3928 			continue;
3929 		}
3930 
3931 		ind_offset += 2;
3932 
3933 		/* look for the matching index */
3934 		for (indices = 0;
3935 			indices < *indices_count;
3936 			indices++) {
3937 			if (unique_indices[indices] ==
3938 				register_list_format[ind_offset])
3939 				break;
3940 		}
3941 
3942 		if (indices >= *indices_count) {
3943 			unique_indices[*indices_count] =
3944 				register_list_format[ind_offset];
3945 			indices = *indices_count;
3946 			*indices_count = *indices_count + 1;
3947 			BUG_ON(*indices_count >= max_indices);
3948 		}
3949 
3950 		register_list_format[ind_offset] = indices;
3951 	}
3952 }
3953 
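/*
 * Upload the RLC save/restore machinery: the direct register-restore list
 * goes into ARAM with auto-incrementing writes, while the parsed indirect
 * format list, its (halved) length, the per-entry starting offsets and
 * the unique index registers are programmed through the GPM scratch and
 * SRM index-control registers.
 */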
3954 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3955 {
3956 	int i, temp, data;
3957 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3958 	int indices_count = 0;
3959 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3960 	int offset_count = 0;
3961 
3962 	int list_size;
3963 	unsigned int *register_list_format =
3964 		kmemdup(adev->gfx.rlc.register_list_format,
3965 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3966 	if (!register_list_format)
3967 		return -ENOMEM;
3968 
3969 	gfx_v8_0_parse_ind_reg_list(register_list_format,
3970 				RLC_FormatDirectRegListLength,
3971 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3972 				unique_indices,
3973 				&indices_count,
3974 				ARRAY_SIZE(unique_indices),
3975 				indirect_start_offsets,
3976 				&offset_count,
3977 				ARRAY_SIZE(indirect_start_offsets));
3978 
3979 	/* save and restore list */
3980 	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3981 
3982 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3983 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3984 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3985 
3986 	/* indirect list */
3987 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3988 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3989 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3990 
3991 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3992 	list_size = list_size >> 1;
3993 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3994 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3995 
3996 	/* write the starting offsets of the indirect list entries */
3997 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3998 		adev->gfx.rlc.starting_offsets_start);
3999 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4000 		WREG32(mmRLC_GPM_SCRATCH_DATA,
4001 				indirect_start_offsets[i]);
4002 
4003 	/* unique indices */
4004 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4005 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4006 	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4007 		if (unique_indices[i] != 0) {
4008 			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4009 			WREG32(data + i, unique_indices[i] >> 20);
4010 		}
4011 	}
4012 	kfree(register_list_format);
4013 
4014 	return 0;
4015 }
4016 
4017 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4018 {
4019 	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4020 }
4021 
4022 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4023 {
4024 	uint32_t data;
4025 
4026 	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4027 
4028 	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4029 	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4030 	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4031 	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4032 	WREG32(mmRLC_PG_DELAY, data);
4033 
4034 	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4035 	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4037 }
4038 
4039 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4040 						bool enable)
4041 {
4042 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4043 }
4044 
4045 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4046 						  bool enable)
4047 {
4048 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4049 }
4050 
4051 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4052 {
4053 	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4054 }
4055 
4056 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4057 {
4058 	if ((adev->asic_type == CHIP_CARRIZO) ||
4059 	    (adev->asic_type == CHIP_STONEY)) {
4060 		gfx_v8_0_init_csb(adev);
4061 		gfx_v8_0_init_save_restore_list(adev);
4062 		gfx_v8_0_enable_save_restore_machine(adev);
4063 		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4064 		gfx_v8_0_init_power_gating(adev);
4065 		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4066 	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4067 		   (adev->asic_type == CHIP_POLARIS12) ||
4068 		   (adev->asic_type == CHIP_VEGAM)) {
4069 		gfx_v8_0_init_csb(adev);
4070 		gfx_v8_0_init_save_restore_list(adev);
4071 		gfx_v8_0_enable_save_restore_machine(adev);
4072 		gfx_v8_0_init_power_gating(adev);
4073 	}
4075 }
4076 
4077 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4078 {
4079 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4080 
4081 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4082 	gfx_v8_0_wait_for_rlc_serdes(adev);
4083 }
4084 
4085 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4086 {
4087 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4088 	udelay(50);
4089 
4090 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4091 	udelay(50);
4092 }
4093 
4094 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4095 {
4096 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4097 
	/* on APUs such as Carrizo, the CP interrupt is enabled after the CP is initialized */
4099 	if (!(adev->flags & AMD_IS_APU))
4100 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4101 
4102 	udelay(50);
4103 }
4104 
4105 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4106 {
4107 	if (amdgpu_sriov_vf(adev)) {
4108 		gfx_v8_0_init_csb(adev);
4109 		return 0;
4110 	}
4111 
4112 	adev->gfx.rlc.funcs->stop(adev);
4113 	adev->gfx.rlc.funcs->reset(adev);
4114 	gfx_v8_0_init_pg(adev);
4115 	adev->gfx.rlc.funcs->start(adev);
4116 
4117 	return 0;
4118 }
4119 
4120 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4121 {
4122 	u32 tmp = RREG32(mmCP_ME_CNTL);
4123 
4124 	if (enable) {
4125 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4126 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4127 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4128 	} else {
4129 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4130 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4131 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4132 	}
4133 	WREG32(mmCP_ME_CNTL, tmp);
4134 	udelay(50);
4135 }
4136 
4137 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4138 {
4139 	u32 count = 0;
4140 	const struct cs_section_def *sect = NULL;
4141 	const struct cs_extent_def *ext = NULL;
4142 
4143 	/* begin clear state */
4144 	count += 2;
4145 	/* context control state */
4146 	count += 3;
4147 
4148 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4149 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4150 			if (sect->id == SECT_CONTEXT)
4151 				count += 2 + ext->reg_count;
4152 			else
4153 				return 0;
4154 		}
4155 	}
4156 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4157 	count += 4;
4158 	/* end clear state */
4159 	count += 2;
4160 	/* clear state */
4161 	count += 2;
4162 
4163 	return count;
4164 }
4165 
4166 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4167 {
4168 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4169 	const struct cs_section_def *sect = NULL;
4170 	const struct cs_extent_def *ext = NULL;
4171 	int r, i;
4172 
4173 	/* init the CP */
4174 	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4175 	WREG32(mmCP_ENDIAN_SWAP, 0);
4176 	WREG32(mmCP_DEVICE_ID, 1);
4177 
4178 	gfx_v8_0_cp_gfx_enable(adev, true);
4179 
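	/*
	 * The extra 4 dwords beyond the CSB size cover the SET_BASE packet
	 * that initializes the CE partitions at the end of this function.
	 */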
4180 	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4181 	if (r) {
4182 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4183 		return r;
4184 	}
4185 
4186 	/* clear state buffer */
4187 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4188 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4189 
4190 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4191 	amdgpu_ring_write(ring, 0x80000000);
4192 	amdgpu_ring_write(ring, 0x80000000);
4193 
4194 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4195 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4196 			if (sect->id == SECT_CONTEXT) {
4197 				amdgpu_ring_write(ring,
4198 				       PACKET3(PACKET3_SET_CONTEXT_REG,
4199 					       ext->reg_count));
4200 				amdgpu_ring_write(ring,
4201 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4202 				for (i = 0; i < ext->reg_count; i++)
4203 					amdgpu_ring_write(ring, ext->extent[i]);
4204 			}
4205 		}
4206 	}
4207 
4208 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4209 	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4210 	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4211 	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4212 
4213 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4214 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4215 
4216 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4217 	amdgpu_ring_write(ring, 0);
4218 
4219 	/* init the CE partitions */
4220 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4221 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4222 	amdgpu_ring_write(ring, 0x8000);
4223 	amdgpu_ring_write(ring, 0x8000);
4224 
4225 	amdgpu_ring_commit(ring);
4226 
4227 	return 0;
4228 }
4229 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4230 {
4231 	u32 tmp;
4232 	/* no gfx doorbells on iceland */
4233 	if (adev->asic_type == CHIP_TOPAZ)
4234 		return;
4235 
4236 	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4237 
4238 	if (ring->use_doorbell) {
4239 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4240 				DOORBELL_OFFSET, ring->doorbell_index);
4241 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4242 						DOORBELL_HIT, 0);
4243 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4244 					    DOORBELL_EN, 1);
4245 	} else {
4246 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4247 	}
4248 
4249 	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4250 
4251 	if (adev->flags & AMD_IS_APU)
4252 		return;
4253 
4254 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4255 					DOORBELL_RANGE_LOWER,
4256 					adev->doorbell_index.gfx_ring0);
4257 	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4258 
4259 	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4260 		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4261 }
4262 
4263 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4264 {
4265 	struct amdgpu_ring *ring;
4266 	u32 tmp;
4267 	u32 rb_bufsz;
4268 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4269 
4270 	/* Set the write pointer delay */
4271 	WREG32(mmCP_RB_WPTR_DELAY, 0);
4272 
4273 	/* set the RB to use vmid 0 */
4274 	WREG32(mmCP_RB_VMID, 0);
4275 
4276 	/* Set ring buffer size */
4277 	ring = &adev->gfx.gfx_ring[0];
4278 	rb_bufsz = order_base_2(ring->ring_size / 8);
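	/*
	 * RB_BUFSZ encodes the ring size in 8-byte units as a power of
	 * two, e.g. a 64 KiB ring gives order_base_2(65536 / 8) = 13.
	 */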
4279 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4280 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4281 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4282 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4283 #ifdef __BIG_ENDIAN
4284 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4285 #endif
4286 	WREG32(mmCP_RB0_CNTL, tmp);
4287 
4288 	/* Initialize the ring buffer's read and write pointers */
4289 	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4290 	ring->wptr = 0;
4291 	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4292 
	/* set the wb address whether it's enabled or not */
4294 	rptr_addr = ring->rptr_gpu_addr;
4295 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4296 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4297 
4298 	wptr_gpu_addr = ring->wptr_gpu_addr;
4299 	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4300 	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4301 	mdelay(1);
4302 	WREG32(mmCP_RB0_CNTL, tmp);
4303 
4304 	rb_addr = ring->gpu_addr >> 8;
4305 	WREG32(mmCP_RB0_BASE, rb_addr);
4306 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4307 
4308 	gfx_v8_0_set_cpg_door_bell(adev, ring);
4309 	/* start the ring */
4310 	amdgpu_ring_clear_ring(ring);
4311 	gfx_v8_0_cp_gfx_start(adev);
4312 	ring->sched.ready = true;
4313 
4314 	return 0;
4315 }
4316 
4317 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4318 {
4319 	if (enable) {
4320 		WREG32(mmCP_MEC_CNTL, 0);
4321 	} else {
4322 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4323 		adev->gfx.kiq.ring.sched.ready = false;
4324 	}
4325 	udelay(50);
4326 }
4327 
4328 /* KIQ functions */
4329 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4330 {
4331 	uint32_t tmp;
4332 	struct amdgpu_device *adev = ring->adev;
4333 
	/* tell the RLC which queue is the KIQ */
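	/*
	 * The low byte of RLC_CP_SCHEDULERS encodes
	 * (me << 5) | (pipe << 3) | queue; e.g. me = 1, pipe = 0, queue = 0
	 * programs 0x20.  The second write additionally sets bit 7 (0x80),
	 * presumably latching the new selection.
	 */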
4335 	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4336 	tmp &= 0xffffff00;
4337 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4338 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4339 	tmp |= 0x80;
4340 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4341 }
4342 
4343 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4344 {
4345 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4346 	uint64_t queue_mask = 0;
4347 	int r, i;
4348 
4349 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4350 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4351 			continue;
4352 
4353 		/* This situation may be hit in the future if a new HW
4354 		 * generation exposes more than 64 queues. If so, the
4355 		 * definition of queue_mask needs updating */
4356 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4357 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4358 			break;
4359 		}
4360 
4361 		queue_mask |= (1ull << i);
4362 	}
4363 
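	/*
	 * Reserve 8 dwords for the SET_RESOURCES packet plus 8 per
	 * MAP_QUEUES packet (each actually emits 7).  With compute queues
	 * 0..7 enabled, queue_mask above works out to 0xff.
	 */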
4364 	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4365 	if (r) {
4366 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4367 		return r;
4368 	}
4369 	/* set resources */
4370 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4371 	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4372 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
4373 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
4374 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4375 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4376 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4377 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4378 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4379 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4380 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4381 		uint64_t wptr_addr = ring->wptr_gpu_addr;
4382 
4383 		/* map queues */
4384 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
4386 		amdgpu_ring_write(kiq_ring,
4387 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4388 		amdgpu_ring_write(kiq_ring,
4389 				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4390 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4391 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4392 				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4393 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4394 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4395 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4396 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4397 	}
4398 
4399 	amdgpu_ring_commit(kiq_ring);
4400 
4401 	return 0;
4402 }
4403 
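/*
 * Request an HQD dequeue and wait for the queue to go inactive.  req
 * selects the CP_HQD_DEQUEUE_REQUEST type; callers in this file pass 2,
 * which on this generation appears to be the reset-style dequeue rather
 * than a drain.
 */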
4404 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4405 {
4406 	int i, r = 0;
4407 
4408 	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4409 		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4410 		for (i = 0; i < adev->usec_timeout; i++) {
4411 			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4412 				break;
4413 			udelay(1);
4414 		}
4415 		if (i == adev->usec_timeout)
4416 			r = -ETIMEDOUT;
4417 	}
4418 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4419 	WREG32(mmCP_HQD_PQ_RPTR, 0);
4420 	WREG32(mmCP_HQD_PQ_WPTR, 0);
4421 
4422 	return r;
4423 }
4424 
4425 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4426 {
4427 	struct amdgpu_device *adev = ring->adev;
4428 
4429 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4430 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4431 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4432 			mqd->cp_hqd_queue_priority =
4433 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4434 		}
4435 	}
4436 }
4437 
4438 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4439 {
4440 	struct amdgpu_device *adev = ring->adev;
4441 	struct vi_mqd *mqd = ring->mqd_ptr;
4442 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4443 	uint32_t tmp;
4444 
4445 	mqd->header = 0xC0310800;
4446 	mqd->compute_pipelinestat_enable = 0x00000001;
4447 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4448 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4449 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4450 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4451 	mqd->compute_misc_reserved = 0x00000003;
4452 	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4453 						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4454 	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4455 						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4456 	eop_base_addr = ring->eop_gpu_addr >> 8;
4457 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4458 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4459 
4460 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
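	/*
	 * For example, with a 4096-byte GFX8_MEC_HPD_SIZE:
	 * 4096 / 4 = 1024 dwords, order_base_2(1024) = 10, so EOP_SIZE is
	 * programmed as 9, i.e. 2^(9+1) = 1024 dwords.
	 */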
4461 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4462 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4463 			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4464 
4465 	mqd->cp_hqd_eop_control = tmp;
4466 
4467 	/* enable doorbell? */
4468 	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4469 			    CP_HQD_PQ_DOORBELL_CONTROL,
4470 			    DOORBELL_EN,
4471 			    ring->use_doorbell ? 1 : 0);
4472 
4473 	mqd->cp_hqd_pq_doorbell_control = tmp;
4474 
4475 	/* set the pointer to the MQD */
4476 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4477 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4478 
4479 	/* set MQD vmid to 0 */
4480 	tmp = RREG32(mmCP_MQD_CONTROL);
4481 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4482 	mqd->cp_mqd_control = tmp;
4483 
	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4485 	hqd_gpu_addr = ring->gpu_addr >> 8;
4486 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4487 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4488 
4489 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4490 	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4491 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4492 			    (order_base_2(ring->ring_size / 4) - 1));
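	/*
	 * QUEUE_SIZE presumably uses the same 2^(n+1)-dwords encoding as
	 * the EOP ring above, e.g. a 4 KiB ring gives
	 * order_base_2(4096 / 4) - 1 = 9.
	 */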
4493 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4494 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4495 #ifdef __BIG_ENDIAN
4496 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4497 #endif
4498 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4499 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4500 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4501 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4502 	mqd->cp_hqd_pq_control = tmp;
4503 
4504 	/* set the wb address whether it's enabled or not */
4505 	wb_gpu_addr = ring->rptr_gpu_addr;
4506 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4507 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4508 		upper_32_bits(wb_gpu_addr) & 0xffff;
4509 
	/* only used if CP_PQ_WPTR_POLL_CNTL.EN is set */
4511 	wb_gpu_addr = ring->wptr_gpu_addr;
4512 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4513 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4514 
4515 	tmp = 0;
4516 	/* enable the doorbell if requested */
4517 	if (ring->use_doorbell) {
4518 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4519 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4520 				DOORBELL_OFFSET, ring->doorbell_index);
4521 
4522 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4523 					 DOORBELL_EN, 1);
4524 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4525 					 DOORBELL_SOURCE, 0);
4526 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4527 					 DOORBELL_HIT, 0);
4528 	}
4529 
4530 	mqd->cp_hqd_pq_doorbell_control = tmp;
4531 
4532 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4533 	ring->wptr = 0;
4534 	mqd->cp_hqd_pq_wptr = ring->wptr;
4535 	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4536 
4537 	/* set the vmid for the queue */
4538 	mqd->cp_hqd_vmid = 0;
4539 
4540 	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4541 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4542 	mqd->cp_hqd_persistent_state = tmp;
4543 
4544 	/* set MTYPE */
4545 	tmp = RREG32(mmCP_HQD_IB_CONTROL);
4546 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4547 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4548 	mqd->cp_hqd_ib_control = tmp;
4549 
4550 	tmp = RREG32(mmCP_HQD_IQ_TIMER);
4551 	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4552 	mqd->cp_hqd_iq_timer = tmp;
4553 
4554 	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4555 	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4556 	mqd->cp_hqd_ctx_save_control = tmp;
4557 
4558 	/* defaults */
4559 	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4560 	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4561 	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4562 	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4563 	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4564 	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4565 	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4566 	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4567 	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4568 	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4569 	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4570 	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4571 
4572 	/* set static priority for a queue/ring */
4573 	gfx_v8_0_mqd_set_priority(ring, mqd);
4574 	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4575 
	/* the map_queues packet doesn't need to activate the queue,
	 * so only the KIQ needs this field set.
	 */
4579 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4580 		mqd->cp_hqd_active = 1;
4581 
4582 	return 0;
4583 }
4584 
4585 static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4586 			struct vi_mqd *mqd)
4587 {
4588 	uint32_t mqd_reg;
4589 	uint32_t *mqd_data;
4590 
4591 	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4592 	mqd_data = &mqd->cp_mqd_base_addr_lo;
4593 
4594 	/* disable wptr polling */
4595 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4596 
4597 	/* program all HQD registers */
4598 	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4599 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4600 
4601 	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4602 	 * This is safe since EOP RPTR==WPTR for any inactive HQD
4603 	 * on ASICs that do not support context-save.
4604 	 * EOP writes/reads can start anywhere in the ring.
4605 	 */
4606 	if (adev->asic_type != CHIP_TONGA) {
4607 		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4608 		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4609 		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4610 	}
4611 
4612 	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4613 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4614 
4615 	/* activate the HQD */
4616 	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4617 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4618 
4619 	return 0;
4620 }
4621 
4622 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4623 {
4624 	struct amdgpu_device *adev = ring->adev;
4625 	struct vi_mqd *mqd = ring->mqd_ptr;
4626 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4627 
4628 	gfx_v8_0_kiq_setting(ring);
4629 
4630 	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4631 		/* reset MQD to a clean status */
4632 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4633 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4634 
4635 		/* reset ring buffer */
4636 		ring->wptr = 0;
4637 		amdgpu_ring_clear_ring(ring);
4638 		mutex_lock(&adev->srbm_mutex);
4639 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4640 		gfx_v8_0_mqd_commit(adev, mqd);
4641 		vi_srbm_select(adev, 0, 0, 0, 0);
4642 		mutex_unlock(&adev->srbm_mutex);
4643 	} else {
4644 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4645 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4646 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4647 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4648 			amdgpu_ring_clear_ring(ring);
4649 		mutex_lock(&adev->srbm_mutex);
4650 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4651 		gfx_v8_0_mqd_init(ring);
4652 		gfx_v8_0_mqd_commit(adev, mqd);
4653 		vi_srbm_select(adev, 0, 0, 0, 0);
4654 		mutex_unlock(&adev->srbm_mutex);
4655 
4656 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4657 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4658 	}
4659 
4660 	return 0;
4661 }
4662 
4663 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4664 {
4665 	struct amdgpu_device *adev = ring->adev;
4666 	struct vi_mqd *mqd = ring->mqd_ptr;
4667 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4668 
4669 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4670 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4671 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4672 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4673 		mutex_lock(&adev->srbm_mutex);
4674 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4675 		gfx_v8_0_mqd_init(ring);
4676 		vi_srbm_select(adev, 0, 0, 0, 0);
4677 		mutex_unlock(&adev->srbm_mutex);
4678 
4679 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4680 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4681 	} else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4682 		/* reset MQD to a clean status */
4683 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4684 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4685 		/* reset ring buffer */
4686 		ring->wptr = 0;
4687 		amdgpu_ring_clear_ring(ring);
4688 	} else {
4689 		amdgpu_ring_clear_ring(ring);
4690 	}
4691 	return 0;
4692 }
4693 
4694 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4695 {
4696 	if (adev->asic_type > CHIP_TONGA) {
4697 		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4698 		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4699 	}
4700 	/* enable doorbells */
4701 	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4702 }
4703 
4704 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4705 {
4706 	struct amdgpu_ring *ring;
4707 	int r;
4708 
4709 	ring = &adev->gfx.kiq.ring;
4710 
4711 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4712 	if (unlikely(r != 0))
4713 		return r;
4714 
4715 	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4716 	if (unlikely(r != 0))
4717 		return r;
4718 
4719 	gfx_v8_0_kiq_init_queue(ring);
4720 	amdgpu_bo_kunmap(ring->mqd_obj);
4721 	ring->mqd_ptr = NULL;
4722 	amdgpu_bo_unreserve(ring->mqd_obj);
4723 	ring->sched.ready = true;
4724 	return 0;
4725 }
4726 
4727 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4728 {
4729 	struct amdgpu_ring *ring = NULL;
4730 	int r = 0, i;
4731 
4732 	gfx_v8_0_cp_compute_enable(adev, true);
4733 
4734 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4735 		ring = &adev->gfx.compute_ring[i];
4736 
4737 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4738 		if (unlikely(r != 0))
4739 			goto done;
4740 		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4741 		if (!r) {
4742 			r = gfx_v8_0_kcq_init_queue(ring);
4743 			amdgpu_bo_kunmap(ring->mqd_obj);
4744 			ring->mqd_ptr = NULL;
4745 		}
4746 		amdgpu_bo_unreserve(ring->mqd_obj);
4747 		if (r)
4748 			goto done;
4749 	}
4750 
4751 	gfx_v8_0_set_mec_doorbell_range(adev);
4752 
	r = gfx_v8_0_kiq_kcq_enable(adev);
4756 
4757 done:
4758 	return r;
4759 }
4760 
4761 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4762 {
4763 	int r, i;
4764 	struct amdgpu_ring *ring;
4765 
4766 	/* collect all the ring_tests here, gfx, kiq, compute */
4767 	ring = &adev->gfx.gfx_ring[0];
4768 	r = amdgpu_ring_test_helper(ring);
4769 	if (r)
4770 		return r;
4771 
4772 	ring = &adev->gfx.kiq.ring;
4773 	r = amdgpu_ring_test_helper(ring);
4774 	if (r)
4775 		return r;
4776 
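	/*
	 * Compute ring failures are not treated as fatal here;
	 * amdgpu_ring_test_helper() already marks a failing ring as not
	 * ready for the scheduler.
	 */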
4777 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4778 		ring = &adev->gfx.compute_ring[i];
4779 		amdgpu_ring_test_helper(ring);
4780 	}
4781 
4782 	return 0;
4783 }
4784 
4785 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4786 {
4787 	int r;
4788 
4789 	if (!(adev->flags & AMD_IS_APU))
4790 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4791 
4792 	r = gfx_v8_0_kiq_resume(adev);
4793 	if (r)
4794 		return r;
4795 
4796 	r = gfx_v8_0_cp_gfx_resume(adev);
4797 	if (r)
4798 		return r;
4799 
4800 	r = gfx_v8_0_kcq_resume(adev);
4801 	if (r)
4802 		return r;
4803 
4804 	r = gfx_v8_0_cp_test_all_rings(adev);
4805 	if (r)
4806 		return r;
4807 
4808 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4809 
4810 	return 0;
4811 }
4812 
4813 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4814 {
4815 	gfx_v8_0_cp_gfx_enable(adev, enable);
4816 	gfx_v8_0_cp_compute_enable(adev, enable);
4817 }
4818 
4819 static int gfx_v8_0_hw_init(void *handle)
4820 {
4821 	int r;
4822 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4823 
4824 	gfx_v8_0_init_golden_registers(adev);
4825 	gfx_v8_0_constants_init(adev);
4826 
4827 	r = adev->gfx.rlc.funcs->resume(adev);
4828 	if (r)
4829 		return r;
4830 
4831 	r = gfx_v8_0_cp_resume(adev);
4832 
4833 	return r;
4834 }
4835 
4836 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4837 {
4838 	int r, i;
4839 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4840 
4841 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4842 	if (r)
4843 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4844 
4845 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4846 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4847 
4848 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4849 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4850 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4851 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4852 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4853 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4854 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4855 		amdgpu_ring_write(kiq_ring, 0);
4856 		amdgpu_ring_write(kiq_ring, 0);
4857 		amdgpu_ring_write(kiq_ring, 0);
4858 	}
4859 	r = amdgpu_ring_test_helper(kiq_ring);
4860 	if (r)
4861 		DRM_ERROR("KCQ disable failed\n");
4862 
4863 	return r;
4864 }
4865 
4866 static bool gfx_v8_0_is_idle(void *handle)
4867 {
4868 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4869 
4870 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4871 		|| RREG32(mmGRBM_STATUS2) != 0x8)
4872 		return false;
4873 	else
4874 		return true;
4875 }
4876 
4877 static bool gfx_v8_0_rlc_is_idle(void *handle)
4878 {
4879 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4880 
4881 	if (RREG32(mmGRBM_STATUS2) != 0x8)
4882 		return false;
4883 	else
4884 		return true;
4885 }
4886 
4887 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4888 {
4889 	unsigned int i;
4890 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4891 
4892 	for (i = 0; i < adev->usec_timeout; i++) {
4893 		if (gfx_v8_0_rlc_is_idle(handle))
4894 			return 0;
4895 
4896 		udelay(1);
4897 	}
4898 	return -ETIMEDOUT;
4899 }
4900 
4901 static int gfx_v8_0_wait_for_idle(void *handle)
4902 {
4903 	unsigned int i;
4904 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4905 
4906 	for (i = 0; i < adev->usec_timeout; i++) {
4907 		if (gfx_v8_0_is_idle(handle))
4908 			return 0;
4909 
4910 		udelay(1);
4911 	}
4912 	return -ETIMEDOUT;
4913 }
4914 
4915 static int gfx_v8_0_hw_fini(void *handle)
4916 {
4917 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4918 
4919 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4920 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4921 
4922 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4923 
4924 	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4925 
	/* disable KCQ so the CPC stops touching memory that may no longer be valid */
4927 	gfx_v8_0_kcq_disable(adev);
4928 
4929 	if (amdgpu_sriov_vf(adev)) {
4930 		pr_debug("For SRIOV client, shouldn't do anything.\n");
4931 		return 0;
4932 	}
4933 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4934 	if (!gfx_v8_0_wait_for_idle(adev))
4935 		gfx_v8_0_cp_enable(adev, false);
4936 	else
4937 		pr_err("cp is busy, skip halt cp\n");
4938 	if (!gfx_v8_0_wait_for_rlc_idle(adev))
4939 		adev->gfx.rlc.funcs->stop(adev);
4940 	else
4941 		pr_err("rlc is busy, skip halt rlc\n");
4942 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4943 
4944 	return 0;
4945 }
4946 
4947 static int gfx_v8_0_suspend(void *handle)
4948 {
4949 	return gfx_v8_0_hw_fini(handle);
4950 }
4951 
4952 static int gfx_v8_0_resume(void *handle)
4953 {
4954 	return gfx_v8_0_hw_init(handle);
4955 }
4956 
4957 static bool gfx_v8_0_check_soft_reset(void *handle)
4958 {
4959 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4960 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4961 	u32 tmp;
4962 
4963 	/* GRBM_STATUS */
4964 	tmp = RREG32(mmGRBM_STATUS);
4965 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4966 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4967 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4968 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4969 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4970 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4971 		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4972 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4973 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4974 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4975 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4976 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4977 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4978 	}
4979 
4980 	/* GRBM_STATUS2 */
4981 	tmp = RREG32(mmGRBM_STATUS2);
4982 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4983 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4984 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4985 
4986 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4987 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4988 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4989 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4990 						SOFT_RESET_CPF, 1);
4991 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4992 						SOFT_RESET_CPC, 1);
4993 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4994 						SOFT_RESET_CPG, 1);
4995 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4996 						SOFT_RESET_GRBM, 1);
4997 	}
4998 
4999 	/* SRBM_STATUS */
5000 	tmp = RREG32(mmSRBM_STATUS);
5001 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5002 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5003 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5004 	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5005 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5006 						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5007 
5008 	if (grbm_soft_reset || srbm_soft_reset) {
5009 		adev->gfx.grbm_soft_reset = grbm_soft_reset;
5010 		adev->gfx.srbm_soft_reset = srbm_soft_reset;
5011 		return true;
5012 	} else {
5013 		adev->gfx.grbm_soft_reset = 0;
5014 		adev->gfx.srbm_soft_reset = 0;
5015 		return false;
5016 	}
5017 }
5018 
5019 static int gfx_v8_0_pre_soft_reset(void *handle)
5020 {
5021 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5022 	u32 grbm_soft_reset = 0;
5023 
5024 	if ((!adev->gfx.grbm_soft_reset) &&
5025 	    (!adev->gfx.srbm_soft_reset))
5026 		return 0;
5027 
5028 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5029 
5030 	/* stop the rlc */
5031 	adev->gfx.rlc.funcs->stop(adev);
5032 
5033 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5034 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5035 		/* Disable GFX parsing/prefetching */
5036 		gfx_v8_0_cp_gfx_enable(adev, false);
5037 
5038 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5039 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5040 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5041 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5042 		int i;
5043 
5044 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5045 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5046 
5047 			mutex_lock(&adev->srbm_mutex);
5048 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5049 			gfx_v8_0_deactivate_hqd(adev, 2);
5050 			vi_srbm_select(adev, 0, 0, 0, 0);
5051 			mutex_unlock(&adev->srbm_mutex);
5052 		}
5053 		/* Disable MEC parsing/prefetching */
5054 		gfx_v8_0_cp_compute_enable(adev, false);
5055 	}
5056 
5057 	return 0;
5058 }
5059 
5060 static int gfx_v8_0_soft_reset(void *handle)
5061 {
5062 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5063 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5064 	u32 tmp;
5065 
5066 	if ((!adev->gfx.grbm_soft_reset) &&
5067 	    (!adev->gfx.srbm_soft_reset))
5068 		return 0;
5069 
5070 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5071 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5072 
5073 	if (grbm_soft_reset || srbm_soft_reset) {
5074 		tmp = RREG32(mmGMCON_DEBUG);
5075 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5076 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5077 		WREG32(mmGMCON_DEBUG, tmp);
5078 		udelay(50);
5079 	}
5080 
5081 	if (grbm_soft_reset) {
5082 		tmp = RREG32(mmGRBM_SOFT_RESET);
5083 		tmp |= grbm_soft_reset;
5084 		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5085 		WREG32(mmGRBM_SOFT_RESET, tmp);
5086 		tmp = RREG32(mmGRBM_SOFT_RESET);
5087 
5088 		udelay(50);
5089 
5090 		tmp &= ~grbm_soft_reset;
5091 		WREG32(mmGRBM_SOFT_RESET, tmp);
5092 		tmp = RREG32(mmGRBM_SOFT_RESET);
5093 	}
5094 
5095 	if (srbm_soft_reset) {
5096 		tmp = RREG32(mmSRBM_SOFT_RESET);
5097 		tmp |= srbm_soft_reset;
5098 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5099 		WREG32(mmSRBM_SOFT_RESET, tmp);
5100 		tmp = RREG32(mmSRBM_SOFT_RESET);
5101 
5102 		udelay(50);
5103 
5104 		tmp &= ~srbm_soft_reset;
5105 		WREG32(mmSRBM_SOFT_RESET, tmp);
5106 		tmp = RREG32(mmSRBM_SOFT_RESET);
5107 	}
5108 
5109 	if (grbm_soft_reset || srbm_soft_reset) {
5110 		tmp = RREG32(mmGMCON_DEBUG);
5111 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5112 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5113 		WREG32(mmGMCON_DEBUG, tmp);
5114 	}
5115 
5116 	/* Wait a little for things to settle down */
5117 	udelay(50);
5118 
5119 	return 0;
5120 }
5121 
5122 static int gfx_v8_0_post_soft_reset(void *handle)
5123 {
5124 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5125 	u32 grbm_soft_reset = 0;
5126 
5127 	if ((!adev->gfx.grbm_soft_reset) &&
5128 	    (!adev->gfx.srbm_soft_reset))
5129 		return 0;
5130 
5131 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5132 
5133 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5134 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5135 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5136 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5137 		int i;
5138 
5139 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5140 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5141 
5142 			mutex_lock(&adev->srbm_mutex);
5143 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5144 			gfx_v8_0_deactivate_hqd(adev, 2);
5145 			vi_srbm_select(adev, 0, 0, 0, 0);
5146 			mutex_unlock(&adev->srbm_mutex);
5147 		}
5148 		gfx_v8_0_kiq_resume(adev);
5149 		gfx_v8_0_kcq_resume(adev);
5150 	}
5151 
5152 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5153 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5154 		gfx_v8_0_cp_gfx_resume(adev);
5155 
5156 	gfx_v8_0_cp_test_all_rings(adev);
5157 
5158 	adev->gfx.rlc.funcs->start(adev);
5159 
5160 	return 0;
5161 }
5162 
5163 /**
5164  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5165  *
5166  * @adev: amdgpu_device pointer
5167  *
 * Fetches and returns a 64-bit snapshot of the GPU clock counter.
5170  */
5171 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5172 {
5173 	uint64_t clock;
5174 
5175 	mutex_lock(&adev->gfx.gpu_clock_mutex);
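	/* writing 1 latches the counter so LSB/MSB read back coherently */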
5176 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5177 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5178 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5179 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5180 	return clock;
5181 }
5182 
5183 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5184 					  uint32_t vmid,
5185 					  uint32_t gds_base, uint32_t gds_size,
5186 					  uint32_t gws_base, uint32_t gws_size,
5187 					  uint32_t oa_base, uint32_t oa_size)
5188 {
5189 	/* GDS Base */
5190 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5191 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5192 				WRITE_DATA_DST_SEL(0)));
5193 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5194 	amdgpu_ring_write(ring, 0);
5195 	amdgpu_ring_write(ring, gds_base);
5196 
5197 	/* GDS Size */
5198 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5199 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5200 				WRITE_DATA_DST_SEL(0)));
5201 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5202 	amdgpu_ring_write(ring, 0);
5203 	amdgpu_ring_write(ring, gds_size);
5204 
5205 	/* GWS */
5206 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5207 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5208 				WRITE_DATA_DST_SEL(0)));
5209 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5210 	amdgpu_ring_write(ring, 0);
5211 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5212 
5213 	/* OA */
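	/*
	 * OA (ordered append) space is handed out as a contiguous bitmask:
	 * (1 << (oa_size + oa_base)) - (1 << oa_base) sets oa_size bits
	 * starting at bit oa_base, e.g. base 2 / size 3 gives 0b11100.
	 */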
5214 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5215 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5216 				WRITE_DATA_DST_SEL(0)));
5217 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5218 	amdgpu_ring_write(ring, 0);
5219 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5220 }
5221 
5222 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5223 {
5224 	WREG32(mmSQ_IND_INDEX,
5225 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5226 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5227 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5228 		(SQ_IND_INDEX__FORCE_READ_MASK));
5229 	return RREG32(mmSQ_IND_DATA);
5230 }
5231 
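/*
 * Like wave_read_ind(), but with AUTO_INCR set so that consecutive
 * RREG32(mmSQ_IND_DATA) reads return consecutive registers starting at
 * regno; the bulk SGPR reads below are then one setup write plus num
 * data reads.
 */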
5232 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5233 			   uint32_t wave, uint32_t thread,
5234 			   uint32_t regno, uint32_t num, uint32_t *out)
5235 {
5236 	WREG32(mmSQ_IND_INDEX,
5237 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5238 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5239 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5240 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5241 		(SQ_IND_INDEX__FORCE_READ_MASK) |
5242 		(SQ_IND_INDEX__AUTO_INCR_MASK));
5243 	while (num--)
5244 		*(out++) = RREG32(mmSQ_IND_DATA);
5245 }
5246 
5247 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5248 {
5249 	/* type 0 wave data */
5250 	dst[(*no_fields)++] = 0;
5251 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5252 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5253 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5254 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5255 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5256 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5257 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5258 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5259 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5260 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5261 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5262 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5263 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5264 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5265 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5266 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5267 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5268 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5269 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5270 }
5271 
5272 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5273 				     uint32_t wave, uint32_t start,
5274 				     uint32_t size, uint32_t *dst)
5275 {
5276 	wave_read_regs(
5277 		adev, simd, wave, 0,
5278 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5279 }
5280 
5282 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5283 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5284 	.select_se_sh = &gfx_v8_0_select_se_sh,
5285 	.read_wave_data = &gfx_v8_0_read_wave_data,
5286 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5287 	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5288 };
5289 
5290 static int gfx_v8_0_early_init(void *handle)
5291 {
5292 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5293 
5294 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5295 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5296 					  AMDGPU_MAX_COMPUTE_RINGS);
5297 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5298 	gfx_v8_0_set_ring_funcs(adev);
5299 	gfx_v8_0_set_irq_funcs(adev);
5300 	gfx_v8_0_set_gds_init(adev);
5301 	gfx_v8_0_set_rlc_funcs(adev);
5302 
5303 	return 0;
5304 }
5305 
5306 static int gfx_v8_0_late_init(void *handle)
5307 {
5308 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5309 	int r;
5310 
5311 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5312 	if (r)
5313 		return r;
5314 
5315 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5316 	if (r)
5317 		return r;
5318 
5319 	/* requires IBs so do in late init after IB pool is initialized */
5320 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5321 	if (r)
5322 		return r;
5323 
5324 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5325 	if (r) {
5326 		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5327 		return r;
5328 	}
5329 
5330 	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5331 	if (r) {
5332 		DRM_ERROR(
5333 			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5334 			r);
5335 		return r;
5336 	}
5337 
5338 	return 0;
5339 }
5340 
5341 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5342 						       bool enable)
5343 {
5344 	if ((adev->asic_type == CHIP_POLARIS11) ||
5345 	    (adev->asic_type == CHIP_POLARIS12) ||
5346 	    (adev->asic_type == CHIP_VEGAM))
5347 		/* Send msg to SMU via Powerplay */
5348 		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5349 
5350 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5351 }
5352 
5353 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5354 							bool enable)
5355 {
5356 	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5357 }
5358 
5359 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5360 		bool enable)
5361 {
5362 	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5363 }
5364 
5365 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5366 					  bool enable)
5367 {
5368 	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5369 }
5370 
5371 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5372 						bool enable)
5373 {
5374 	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5375 
5376 	/* Read any GFX register to wake up GFX. */
5377 	if (!enable)
5378 		RREG32(mmDB_RENDER_CONTROL);
5379 }
5380 
5381 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5382 					  bool enable)
5383 {
5384 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5385 		cz_enable_gfx_cg_power_gating(adev, true);
5386 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5387 			cz_enable_gfx_pipeline_power_gating(adev, true);
5388 	} else {
5389 		cz_enable_gfx_cg_power_gating(adev, false);
5390 		cz_enable_gfx_pipeline_power_gating(adev, false);
5391 	}
5392 }
5393 
5394 static int gfx_v8_0_set_powergating_state(void *handle,
5395 					  enum amd_powergating_state state)
5396 {
5397 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5398 	bool enable = (state == AMD_PG_STATE_GATE);
5399 
5400 	if (amdgpu_sriov_vf(adev))
5401 		return 0;
5402 
5403 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5404 				AMD_PG_SUPPORT_RLC_SMU_HS |
5405 				AMD_PG_SUPPORT_CP |
5406 				AMD_PG_SUPPORT_GFX_DMG))
5407 		amdgpu_gfx_rlc_enter_safe_mode(adev);
5408 	switch (adev->asic_type) {
5409 	case CHIP_CARRIZO:
5410 	case CHIP_STONEY:
5412 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5413 			cz_enable_sck_slow_down_on_power_up(adev, true);
5414 			cz_enable_sck_slow_down_on_power_down(adev, true);
5415 		} else {
5416 			cz_enable_sck_slow_down_on_power_up(adev, false);
5417 			cz_enable_sck_slow_down_on_power_down(adev, false);
5418 		}
5419 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5420 			cz_enable_cp_power_gating(adev, true);
5421 		else
5422 			cz_enable_cp_power_gating(adev, false);
5423 
5424 		cz_update_gfx_cg_power_gating(adev, enable);
5425 
5426 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5427 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5428 		else
5429 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5430 
5431 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5432 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5433 		else
5434 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5435 		break;
5436 	case CHIP_POLARIS11:
5437 	case CHIP_POLARIS12:
5438 	case CHIP_VEGAM:
5439 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5440 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5441 		else
5442 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5443 
5444 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5445 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5446 		else
5447 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5448 
5449 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5450 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5451 		else
5452 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5453 		break;
5454 	default:
5455 		break;
5456 	}
5457 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5458 				AMD_PG_SUPPORT_RLC_SMU_HS |
5459 				AMD_PG_SUPPORT_CP |
5460 				AMD_PG_SUPPORT_GFX_DMG))
5461 		amdgpu_gfx_rlc_exit_safe_mode(adev);
5462 	return 0;
5463 }
5464 
5465 static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
5466 {
5467 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5468 	int data;
5469 
5470 	if (amdgpu_sriov_vf(adev))
5471 		*flags = 0;
5472 
5473 	/* AMD_CG_SUPPORT_GFX_MGCG */
5474 	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5475 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5476 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5477 
	/* AMD_CG_SUPPORT_GFX_CGCG */
5479 	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5480 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5481 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5482 
5483 	/* AMD_CG_SUPPORT_GFX_CGLS */
5484 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5485 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5486 
5487 	/* AMD_CG_SUPPORT_GFX_CGTS */
5488 	data = RREG32(mmCGTS_SM_CTRL_REG);
5489 	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5490 		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5491 
5492 	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5493 	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5494 		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5495 
5496 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5497 	data = RREG32(mmRLC_MEM_SLP_CNTL);
5498 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5499 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5500 
5501 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5502 	data = RREG32(mmCP_MEM_SLP_CNTL);
5503 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5504 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5505 }
5506 
5507 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5508 				     uint32_t reg_addr, uint32_t cmd)
5509 {
5510 	uint32_t data;
5511 
5512 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5513 
5514 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5515 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5516 
5517 	data = RREG32(mmRLC_SERDES_WR_CTRL);
5518 	if (adev->asic_type == CHIP_STONEY)
5519 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5520 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5521 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5522 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5523 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5524 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5525 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5526 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5527 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5528 	else
5529 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5530 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5531 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5532 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5533 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5534 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5535 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5536 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5537 			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5538 			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5539 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5540 	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5541 		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5542 		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5543 		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5544 
5545 	WREG32(mmRLC_SERDES_WR_CTRL, data);
5546 }
5547 
5548 #define MSG_ENTER_RLC_SAFE_MODE     1
5549 #define MSG_EXIT_RLC_SAFE_MODE      0
5550 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5551 #define RLC_GPR_REG2__REQ__SHIFT 0
5552 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5553 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5554 
5555 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5556 {
5557 	uint32_t rlc_setting;
5558 
5559 	rlc_setting = RREG32(mmRLC_CNTL);
5560 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5561 		return false;
5562 
5563 	return true;
5564 }
5565 
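/*
 * Enter RLC safe mode: set CMD with MESSAGE = 1, matching
 * MSG_ENTER_RLC_SAFE_MODE above, then wait for the GFX clock/power
 * status bits to report on and for the RLC to clear CMD as an ack.
 */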
5566 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5567 {
5568 	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
5571 	data |= RLC_SAFE_MODE__CMD_MASK;
5572 	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5573 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5574 	WREG32(mmRLC_SAFE_MODE, data);
5575 
5576 	/* wait for RLC_SAFE_MODE */
5577 	for (i = 0; i < adev->usec_timeout; i++) {
5578 		if ((RREG32(mmRLC_GPM_STAT) &
5579 		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5580 		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5581 		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5582 		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5583 			break;
5584 		udelay(1);
5585 	}
5586 	for (i = 0; i < adev->usec_timeout; i++) {
5587 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5588 			break;
5589 		udelay(1);
5590 	}
5591 }
5592 
5593 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5594 {
5595 	uint32_t data;
5596 	unsigned i;
5597 
5598 	data = RREG32(mmRLC_CNTL);
5599 	data |= RLC_SAFE_MODE__CMD_MASK;
5600 	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5601 	WREG32(mmRLC_SAFE_MODE, data);
5602 
5603 	for (i = 0; i < adev->usec_timeout; i++) {
5604 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5605 			break;
5606 		udelay(1);
5607 	}
5608 }
5609 
5610 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5611 {
5612 	u32 data;
5613 
5614 	amdgpu_gfx_off_ctrl(adev, false);
5615 
5616 	if (amdgpu_sriov_is_pp_one_vf(adev))
5617 		data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5618 	else
5619 		data = RREG32(mmRLC_SPM_VMID);
5620 
5621 	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5622 	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5623 
5624 	if (amdgpu_sriov_is_pp_one_vf(adev))
5625 		WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5626 	else
5627 		WREG32(mmRLC_SPM_VMID, data);
5628 
5629 	amdgpu_gfx_off_ctrl(adev, true);
5630 }
5631 
5632 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5633 	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5634 	.set_safe_mode = gfx_v8_0_set_safe_mode,
5635 	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
5636 	.init = gfx_v8_0_rlc_init,
5637 	.get_csb_size = gfx_v8_0_get_csb_size,
5638 	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
5639 	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5640 	.resume = gfx_v8_0_rlc_resume,
5641 	.stop = gfx_v8_0_rlc_stop,
5642 	.reset = gfx_v8_0_rlc_reset,
5643 	.start = gfx_v8_0_rlc_start,
5644 	.update_spm_vmid = gfx_v8_0_update_spm_vmid
5645 };
5646 
5647 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5648 						      bool enable)
5649 {
5650 	uint32_t temp, data;
5651 
5652 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5653 
5654 	/* It is disabled by HW by default */
5655 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5656 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5657 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5658 				/* 1 - RLC memory Light sleep */
5659 				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5660 
5661 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
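				/* 2 - CP memory Light sleep */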
5662 				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5663 		}
5664 
5665 		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5666 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5667 		if (adev->flags & AMD_IS_APU)
5668 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5669 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5670 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5671 		else
5672 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5673 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5674 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5675 				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5676 
5677 		if (temp != data)
5678 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5679 
5680 		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5681 		gfx_v8_0_wait_for_rlc_serdes(adev);
5682 
5683 		/* 5 - clear mgcg override */
5684 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5685 
5686 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS (Tree Shade) MGCG/MGLS */
5688 			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5689 			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5690 			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5691 			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5692 			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5693 			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5694 			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5695 				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5696 			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5697 			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5698 			if (temp != data)
5699 				WREG32(mmCGTS_SM_CTRL_REG, data);
5700 		}
5701 		udelay(50);
5702 
5703 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5704 		gfx_v8_0_wait_for_rlc_serdes(adev);
5705 	} else {
5706 		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5707 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5708 		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5709 				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5710 				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5711 				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5712 		if (temp != data)
5713 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5714 
5715 		/* 2 - disable MGLS in RLC */
5716 		data = RREG32(mmRLC_MEM_SLP_CNTL);
5717 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5718 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5719 			WREG32(mmRLC_MEM_SLP_CNTL, data);
5720 		}
5721 
5722 		/* 3 - disable MGLS in CP */
5723 		data = RREG32(mmCP_MEM_SLP_CNTL);
5724 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5725 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5726 			WREG32(mmCP_MEM_SLP_CNTL, data);
5727 		}
5728 
5729 		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5730 		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5731 		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5732 				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5733 		if (temp != data)
5734 			WREG32(mmCGTS_SM_CTRL_REG, data);
5735 
5736 		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5737 		gfx_v8_0_wait_for_rlc_serdes(adev);
5738 
5739 		/* 6 - set mgcg override */
5740 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5741 
5742 		udelay(50);
5743 
5744 		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5746 	}
5747 
5748 	amdgpu_gfx_rlc_exit_safe_mode(adev);
5749 }
5750 
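/*
 * Coarse grain clock gating (CGCG) and light sleep (CGLS): clear or
 * set the override bits in RLC_CGTT_MGCG_OVERRIDE, issue the matching
 * BPM serdes commands, then flip the enable bits in
 * RLC_CGCG_CGLS_CTRL.
 */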
5751 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5752 						      bool enable)
5753 {
5754 	uint32_t temp, temp1, data, data1;
5755 
5756 	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5757 
5758 	amdgpu_gfx_rlc_enter_safe_mode(adev);
5759 
5760 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5761 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5762 		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5763 		if (temp1 != data1)
5764 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5765 
		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5767 		gfx_v8_0_wait_for_rlc_serdes(adev);
5768 
5769 		/* 2 - clear cgcg override */
5770 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5771 
5772 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5773 		gfx_v8_0_wait_for_rlc_serdes(adev);
5774 
5775 		/* 3 - write cmd to set CGLS */
5776 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5777 
5778 		/* 4 - enable cgcg */
5779 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5780 
5781 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
5783 			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5784 
5785 			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5786 			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5787 
5788 			if (temp1 != data1)
5789 				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5790 		} else {
5791 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5792 		}
5793 
5794 		if (temp != data)
5795 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5796 
		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
5800 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5801 	} else {
5802 		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5803 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5804 
5805 		/* TEST CGCG */
5806 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5807 		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5808 				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5809 		if (temp1 != data1)
5810 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5811 
5812 		/* read gfx register to wake up cgcg */
5813 		RREG32(mmCB_CGTT_SCLK_CTRL);
5814 		RREG32(mmCB_CGTT_SCLK_CTRL);
5815 		RREG32(mmCB_CGTT_SCLK_CTRL);
5816 		RREG32(mmCB_CGTT_SCLK_CTRL);
5817 
5818 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5819 		gfx_v8_0_wait_for_rlc_serdes(adev);
5820 
5821 		/* write cmd to Set CGCG Override */
5822 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5823 
5824 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5825 		gfx_v8_0_wait_for_rlc_serdes(adev);
5826 
5827 		/* write cmd to Clear CGLS */
5828 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5829 
5830 		/* disable cgcg, cgls should be disabled too. */
5831 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5832 			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5833 		if (temp != data)
5834 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5835 		/* enable interrupts again for PG */
5836 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5837 	}
5838 
5839 	gfx_v8_0_wait_for_rlc_serdes(adev);
5840 
5841 	amdgpu_gfx_rlc_exit_safe_mode(adev);
}

static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5844 					    bool enable)
5845 {
5846 	if (enable) {
5847 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5848 		 * ===  MGCG + MGLS + TS(CG/LS) ===
5849 		 */
5850 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5851 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5852 	} else {
5853 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5854 		 * ===  CGCG + CGLS ===
5855 		 */
5856 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5857 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5858 	}
5859 	return 0;
5860 }
5861 
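/*
 * Tonga and Polaris route gfx clock gating through the SMU: each
 * block's supported/requested state is packed into a PP_CG_MSG_ID and
 * handed to the DPM layer instead of programming the RLC directly.
 */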
5862 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5863 					  enum amd_clockgating_state state)
5864 {
5865 	uint32_t msg_id, pp_state = 0;
5866 	uint32_t pp_support_state = 0;
5867 
5868 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5869 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5870 			pp_support_state = PP_STATE_SUPPORT_LS;
5871 			pp_state = PP_STATE_LS;
5872 		}
5873 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5874 			pp_support_state |= PP_STATE_SUPPORT_CG;
5875 			pp_state |= PP_STATE_CG;
5876 		}
5877 		if (state == AMD_CG_STATE_UNGATE)
5878 			pp_state = 0;
5879 
5880 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5881 				PP_BLOCK_GFX_CG,
5882 				pp_support_state,
5883 				pp_state);
5884 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5885 	}
5886 
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		/* reset both so bits from the CG block above can't leak in
		 * when only MGCG (and not MGLS) is supported
		 */
		pp_support_state = 0;
		pp_state = 0;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
5892 
5893 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5894 			pp_support_state |= PP_STATE_SUPPORT_CG;
5895 			pp_state |= PP_STATE_CG;
5896 		}
5897 
5898 		if (state == AMD_CG_STATE_UNGATE)
5899 			pp_state = 0;
5900 
5901 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5902 				PP_BLOCK_GFX_MG,
5903 				pp_support_state,
5904 				pp_state);
5905 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5906 	}
5907 
5908 	return 0;
5909 }
5910 
5911 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5912 					  enum amd_clockgating_state state)
5913 {
5914 
5915 	uint32_t msg_id, pp_state = 0;
5916 	uint32_t pp_support_state = 0;
5917 
5918 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5919 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5920 			pp_support_state = PP_STATE_SUPPORT_LS;
5921 			pp_state = PP_STATE_LS;
5922 		}
5923 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5924 			pp_support_state |= PP_STATE_SUPPORT_CG;
5925 			pp_state |= PP_STATE_CG;
5926 		}
5927 		if (state == AMD_CG_STATE_UNGATE)
5928 			pp_state = 0;
5929 
5930 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5931 				PP_BLOCK_GFX_CG,
5932 				pp_support_state,
5933 				pp_state);
5934 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5935 	}
5936 
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		/* reset both so bits from the block above can't leak in */
		pp_support_state = 0;
		pp_state = 0;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
5942 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5943 			pp_support_state |= PP_STATE_SUPPORT_CG;
5944 			pp_state |= PP_STATE_CG;
5945 		}
5946 		if (state == AMD_CG_STATE_UNGATE)
5947 			pp_state = 0;
5948 
5949 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5950 				PP_BLOCK_GFX_3D,
5951 				pp_support_state,
5952 				pp_state);
5953 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5954 	}
5955 
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		/* reset both so bits from the block above can't leak in */
		pp_support_state = 0;
		pp_state = 0;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
5961 
5962 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5963 			pp_support_state |= PP_STATE_SUPPORT_CG;
5964 			pp_state |= PP_STATE_CG;
5965 		}
5966 
5967 		if (state == AMD_CG_STATE_UNGATE)
5968 			pp_state = 0;
5969 
5970 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5971 				PP_BLOCK_GFX_MG,
5972 				pp_support_state,
5973 				pp_state);
5974 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5975 	}
5976 
5977 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5978 		pp_support_state = PP_STATE_SUPPORT_LS;
5979 
5980 		if (state == AMD_CG_STATE_UNGATE)
5981 			pp_state = 0;
5982 		else
5983 			pp_state = PP_STATE_LS;
5984 
5985 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5986 				PP_BLOCK_GFX_RLC,
5987 				pp_support_state,
5988 				pp_state);
5989 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5990 	}
5991 
5992 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5993 		pp_support_state = PP_STATE_SUPPORT_LS;
5994 
5995 		if (state == AMD_CG_STATE_UNGATE)
5996 			pp_state = 0;
5997 		else
5998 			pp_state = PP_STATE_LS;
5999 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6000 			PP_BLOCK_GFX_CP,
6001 			pp_support_state,
6002 			pp_state);
6003 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6004 	}
6005 
6006 	return 0;
6007 }
6008 
6009 static int gfx_v8_0_set_clockgating_state(void *handle,
6010 					  enum amd_clockgating_state state)
6011 {
6012 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6013 
6014 	if (amdgpu_sriov_vf(adev))
6015 		return 0;
6016 
6017 	switch (adev->asic_type) {
6018 	case CHIP_FIJI:
6019 	case CHIP_CARRIZO:
6020 	case CHIP_STONEY:
6021 		gfx_v8_0_update_gfx_clock_gating(adev,
6022 						 state == AMD_CG_STATE_GATE);
6023 		break;
6024 	case CHIP_TONGA:
6025 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6026 		break;
6027 	case CHIP_POLARIS10:
6028 	case CHIP_POLARIS11:
6029 	case CHIP_POLARIS12:
6030 	case CHIP_VEGAM:
6031 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6032 		break;
6033 	default:
6034 		break;
6035 	}
6036 	return 0;
6037 }
6038 
6039 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6040 {
6041 	return *ring->rptr_cpu_addr;
6042 }
6043 
6044 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6045 {
6046 	struct amdgpu_device *adev = ring->adev;
6047 
6048 	if (ring->use_doorbell)
6049 		/* XXX check if swapping is necessary on BE */
6050 		return *ring->wptr_cpu_addr;
6051 	else
6052 		return RREG32(mmCP_RB0_WPTR);
6053 }
6054 
6055 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6056 {
6057 	struct amdgpu_device *adev = ring->adev;
6058 
6059 	if (ring->use_doorbell) {
6060 		/* XXX check if swapping is necessary on BE */
6061 		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6062 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6063 	} else {
6064 		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6065 		(void)RREG32(mmCP_RB0_WPTR);
6066 	}
6067 }
6068 
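/*
 * HDP flush through the CP: a single WAIT_REG_MEM with OPERATION(1)
 * writes the per-ring bit to GPU_HDP_FLUSH_REQ and then polls
 * GPU_HDP_FLUSH_DONE until the same bit comes back. Compute rings
 * derive their ref bit from me/pipe, gfx uses CP0 on the PFP.
 */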
6069 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6070 {
6071 	u32 ref_and_mask, reg_mem_engine;
6072 
6073 	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6074 	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6075 		switch (ring->me) {
6076 		case 1:
6077 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6078 			break;
6079 		case 2:
6080 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6081 			break;
6082 		default:
6083 			return;
6084 		}
6085 		reg_mem_engine = 0;
6086 	} else {
6087 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6088 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6089 	}
6090 
6091 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6092 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6093 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6094 				 reg_mem_engine));
6095 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6096 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6097 	amdgpu_ring_write(ring, ref_and_mask);
6098 	amdgpu_ring_write(ring, ref_and_mask);
6099 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6100 }
6101 
6102 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6103 {
6104 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6105 	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6106 		EVENT_INDEX(4));
6107 
6108 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6109 	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6110 		EVENT_INDEX(0));
6111 }
6112 
6113 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6114 					struct amdgpu_job *job,
6115 					struct amdgpu_ib *ib,
6116 					uint32_t flags)
6117 {
6118 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6119 	u32 header, control = 0;
6120 
6121 	if (ib->flags & AMDGPU_IB_FLAG_CE)
6122 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6123 	else
6124 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6125 
6126 	control |= ib->length_dw | (vmid << 24);
6127 
6128 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6129 		control |= INDIRECT_BUFFER_PRE_ENB(1);
6130 
6131 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6132 			gfx_v8_0_ring_emit_de_meta(ring);
6133 	}
6134 
6135 	amdgpu_ring_write(ring, header);
6136 	amdgpu_ring_write(ring,
6137 #ifdef __BIG_ENDIAN
6138 			  (2 << 0) |
6139 #endif
6140 			  (ib->gpu_addr & 0xFFFFFFFC));
6141 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6142 	amdgpu_ring_write(ring, control);
6143 }
6144 
6145 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6146 					  struct amdgpu_job *job,
6147 					  struct amdgpu_ib *ib,
6148 					  uint32_t flags)
6149 {
6150 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6151 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6152 
6153 	/* Currently, there is a high possibility to get wave ID mismatch
6154 	 * between ME and GDS, leading to a hw deadlock, because ME generates
6155 	 * different wave IDs than the GDS expects. This situation happens
6156 	 * randomly when at least 5 compute pipes use GDS ordered append.
6157 	 * The wave IDs generated by ME are also wrong after suspend/resume.
6158 	 * Those are probably bugs somewhere else in the kernel driver.
6159 	 *
6160 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6161 	 * GDS to 0 for this ring (me/pipe).
6162 	 */
6163 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6164 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6165 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6166 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6167 	}
6168 
6169 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6170 	amdgpu_ring_write(ring,
6171 #ifdef __BIG_ENDIAN
6172 				(2 << 0) |
6173 #endif
6174 				(ib->gpu_addr & 0xFFFFFFFC));
6175 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6176 	amdgpu_ring_write(ring, control);
6177 }
6178 
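/*
 * Gfx fence via two EVENT_WRITE_EOP packets. DATA_SEL selects a
 * 32-bit (1) or 64-bit (2) fence write, INT_SEL 2 raises the EOP
 * interrupt once the data write has been confirmed.
 */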
6179 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6180 					 u64 seq, unsigned flags)
6181 {
6182 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6183 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6184 
	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with a seq one below the real fence value.
	 */
6188 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6189 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6190 				 EOP_TC_ACTION_EN |
6191 				 EOP_TC_WB_ACTION_EN |
6192 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6193 				 EVENT_INDEX(5)));
6194 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6195 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6196 				DATA_SEL(1) | INT_SEL(0));
6197 	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6198 	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6199 
6200 	/* Then send the real EOP event down the pipe:
6201 	 * EVENT_WRITE_EOP - flush caches, send int */
6202 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6203 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6204 				 EOP_TC_ACTION_EN |
6205 				 EOP_TC_WB_ACTION_EN |
6206 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6207 				 EVENT_INDEX(5)));
6208 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6209 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6210 			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6211 	amdgpu_ring_write(ring, lower_32_bits(seq));
6212 	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6215 
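/*
 * Pipeline sync: stall the ring with a WAIT_REG_MEM on the fence
 * address until the last synced seq number shows up, executed on the
 * PFP for gfx rings so that command fetch stalls as well.
 */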
6216 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6217 {
6218 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6219 	uint32_t seq = ring->fence_drv.sync_seq;
6220 	uint64_t addr = ring->fence_drv.gpu_addr;
6221 
6222 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6223 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6224 				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6225 				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6226 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6227 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6228 	amdgpu_ring_write(ring, seq);
6229 	amdgpu_ring_write(ring, 0xffffffff);
6230 	amdgpu_ring_write(ring, 4); /* poll interval */
6231 }
6232 
6233 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6234 					unsigned vmid, uint64_t pd_addr)
6235 {
6236 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6237 
6238 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6239 
6240 	/* wait for the invalidate to complete */
6241 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6242 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6243 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6244 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6245 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6246 	amdgpu_ring_write(ring, 0);
6247 	amdgpu_ring_write(ring, 0); /* ref */
6248 	amdgpu_ring_write(ring, 0); /* mask */
6249 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6250 
6251 	/* compute doesn't have PFP */
6252 	if (usepfp) {
6253 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6254 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6255 		amdgpu_ring_write(ring, 0x0);
6256 	}
6257 }
6258 
6259 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6260 {
6261 	return *ring->wptr_cpu_addr;
6262 }
6263 
6264 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6265 {
6266 	struct amdgpu_device *adev = ring->adev;
6267 
6268 	/* XXX check if swapping is necessary on BE */
6269 	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6270 	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6271 }
6272 
6273 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6274 					     u64 addr, u64 seq,
6275 					     unsigned flags)
6276 {
6277 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6278 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6279 
6280 	/* RELEASE_MEM - flush caches, send int */
6281 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6282 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6283 				 EOP_TC_ACTION_EN |
6284 				 EOP_TC_WB_ACTION_EN |
6285 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6286 				 EVENT_INDEX(5)));
6287 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6288 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6289 	amdgpu_ring_write(ring, upper_32_bits(addr));
6290 	amdgpu_ring_write(ring, lower_32_bits(seq));
6291 	amdgpu_ring_write(ring, upper_32_bits(seq));
6292 }
6293 
6294 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6295 					 u64 seq, unsigned int flags)
6296 {
	/* we only allocate 32 bits for each seq wb address */
6298 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6299 
6300 	/* write fence seq to the "addr" */
6301 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6302 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6303 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6304 	amdgpu_ring_write(ring, lower_32_bits(addr));
6305 	amdgpu_ring_write(ring, upper_32_bits(addr));
6306 	amdgpu_ring_write(ring, lower_32_bits(seq));
6307 
6308 	if (flags & AMDGPU_FENCE_FLAG_INT) {
6309 		/* set register to trigger INT */
6310 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6311 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6312 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6313 		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6314 		amdgpu_ring_write(ring, 0);
6315 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6316 	}
6317 }
6318 
6319 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6320 {
6321 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6322 	amdgpu_ring_write(ring, 0);
6323 }
6324 
6325 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6326 {
6327 	uint32_t dw2 = 0;
6328 
6329 	if (amdgpu_sriov_vf(ring->adev))
6330 		gfx_v8_0_ring_emit_ce_meta(ring);
6331 
	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6333 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6334 		gfx_v8_0_ring_emit_vgt_flush(ring);
6335 		/* set load_global_config & load_global_uconfig */
6336 		dw2 |= 0x8001;
6337 		/* set load_cs_sh_regs */
6338 		dw2 |= 0x01000000;
6339 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6340 		dw2 |= 0x10002;
6341 
6342 		/* set load_ce_ram if preamble presented */
6343 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6344 			dw2 |= 0x10000000;
6345 	} else {
		/* still load_ce_ram if this is the first time a preamble is
		 * presented, even though no context switch happens
		 */
6349 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6350 			dw2 |= 0x10000000;
6351 	}
6352 
6353 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6354 	amdgpu_ring_write(ring, dw2);
6355 	amdgpu_ring_write(ring, 0);
6356 }
6357 
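/*
 * COND_EXEC carries the number of DWs to skip when *cond_exe_gpu_addr
 * reads 0. That count is unknown at emit time, so a dummy value is
 * written here and later fixed up by patch_cond_exec below, including
 * the wrap-around case.
 */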
6358 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6359 {
6360 	unsigned ret;
6361 
6362 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6363 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6364 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
6366 	ret = ring->wptr & ring->buf_mask;
6367 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6368 	return ret;
6369 }
6370 
6371 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6372 {
6373 	unsigned cur;
6374 
6375 	BUG_ON(offset > ring->buf_mask);
6376 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
6377 
6378 	cur = (ring->wptr & ring->buf_mask) - 1;
6379 	if (likely(cur > offset))
6380 		ring->ring[offset] = cur - offset;
6381 	else
6382 		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6383 }
6384 
6385 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6386 				    uint32_t reg_val_offs)
6387 {
6388 	struct amdgpu_device *adev = ring->adev;
6389 
6390 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register */
6392 				(5 << 8) |	/* dst: memory */
6393 				(1 << 20));	/* write confirm */
6394 	amdgpu_ring_write(ring, reg);
6395 	amdgpu_ring_write(ring, 0);
6396 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6397 				reg_val_offs * 4));
6398 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6399 				reg_val_offs * 4));
6400 }
6401 
6402 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6403 				  uint32_t val)
6404 {
6405 	uint32_t cmd;
6406 
6407 	switch (ring->funcs->type) {
6408 	case AMDGPU_RING_TYPE_GFX:
6409 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6410 		break;
6411 	case AMDGPU_RING_TYPE_KIQ:
6412 		cmd = 1 << 16; /* no inc addr */
6413 		break;
6414 	default:
6415 		cmd = WR_CONFIRM;
6416 		break;
6417 	}
6418 
6419 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6420 	amdgpu_ring_write(ring, cmd);
6421 	amdgpu_ring_write(ring, reg);
6422 	amdgpu_ring_write(ring, 0);
6423 	amdgpu_ring_write(ring, val);
6424 }
6425 
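/*
 * Soft recovery: point an SQ_CMD at the hanging VMID. CMD 0x03 with
 * broadcast MODE is, as far as the register headers suggest, the
 * kill-waves request; CHECK_VMID restricts it to the given vmid.
 */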
6426 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6427 {
6428 	struct amdgpu_device *adev = ring->adev;
6429 	uint32_t value = 0;
6430 
6431 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6432 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6433 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6434 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6435 	WREG32(mmSQ_CMD, value);
6436 }
6437 
6438 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6439 						 enum amdgpu_interrupt_state state)
6440 {
6441 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6442 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6443 }
6444 
6445 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6446 						     int me, int pipe,
6447 						     enum amdgpu_interrupt_state state)
6448 {
6449 	u32 mec_int_cntl, mec_int_cntl_reg;
6450 
6451 	/*
6452 	 * amdgpu controls only the first MEC. That's why this function only
6453 	 * handles the setting of interrupts for this specific MEC. All other
6454 	 * pipes' interrupts are set by amdkfd.
6455 	 */
6456 
6457 	if (me == 1) {
6458 		switch (pipe) {
6459 		case 0:
6460 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6461 			break;
6462 		case 1:
6463 			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6464 			break;
6465 		case 2:
6466 			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6467 			break;
6468 		case 3:
6469 			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6470 			break;
6471 		default:
6472 			DRM_DEBUG("invalid pipe %d\n", pipe);
6473 			return;
6474 		}
6475 	} else {
6476 		DRM_DEBUG("invalid me %d\n", me);
6477 		return;
6478 	}
6479 
6480 	switch (state) {
6481 	case AMDGPU_IRQ_STATE_DISABLE:
6482 		mec_int_cntl = RREG32(mec_int_cntl_reg);
6483 		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6484 		WREG32(mec_int_cntl_reg, mec_int_cntl);
6485 		break;
6486 	case AMDGPU_IRQ_STATE_ENABLE:
6487 		mec_int_cntl = RREG32(mec_int_cntl_reg);
6488 		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6489 		WREG32(mec_int_cntl_reg, mec_int_cntl);
6490 		break;
6491 	default:
6492 		break;
6493 	}
6494 }
6495 
6496 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6497 					     struct amdgpu_irq_src *source,
6498 					     unsigned type,
6499 					     enum amdgpu_interrupt_state state)
6500 {
6501 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6502 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6503 
6504 	return 0;
6505 }
6506 
6507 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6508 					      struct amdgpu_irq_src *source,
6509 					      unsigned type,
6510 					      enum amdgpu_interrupt_state state)
6511 {
6512 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6513 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6514 
6515 	return 0;
6516 }
6517 
6518 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6519 					    struct amdgpu_irq_src *src,
6520 					    unsigned type,
6521 					    enum amdgpu_interrupt_state state)
6522 {
6523 	switch (type) {
6524 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6525 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6526 		break;
6527 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6528 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6529 		break;
6530 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6531 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6532 		break;
6533 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6534 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6535 		break;
6536 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6537 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6538 		break;
6539 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6540 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6541 		break;
6542 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6543 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6544 		break;
6545 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6546 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6547 		break;
6548 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6549 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6550 		break;
6551 	default:
6552 		break;
6553 	}
6554 	return 0;
6555 }
6556 
6557 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6558 					 struct amdgpu_irq_src *source,
6559 					 unsigned int type,
6560 					 enum amdgpu_interrupt_state state)
6561 {
6562 	int enable_flag;
6563 
6564 	switch (state) {
6565 	case AMDGPU_IRQ_STATE_DISABLE:
6566 		enable_flag = 0;
6567 		break;
6568 
6569 	case AMDGPU_IRQ_STATE_ENABLE:
6570 		enable_flag = 1;
6571 		break;
6572 
6573 	default:
6574 		return -EINVAL;
6575 	}
6576 
6577 	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6578 	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6579 	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6580 	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6581 	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6582 	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6583 		     enable_flag);
6584 	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6585 		     enable_flag);
6586 	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6587 		     enable_flag);
6588 	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6589 		     enable_flag);
6590 	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6591 		     enable_flag);
6592 	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6593 		     enable_flag);
6594 	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6595 		     enable_flag);
6596 	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6597 		     enable_flag);
6598 
6599 	return 0;
6600 }
6601 
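/*
 * Note the inversion: "disabling" the SQ interrupt stalls message
 * forwarding (STALL = 1) rather than masking the source, and enabling
 * it clears the stall again.
 */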
6602 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6603 				     struct amdgpu_irq_src *source,
6604 				     unsigned int type,
6605 				     enum amdgpu_interrupt_state state)
6606 {
6607 	int enable_flag;
6608 
6609 	switch (state) {
6610 	case AMDGPU_IRQ_STATE_DISABLE:
6611 		enable_flag = 1;
6612 		break;
6613 
6614 	case AMDGPU_IRQ_STATE_ENABLE:
6615 		enable_flag = 0;
6616 		break;
6617 
6618 	default:
6619 		return -EINVAL;
6620 	}
6621 
6622 	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6623 		     enable_flag);
6624 
6625 	return 0;
6626 }
6627 
6628 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6629 			    struct amdgpu_irq_src *source,
6630 			    struct amdgpu_iv_entry *entry)
6631 {
6632 	int i;
6633 	u8 me_id, pipe_id, queue_id;
6634 	struct amdgpu_ring *ring;
6635 
6636 	DRM_DEBUG("IH: CP EOP\n");
6637 	me_id = (entry->ring_id & 0x0c) >> 2;
6638 	pipe_id = (entry->ring_id & 0x03) >> 0;
6639 	queue_id = (entry->ring_id & 0x70) >> 4;
6640 
6641 	switch (me_id) {
6642 	case 0:
6643 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6644 		break;
6645 	case 1:
6646 	case 2:
6647 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6648 			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue.
			 */
6652 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6653 				amdgpu_fence_process(ring);
6654 		}
6655 		break;
6656 	}
6657 	return 0;
6658 }
6659 
6660 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6661 			   struct amdgpu_iv_entry *entry)
6662 {
6663 	u8 me_id, pipe_id, queue_id;
6664 	struct amdgpu_ring *ring;
6665 	int i;
6666 
6667 	me_id = (entry->ring_id & 0x0c) >> 2;
6668 	pipe_id = (entry->ring_id & 0x03) >> 0;
6669 	queue_id = (entry->ring_id & 0x70) >> 4;
6670 
6671 	switch (me_id) {
6672 	case 0:
6673 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6674 		break;
6675 	case 1:
6676 	case 2:
6677 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6678 			ring = &adev->gfx.compute_ring[i];
6679 			if (ring->me == me_id && ring->pipe == pipe_id &&
6680 			    ring->queue == queue_id)
6681 				drm_sched_fault(&ring->sched);
6682 		}
6683 		break;
6684 	}
6685 }
6686 
6687 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6688 				 struct amdgpu_irq_src *source,
6689 				 struct amdgpu_iv_entry *entry)
6690 {
6691 	DRM_ERROR("Illegal register access in command stream\n");
6692 	gfx_v8_0_fault(adev, entry);
6693 	return 0;
6694 }
6695 
6696 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6697 				  struct amdgpu_irq_src *source,
6698 				  struct amdgpu_iv_entry *entry)
6699 {
6700 	DRM_ERROR("Illegal instruction in command stream\n");
6701 	gfx_v8_0_fault(adev, entry);
6702 	return 0;
6703 }
6704 
6705 static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6706 				     struct amdgpu_irq_src *source,
6707 				     struct amdgpu_iv_entry *entry)
6708 {
	DRM_ERROR("CP EDC/ECC error detected.\n");
6710 	return 0;
6711 }
6712 
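/*
 * Decode one SQ interrupt word. ENCODING 0 is the general purpose
 * (auto) format; 1 is an instruction trap and 2 an EDC/ECC error, for
 * which the wave coordinates (se/sh/cu) are recovered from the payload.
 */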
6713 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6714 				  bool from_wq)
6715 {
6716 	u32 enc, se_id, sh_id, cu_id;
6717 	char type[20];
6718 	int sq_edc_source = -1;
6719 
6720 	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6721 	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6722 
	switch (enc) {
	case 0:
		DRM_INFO("SQ general purpose intr detected: "
			 "se_id %d, immed_overflow %d, host_reg_overflow %d, "
			 "host_cmd_overflow %d, cmd_timestamp %d, "
			 "reg_timestamp %d, thread_trace_buff_full %d, "
			 "wlt %d, thread_trace %d.\n",
			 se_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE));
		break;
	case 1:
	case 2:
		cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
		sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);

		/*
		 * This function can be called either directly from the ISR
		 * or from the BH, in which case we can also access the
		 * SQ_EDC_INFO instance.
		 */
		if (from_wq) {
			mutex_lock(&adev->grbm_idx_mutex);
			gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);

			sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);

			gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
			mutex_unlock(&adev->grbm_idx_mutex);
		}

		if (enc == 1)
			sprintf(type, "instruction intr");
		else
			sprintf(type, "EDC/ECC error");

		DRM_INFO("SQ %s detected: "
			 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d, "
			 "trap %s, sq_edc_info.source %s.\n",
			 type, se_id, sh_id, cu_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
			 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable");
		break;
	default:
		DRM_ERROR("SQ invalid encoding type.\n");
	}
6783 
6784 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6788 	struct sq_work *sq_work = container_of(work, struct sq_work, work);
6789 
6790 	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6791 }
6792 
6793 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6794 			   struct amdgpu_irq_src *source,
6795 			   struct amdgpu_iv_entry *entry)
6796 {
6797 	unsigned ih_data = entry->src_data[0];
6798 
6799 	/*
6800 	 * Try to submit work so SQ_EDC_INFO can be accessed from
6801 	 * BH. If previous work submission hasn't finished yet
6802 	 * just print whatever info is possible directly from the ISR.
6803 	 */
6804 	if (work_pending(&adev->gfx.sq_work.work)) {
6805 		gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6806 	} else {
6807 		adev->gfx.sq_work.ih_data = ih_data;
6808 		schedule_work(&adev->gfx.sq_work.work);
6809 	}
6810 
6811 	return 0;
6812 }
6813 
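/*
 * Full cache flush/invalidate for the gfx ring via SURFACE_SYNC; the
 * compute variant below uses ACQUIRE_MEM, which adds the 64-bit
 * CP_COHER_SIZE_HI/CP_COHER_BASE_HI operands.
 */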
6814 static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6815 {
6816 	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6817 	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6818 			  PACKET3_TC_ACTION_ENA |
6819 			  PACKET3_SH_KCACHE_ACTION_ENA |
6820 			  PACKET3_SH_ICACHE_ACTION_ENA |
6821 			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6822 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6823 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6824 	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6825 }
6826 
6827 static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6828 {
6829 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6830 	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6831 			  PACKET3_TC_ACTION_ENA |
6832 			  PACKET3_SH_KCACHE_ACTION_ENA |
6833 			  PACKET3_SH_ICACHE_ACTION_ENA |
6834 			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6835 	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
6836 	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
6837 	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
6838 	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
6839 	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
6840 }
6841 
/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
6844 #define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT	0x0000007f
6845 static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6846 					uint32_t pipe, bool enable)
6847 {
6848 	uint32_t val;
6849 	uint32_t wcl_cs_reg;
6850 
6851 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6852 
6853 	switch (pipe) {
6854 	case 0:
6855 		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6856 		break;
6857 	case 1:
6858 		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6859 		break;
6860 	case 2:
6861 		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6862 		break;
6863 	case 3:
6864 		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6865 		break;
6866 	default:
6867 		DRM_DEBUG("invalid pipe %d\n", pipe);
6868 		return;
6869 	}
6870 
6871 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
}
6874 
6875 #define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT	0x07ffffff
6876 static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6877 {
6878 	struct amdgpu_device *adev = ring->adev;
6879 	uint32_t val;
6880 	int i;
6881 
	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register that
	 * limits the number of gfx waves. Setting only 5 bits (0x1f) makes
	 * sure gfx gets only around 25% of GPU resources.
	 */
6886 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6887 	amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6888 
	/* Restrict waves for normal/low priority compute queues as well
	 * to get the best QoS for high priority compute jobs.
	 *
	 * amdgpu controls only the first ME (CS pipes 0-3).
	 */
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		if (i != ring->pipe)
			gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
	}
}
6901 
6902 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6903 	.name = "gfx_v8_0",
6904 	.early_init = gfx_v8_0_early_init,
6905 	.late_init = gfx_v8_0_late_init,
6906 	.sw_init = gfx_v8_0_sw_init,
6907 	.sw_fini = gfx_v8_0_sw_fini,
6908 	.hw_init = gfx_v8_0_hw_init,
6909 	.hw_fini = gfx_v8_0_hw_fini,
6910 	.suspend = gfx_v8_0_suspend,
6911 	.resume = gfx_v8_0_resume,
6912 	.is_idle = gfx_v8_0_is_idle,
6913 	.wait_for_idle = gfx_v8_0_wait_for_idle,
6914 	.check_soft_reset = gfx_v8_0_check_soft_reset,
6915 	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6916 	.soft_reset = gfx_v8_0_soft_reset,
6917 	.post_soft_reset = gfx_v8_0_post_soft_reset,
6918 	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6919 	.set_powergating_state = gfx_v8_0_set_powergating_state,
6920 	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6921 };
6922 
6923 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6924 	.type = AMDGPU_RING_TYPE_GFX,
6925 	.align_mask = 0xff,
6926 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6927 	.support_64bit_ptrs = false,
6928 	.get_rptr = gfx_v8_0_ring_get_rptr,
6929 	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6930 	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215 DWs if counting 16 IBs in */
6932 		5 +  /* COND_EXEC */
6933 		7 +  /* PIPELINE_SYNC */
6934 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6935 		12 +  /* FENCE for VM_FLUSH */
6936 		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jumps to the place just
		     * prior to this double SWITCH_BUFFER */
6940 		5 + /* COND_EXEC */
6941 		7 +	 /*	HDP_flush */
6942 		4 +	 /*	VGT_flush */
6943 		14 + /*	CE_META */
6944 		31 + /*	DE_META */
6945 		3 + /* CNTX_CTRL */
6946 		5 + /* HDP_INVL */
6947 		12 + 12 + /* FENCE x2 */
6948 		2 + /* SWITCH_BUFFER */
6949 		5, /* SURFACE_SYNC */
6950 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6951 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6952 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6953 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6954 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6955 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6956 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6957 	.test_ring = gfx_v8_0_ring_test_ring,
6958 	.test_ib = gfx_v8_0_ring_test_ib,
6959 	.insert_nop = amdgpu_ring_insert_nop,
6960 	.pad_ib = amdgpu_ring_generic_pad_ib,
6961 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6962 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6963 	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6964 	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6965 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6966 	.soft_recovery = gfx_v8_0_ring_soft_recovery,
6967 	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
6968 };
6969 
6970 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6971 	.type = AMDGPU_RING_TYPE_COMPUTE,
6972 	.align_mask = 0xff,
6973 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6974 	.support_64bit_ptrs = false,
6975 	.get_rptr = gfx_v8_0_ring_get_rptr,
6976 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6977 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6978 	.emit_frame_size =
6979 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6980 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6981 		5 + /* hdp_invalidate */
6982 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6983 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6984 		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6985 		7 + /* gfx_v8_0_emit_mem_sync_compute */
6986 		5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6987 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6988 	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
6989 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
6990 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
6991 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6992 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6993 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6994 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6995 	.test_ring = gfx_v8_0_ring_test_ring,
6996 	.test_ib = gfx_v8_0_ring_test_ib,
6997 	.insert_nop = amdgpu_ring_insert_nop,
6998 	.pad_ib = amdgpu_ring_generic_pad_ib,
6999 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7000 	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
7001 	.emit_wave_limit = gfx_v8_0_emit_wave_limit,
7002 };
7003 
7004 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7005 	.type = AMDGPU_RING_TYPE_KIQ,
7006 	.align_mask = 0xff,
7007 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7008 	.support_64bit_ptrs = false,
7009 	.get_rptr = gfx_v8_0_ring_get_rptr,
7010 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
7011 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
7012 	.emit_frame_size =
7013 		20 + /* gfx_v8_0_ring_emit_gds_switch */
7014 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
7015 		5 + /* hdp_invalidate */
7016 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7017 		17 + /* gfx_v8_0_ring_emit_vm_flush */
7018 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7019 	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
7020 	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7021 	.test_ring = gfx_v8_0_ring_test_ring,
7022 	.insert_nop = amdgpu_ring_insert_nop,
7023 	.pad_ib = amdgpu_ring_generic_pad_ib,
7024 	.emit_rreg = gfx_v8_0_ring_emit_rreg,
7025 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7026 };
7027 
7028 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7029 {
7030 	int i;
7031 
7032 	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7033 
7034 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7035 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7036 
7037 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7038 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7039 }
7040 
7041 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7042 	.set = gfx_v8_0_set_eop_interrupt_state,
7043 	.process = gfx_v8_0_eop_irq,
7044 };
7045 
7046 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7047 	.set = gfx_v8_0_set_priv_reg_fault_state,
7048 	.process = gfx_v8_0_priv_reg_irq,
7049 };
7050 
7051 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7052 	.set = gfx_v8_0_set_priv_inst_fault_state,
7053 	.process = gfx_v8_0_priv_inst_irq,
7054 };
7055 
7056 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7057 	.set = gfx_v8_0_set_cp_ecc_int_state,
7058 	.process = gfx_v8_0_cp_ecc_error_irq,
7059 };
7060 
7061 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7062 	.set = gfx_v8_0_set_sq_int_state,
7063 	.process = gfx_v8_0_sq_irq,
7064 };
7065 
7066 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7067 {
7068 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7069 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7070 
7071 	adev->gfx.priv_reg_irq.num_types = 1;
7072 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7073 
7074 	adev->gfx.priv_inst_irq.num_types = 1;
7075 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7076 
7077 	adev->gfx.cp_ecc_error_irq.num_types = 1;
7078 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7079 
7080 	adev->gfx.sq_irq.num_types = 1;
7081 	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7082 }
7083 
7084 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7085 {
7086 	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7087 }
7088 
7089 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7090 {
	/* init asic gds info */
7092 	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7093 	adev->gds.gws_size = 64;
7094 	adev->gds.oa_size = 16;
7095 	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7096 }
7097 
7098 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7099 						 u32 bitmap)
7100 {
7101 	u32 data;
7102 
7103 	if (!bitmap)
7104 		return;
7105 
7106 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7107 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7108 
7109 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7110 }
7111 
7112 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7113 {
7114 	u32 data, mask;
7115 
7116 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7117 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7118 
7119 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7120 
7121 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7122 }
7123 
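/*
 * Build the per-SE/SH compute unit bitmaps: apply any user-requested
 * CU disable masks, count the active CUs, and mark the first
 * ao_cu_num CUs of each SH as always-on (APUs keep only two per SH).
 */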
7124 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7125 {
7126 	int i, j, k, counter, active_cu_number = 0;
7127 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7128 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7129 	unsigned disable_masks[4 * 2];
7130 	u32 ao_cu_num;
7131 
7132 	memset(cu_info, 0, sizeof(*cu_info));
7133 
7134 	if (adev->flags & AMD_IS_APU)
7135 		ao_cu_num = 2;
7136 	else
7137 		ao_cu_num = adev->gfx.config.max_cu_per_sh;
7138 
7139 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7140 
7141 	mutex_lock(&adev->grbm_idx_mutex);
7142 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7143 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7144 			mask = 1;
7145 			ao_bitmap = 0;
7146 			counter = 0;
7147 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7148 			if (i < 4 && j < 2)
7149 				gfx_v8_0_set_user_cu_inactive_bitmap(
7150 					adev, disable_masks[i * 2 + j]);
7151 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7152 			cu_info->bitmap[i][j] = bitmap;
7153 
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7155 				if (bitmap & mask) {
7156 					if (counter < ao_cu_num)
7157 						ao_bitmap |= mask;
					counter++;
7159 				}
7160 				mask <<= 1;
7161 			}
7162 			active_cu_number += counter;
7163 			if (i < 2 && j < 2)
7164 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7165 			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7166 		}
7167 	}
7168 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7169 	mutex_unlock(&adev->grbm_idx_mutex);
7170 
7171 	cu_info->number = active_cu_number;
7172 	cu_info->ao_cu_mask = ao_cu_mask;
7173 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7174 	cu_info->max_waves_per_simd = 10;
7175 	cu_info->max_scratch_slots_per_cu = 32;
7176 	cu_info->wave_front_size = 64;
7177 	cu_info->lds_size = 64;
7178 }
7179 
7180 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7181 {
7182 	.type = AMD_IP_BLOCK_TYPE_GFX,
7183 	.major = 8,
7184 	.minor = 0,
7185 	.rev = 0,
7186 	.funcs = &gfx_v8_0_ip_funcs,
7187 };
7188 
7189 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7190 {
7191 	.type = AMD_IP_BLOCK_TYPE_GFX,
7192 	.major = 8,
7193 	.minor = 1,
7194 	.rev = 0,
7195 	.funcs = &gfx_v8_0_ip_funcs,
7196 };
7197 
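/*
 * CE/DE metadata for SR-IOV world switch: snapshot the CE RAM and
 * draw engine state into the per-VF CSA so that a preempted IB can be
 * restored; firmware with chained IB support uses the larger chained
 * layouts.
 */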
7198 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7199 {
7200 	uint64_t ce_payload_addr;
7201 	int cnt_ce;
7202 	union {
7203 		struct vi_ce_ib_state regular;
7204 		struct vi_ce_ib_state_chained_ib chained;
7205 	} ce_payload = {};
7206 
7207 	if (ring->adev->virt.chained_ib_support) {
7208 		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7209 			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7210 		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7211 	} else {
7212 		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7213 			offsetof(struct vi_gfx_meta_data, ce_payload);
7214 		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7215 	}
7216 
7217 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7218 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7219 				WRITE_DATA_DST_SEL(8) |
7220 				WR_CONFIRM) |
7221 				WRITE_DATA_CACHE_POLICY(0));
7222 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7223 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7224 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7225 }
7226 
7227 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7228 {
7229 	uint64_t de_payload_addr, gds_addr, csa_addr;
7230 	int cnt_de;
7231 	union {
7232 		struct vi_de_ib_state regular;
7233 		struct vi_de_ib_state_chained_ib chained;
7234 	} de_payload = {};
7235 
7236 	csa_addr = amdgpu_csa_vaddr(ring->adev);
7237 	gds_addr = csa_addr + 4096;
7238 	if (ring->adev->virt.chained_ib_support) {
7239 		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7240 		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7241 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7242 		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7243 	} else {
7244 		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7245 		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7246 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7247 		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7248 	}
7249 
7250 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7251 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7252 				WRITE_DATA_DST_SEL(8) |
7253 				WR_CONFIRM) |
7254 				WRITE_DATA_CACHE_POLICY(0));
7255 	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7256 	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7257 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7258 }
7259