/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision e8cc149ed906a371a5962ff8065393bae28165c9) */
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_ring.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 4096

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
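
/*
 * Illustrative sketch (editorial note, not part of the programming
 * sequence): each macro above shifts a field value into its slot in the
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 register layout, so a tiling table
 * entry is composed by OR-ing them together, e.g.
 *
 *   u32 tile_mode = ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 *                   PIPE_CONFIG(ADDR_SURF_P2) |
 *                   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
 *                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING);
 *
 * The enum values come from gca/gfx_8_0_enum.h.
 */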

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};
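
/*
 * Editorial note: these BPM register indices are paired with
 * SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD above when issuing serdes
 * commands that set or clear a block's clock-gating override;
 * BPM_REG_FGCG_MAX simply marks the end of the list.
 */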

#define RLC_FormatDirectRegListLength        14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");

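/*
 * Per-VMID GDS register groups: a {base, size, gws, oa} tuple for each
 * of the 16 VMIDs, used to program a VM's GDS, GWS and OA partitions.
 */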
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

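/*
 * The "golden" tables below are {register, AND mask, OR value} triples
 * consumed by amdgpu_device_program_register_sequence(): a 0xffffffff
 * mask writes the value directly; otherwise, roughly, the masked bits
 * are cleared and the OR value applied before the write-back:
 *
 *   tmp = RREG32(reg);
 *   tmp &= ~and_mask;
 *   tmp |= or_value;
 *   WREG32(reg, tmp);
 */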
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

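/*
 * Human-readable strings for the SOURCE field of SQ_EDC_INFO, indexed by
 * the raw field value when EDC errors are reported.
 */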
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK                    0x0000007fL
#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT                  0x00000000L

static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	uint32_t data;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		data = RREG32_SMC(ixCG_ACLK_CNTL);
		data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
		data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
		WREG32_SMC(ixCG_ACLK_CNTL, data);
		if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

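/*
 * Basic ring test: push a SET_UCONFIG_REG packet that writes 0xDEADBEEF
 * to SCRATCH_REG0 and poll the register until the CP consumes it, or
 * fail with -ETIMEDOUT after adev->usec_timeout microseconds.
 */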
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(mmSCRATCH_REG0);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

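/*
 * IB test: build a small indirect buffer containing one WRITE_DATA
 * packet (dst_sel 5 selects memory, with write confirmation) that
 * stores 0xDEADBEEF into a writeback slot, schedule it, wait on the
 * fence, and verify that the value landed.
 */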
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.ce_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		amdgpu_ucode_release(&adev->gfx.mec2_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

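/*
 * Fetch all CP and RLC microcode for the detected chip.  On the Polaris
 * parts the "_2.bin" variants are requested first as optional firmware,
 * falling back to the original names on -ENODEV; MEC2 firmware is
 * optional wherever it is requested, so failure there is not fatal.
 */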
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
					   AMDGPU_UCODE_OPTIONAL,
					   "amdgpu/%s_pfp_2.bin", chip_name);
		if (err == -ENODEV) {
			err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_pfp.bin", chip_name);
		}
	} else {
		err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_pfp.bin", chip_name);
	}
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
					   AMDGPU_UCODE_OPTIONAL,
					   "amdgpu/%s_me_2.bin", chip_name);
		if (err == -ENODEV) {
			err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_me.bin", chip_name);
		}
	} else {
		err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_me.bin", chip_name);
	}
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
					   AMDGPU_UCODE_OPTIONAL,
					   "amdgpu/%s_ce_2.bin", chip_name);
		if (err == -ENODEV) {
			err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_ce.bin", chip_name);
		}
	} else {
		err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_ce.bin", chip_name);
	}
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released in feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_rlc.bin", chip_name);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
					   AMDGPU_UCODE_OPTIONAL,
					   "amdgpu/%s_mec_2.bin", chip_name);
		if (err == -ENODEV) {
			err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_mec.bin", chip_name);
		}
	} else {
		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_mec.bin", chip_name);
	}
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
						   AMDGPU_UCODE_OPTIONAL,
						   "amdgpu/%s_mec2_2.bin", chip_name);
			if (err == -ENODEV) {
				err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
							   AMDGPU_UCODE_REQUIRED,
							   "amdgpu/%s_mec2.bin", chip_name);
			}
		} else {
			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_mec2.bin", chip_name);
		}
		if (!err) {
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
	info->fw = adev->gfx.pfp_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
	info->fw = adev->gfx.me_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
	info->fw = adev->gfx.ce_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
	info->fw = adev->gfx.rlc_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
	info->fw = adev->gfx.mec_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	/* we also need to account for the JT (CP jump table) */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

	if (amdgpu_sriov_vf(adev)) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
	}

	if (adev->gfx.mec2_fw) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
		info->fw = adev->gfx.mec2_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev, "gfx8: failed to load %s gfx firmware\n", chip_name);
		amdgpu_ucode_release(&adev->gfx.pfp_fw);
		amdgpu_ucode_release(&adev->gfx.me_fw);
		amdgpu_ucode_release(&adev->gfx.ce_fw);
		amdgpu_ucode_release(&adev->gfx.rlc_fw);
		amdgpu_ucode_release(&adev->gfx.mec_fw);
		amdgpu_ucode_release(&adev->gfx.mec2_fw);
	}
	return err;
}

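/*
 * Build the clear-state buffer (CSB): clear-state preamble begin, a
 * CONTEXT_CONTROL packet, every SECT_CONTEXT extent from the ASIC's
 * cs_data, the harvested raster configuration, preamble end, and a
 * final CLEAR_STATE packet.
 */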
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_CARRIZO)
		return 5;
	else
		return 4;
}

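/*
 * RLC setup: point cs_data at the VI clear-state definition and build
 * the CSB; on Carrizo and Stoney also allocate the CP table (jump
 * tables plus 64KB of GDS backup); finally default the SPM VMID to 0xf.
 */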
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	/* init SPM VMID with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

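/*
 * Allocate the MEC HPD EOP buffer: GFX8_MEC_HPD_SIZE bytes for each
 * acquired compute ring, created reserved in VRAM (or GTT), zeroed,
 * then unmapped and unreserved; freed again in gfx_v8_0_mec_fini().
 */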
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM |
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

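/*
 * Raw GCN3 machine code for the EDC GPR workaround below.  The two
 * shaders appear to consist of v_mov_b32 / s_mov_b64 sequences that
 * write every VGPR and SGPR once, ending in s_barrier (0xbf8a0000) and
 * s_endpgm (0xbf810000).  The *_init_regs arrays that follow are
 * {register, value} pairs programmed before each dispatch.
 */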
1345 static const u32 vgpr_init_compute_shader[] =
1346 {
1347 	0x7e000209, 0x7e020208,
1348 	0x7e040207, 0x7e060206,
1349 	0x7e080205, 0x7e0a0204,
1350 	0x7e0c0203, 0x7e0e0202,
1351 	0x7e100201, 0x7e120200,
1352 	0x7e140209, 0x7e160208,
1353 	0x7e180207, 0x7e1a0206,
1354 	0x7e1c0205, 0x7e1e0204,
1355 	0x7e200203, 0x7e220202,
1356 	0x7e240201, 0x7e260200,
1357 	0x7e280209, 0x7e2a0208,
1358 	0x7e2c0207, 0x7e2e0206,
1359 	0x7e300205, 0x7e320204,
1360 	0x7e340203, 0x7e360202,
1361 	0x7e380201, 0x7e3a0200,
1362 	0x7e3c0209, 0x7e3e0208,
1363 	0x7e400207, 0x7e420206,
1364 	0x7e440205, 0x7e460204,
1365 	0x7e480203, 0x7e4a0202,
1366 	0x7e4c0201, 0x7e4e0200,
1367 	0x7e500209, 0x7e520208,
1368 	0x7e540207, 0x7e560206,
1369 	0x7e580205, 0x7e5a0204,
1370 	0x7e5c0203, 0x7e5e0202,
1371 	0x7e600201, 0x7e620200,
1372 	0x7e640209, 0x7e660208,
1373 	0x7e680207, 0x7e6a0206,
1374 	0x7e6c0205, 0x7e6e0204,
1375 	0x7e700203, 0x7e720202,
1376 	0x7e740201, 0x7e760200,
1377 	0x7e780209, 0x7e7a0208,
1378 	0x7e7c0207, 0x7e7e0206,
1379 	0xbf8a0000, 0xbf810000,
1380 };
1381 
1382 static const u32 sgpr_init_compute_shader[] =
1383 {
1384 	0xbe8a0100, 0xbe8c0102,
1385 	0xbe8e0104, 0xbe900106,
1386 	0xbe920108, 0xbe940100,
1387 	0xbe960102, 0xbe980104,
1388 	0xbe9a0106, 0xbe9c0108,
1389 	0xbe9e0100, 0xbea00102,
1390 	0xbea20104, 0xbea40106,
1391 	0xbea60108, 0xbea80100,
1392 	0xbeaa0102, 0xbeac0104,
1393 	0xbeae0106, 0xbeb00108,
1394 	0xbeb20100, 0xbeb40102,
1395 	0xbeb60104, 0xbeb80106,
1396 	0xbeba0108, 0xbebc0100,
1397 	0xbebe0102, 0xbec00104,
1398 	0xbec20106, 0xbec40108,
1399 	0xbec60100, 0xbec80102,
1400 	0xbee60004, 0xbee70005,
1401 	0xbeea0006, 0xbeeb0007,
1402 	0xbee80008, 0xbee90009,
1403 	0xbefc0000, 0xbf8a0000,
1404 	0xbf810000, 0x00000000,
1405 };
1406 
1407 static const u32 vgpr_init_regs[] =
1408 {
1409 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1410 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1411 	mmCOMPUTE_NUM_THREAD_X, 256*4,
1412 	mmCOMPUTE_NUM_THREAD_Y, 1,
1413 	mmCOMPUTE_NUM_THREAD_Z, 1,
1414 	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1415 	mmCOMPUTE_PGM_RSRC2, 20,
1416 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1417 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1418 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1419 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1420 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1421 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1422 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1423 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1424 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1425 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1426 };
1427 
1428 static const u32 sgpr1_init_regs[] =
1429 {
1430 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1431 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1432 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1433 	mmCOMPUTE_NUM_THREAD_Y, 1,
1434 	mmCOMPUTE_NUM_THREAD_Z, 1,
1435 	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1436 	mmCOMPUTE_PGM_RSRC2, 20,
1437 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1438 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1439 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1440 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1441 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1442 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1443 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1444 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1445 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1446 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1447 };
1448 
1449 static const u32 sgpr2_init_regs[] =
1450 {
1451 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1452 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1453 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1454 	mmCOMPUTE_NUM_THREAD_Y, 1,
1455 	mmCOMPUTE_NUM_THREAD_Z, 1,
1456 	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1457 	mmCOMPUTE_PGM_RSRC2, 20,
1458 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1459 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1460 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1461 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1462 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1463 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1464 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1465 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1466 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1467 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1468 };
1469 
1470 static const u32 sec_ded_counter_registers[] =
1471 {
1472 	mmCPC_EDC_ATC_CNT,
1473 	mmCPC_EDC_SCRATCH_CNT,
1474 	mmCPC_EDC_UCODE_CNT,
1475 	mmCPF_EDC_ATC_CNT,
1476 	mmCPF_EDC_ROQ_CNT,
1477 	mmCPF_EDC_TAG_CNT,
1478 	mmCPG_EDC_ATC_CNT,
1479 	mmCPG_EDC_DMA_CNT,
1480 	mmCPG_EDC_TAG_CNT,
1481 	mmDC_EDC_CSINVOC_CNT,
1482 	mmDC_EDC_RESTORE_CNT,
1483 	mmDC_EDC_STATE_CNT,
1484 	mmGDS_EDC_CNT,
1485 	mmGDS_EDC_GRBM_CNT,
1486 	mmGDS_EDC_OA_DED,
1487 	mmSPI_EDC_CNT,
1488 	mmSQC_ATC_EDC_GATCL1_CNT,
1489 	mmSQC_EDC_CNT,
1490 	mmSQ_EDC_DED_CNT,
1491 	mmSQ_EDC_INFO,
1492 	mmSQ_EDC_SEC_CNT,
1493 	mmTCC_EDC_CNT,
1494 	mmTCP_ATC_EDC_GATCL1_CNT,
1495 	mmTCP_EDC_CNT,
1496 	mmTD_EDC_CNT
1497 };
1498 
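/*
 * EDC (Error Detection and Correction) workaround for Carrizo: run one
 * VGPR and two SGPR dummy dispatches that overwrite the GPR files with
 * known data, then enable EDC via GB_EDC_MODE/CC_GC_EDC_CONFIG and read
 * the SEC/DED counter registers back to clear them.
 */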
1499 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1500 {
1501 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1502 	struct amdgpu_ib ib;
1503 	struct dma_fence *f = NULL;
1504 	int r, i;
1505 	u32 tmp;
1506 	unsigned total_size, vgpr_offset, sgpr_offset;
1507 	u64 gpu_addr;
1508 
1509 	/* only supported on CZ */
1510 	if (adev->asic_type != CHIP_CARRIZO)
1511 		return 0;
1512 
1513 	/* bail if the compute ring is not ready */
1514 	if (!ring->sched.ready)
1515 		return 0;
1516 
1517 	tmp = RREG32(mmGB_EDC_MODE);
1518 	WREG32(mmGB_EDC_MODE, 0);
1519 
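	/*
	 * IB size per pass, in dwords: 3 per SET_SH_REG register pair,
	 * 4 for COMPUTE_PGM_LO/HI, 5 for DISPATCH_DIRECT and 2 for the
	 * CS partial flush EVENT_WRITE; "* 4" converts dwords to bytes.
	 */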
1520 	total_size =
1521 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1522 	total_size +=
1523 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1524 	total_size +=
1525 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1526 	total_size = ALIGN(total_size, 256);
1527 	vgpr_offset = total_size;
1528 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1529 	sgpr_offset = total_size;
1530 	total_size += sizeof(sgpr_init_compute_shader);
1531 
1532 	/* allocate an indirect buffer to put the commands in */
1533 	memset(&ib, 0, sizeof(ib));
1534 	r = amdgpu_ib_get(adev, NULL, total_size,
1535 			  AMDGPU_IB_POOL_DIRECT, &ib);
1536 	if (r) {
1537 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1538 		return r;
1539 	}
1540 
1541 	/* load the compute shaders */
1542 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1543 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1544 
1545 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1546 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1547 
1548 	/* init the ib length to 0 */
1549 	ib.length_dw = 0;
1550 
1551 	/* VGPR */
1552 	/* write the register state for the compute dispatch */
1553 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1554 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1555 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1556 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1557 	}
1558 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1559 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1560 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1561 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1562 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1563 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1564 
1565 	/* write dispatch packet */
1566 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1567 	ib.ptr[ib.length_dw++] = 8; /* x */
1568 	ib.ptr[ib.length_dw++] = 1; /* y */
1569 	ib.ptr[ib.length_dw++] = 1; /* z */
1570 	ib.ptr[ib.length_dw++] =
1571 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1572 
1573 	/* write CS partial flush packet */
1574 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1575 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1576 
1577 	/* SGPR1 */
1578 	/* write the register state for the compute dispatch */
1579 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1580 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1581 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1582 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1583 	}
1584 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1585 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1586 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1587 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1588 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1589 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1590 
1591 	/* write dispatch packet */
1592 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1593 	ib.ptr[ib.length_dw++] = 8; /* x */
1594 	ib.ptr[ib.length_dw++] = 1; /* y */
1595 	ib.ptr[ib.length_dw++] = 1; /* z */
1596 	ib.ptr[ib.length_dw++] =
1597 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1598 
1599 	/* write CS partial flush packet */
1600 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1601 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1602 
1603 	/* SGPR2 */
1604 	/* write the register state for the compute dispatch */
1605 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1606 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1607 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1608 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1609 	}
1610 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1611 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1612 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1613 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1614 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1615 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1616 
1617 	/* write dispatch packet */
1618 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1619 	ib.ptr[ib.length_dw++] = 8; /* x */
1620 	ib.ptr[ib.length_dw++] = 1; /* y */
1621 	ib.ptr[ib.length_dw++] = 1; /* z */
1622 	ib.ptr[ib.length_dw++] =
1623 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1624 
1625 	/* write CS partial flush packet */
1626 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1627 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1628 
1629 	/* schedule the ib on the ring */
1630 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1631 	if (r) {
1632 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1633 		goto fail;
1634 	}
1635 
1636 	/* wait for the GPU to finish processing the IB */
1637 	r = dma_fence_wait(f, false);
1638 	if (r) {
1639 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1640 		goto fail;
1641 	}
1642 
1643 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1644 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1645 	WREG32(mmGB_EDC_MODE, tmp);
1646 
1647 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1648 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1649 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1650 
1652 	/* read back registers to clear the counters */
1653 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1654 		RREG32(sec_ded_counter_registers[i]);
1655 
1656 fail:
1657 	amdgpu_ib_free(&ib, NULL);
1658 	dma_fence_put(f);
1659 
1660 	return r;
1661 }
1662 
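/*
 * Fill in adev->gfx.config from per-ASIC golden values plus what the
 * memory controller reports (bank/rank count and row size).
 */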
1663 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1664 {
1665 	u32 gb_addr_config;
1666 	u32 mc_arb_ramcfg;
1667 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1668 	u32 tmp;
1669 	int ret;
1670 
1671 	switch (adev->asic_type) {
1672 	case CHIP_TOPAZ:
1673 		adev->gfx.config.max_shader_engines = 1;
1674 		adev->gfx.config.max_tile_pipes = 2;
1675 		adev->gfx.config.max_cu_per_sh = 6;
1676 		adev->gfx.config.max_sh_per_se = 1;
1677 		adev->gfx.config.max_backends_per_se = 2;
1678 		adev->gfx.config.max_texture_channel_caches = 2;
1679 		adev->gfx.config.max_gprs = 256;
1680 		adev->gfx.config.max_gs_threads = 32;
1681 		adev->gfx.config.max_hw_contexts = 8;
1682 
1683 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1684 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1685 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1686 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1687 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1688 		break;
1689 	case CHIP_FIJI:
1690 		adev->gfx.config.max_shader_engines = 4;
1691 		adev->gfx.config.max_tile_pipes = 16;
1692 		adev->gfx.config.max_cu_per_sh = 16;
1693 		adev->gfx.config.max_sh_per_se = 1;
1694 		adev->gfx.config.max_backends_per_se = 4;
1695 		adev->gfx.config.max_texture_channel_caches = 16;
1696 		adev->gfx.config.max_gprs = 256;
1697 		adev->gfx.config.max_gs_threads = 32;
1698 		adev->gfx.config.max_hw_contexts = 8;
1699 
1700 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1701 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1702 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1703 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1704 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1705 		break;
1706 	case CHIP_POLARIS11:
1707 	case CHIP_POLARIS12:
1708 		ret = amdgpu_atombios_get_gfx_info(adev);
1709 		if (ret)
1710 			return ret;
1711 		adev->gfx.config.max_gprs = 256;
1712 		adev->gfx.config.max_gs_threads = 32;
1713 		adev->gfx.config.max_hw_contexts = 8;
1714 
1715 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1716 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1717 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1718 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1719 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1720 		break;
1721 	case CHIP_POLARIS10:
1722 	case CHIP_VEGAM:
1723 		ret = amdgpu_atombios_get_gfx_info(adev);
1724 		if (ret)
1725 			return ret;
1726 		adev->gfx.config.max_gprs = 256;
1727 		adev->gfx.config.max_gs_threads = 32;
1728 		adev->gfx.config.max_hw_contexts = 8;
1729 
1730 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1731 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1732 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1733 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1734 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1735 		break;
1736 	case CHIP_TONGA:
1737 		adev->gfx.config.max_shader_engines = 4;
1738 		adev->gfx.config.max_tile_pipes = 8;
1739 		adev->gfx.config.max_cu_per_sh = 8;
1740 		adev->gfx.config.max_sh_per_se = 1;
1741 		adev->gfx.config.max_backends_per_se = 2;
1742 		adev->gfx.config.max_texture_channel_caches = 8;
1743 		adev->gfx.config.max_gprs = 256;
1744 		adev->gfx.config.max_gs_threads = 32;
1745 		adev->gfx.config.max_hw_contexts = 8;
1746 
1747 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1748 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1749 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1750 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1751 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1752 		break;
1753 	case CHIP_CARRIZO:
1754 		adev->gfx.config.max_shader_engines = 1;
1755 		adev->gfx.config.max_tile_pipes = 2;
1756 		adev->gfx.config.max_sh_per_se = 1;
1757 		adev->gfx.config.max_backends_per_se = 2;
1758 		adev->gfx.config.max_cu_per_sh = 8;
1759 		adev->gfx.config.max_texture_channel_caches = 2;
1760 		adev->gfx.config.max_gprs = 256;
1761 		adev->gfx.config.max_gs_threads = 32;
1762 		adev->gfx.config.max_hw_contexts = 8;
1763 
1764 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1765 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1766 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1767 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1768 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1769 		break;
1770 	case CHIP_STONEY:
1771 		adev->gfx.config.max_shader_engines = 1;
1772 		adev->gfx.config.max_tile_pipes = 2;
1773 		adev->gfx.config.max_sh_per_se = 1;
1774 		adev->gfx.config.max_backends_per_se = 1;
1775 		adev->gfx.config.max_cu_per_sh = 3;
1776 		adev->gfx.config.max_texture_channel_caches = 2;
1777 		adev->gfx.config.max_gprs = 256;
1778 		adev->gfx.config.max_gs_threads = 16;
1779 		adev->gfx.config.max_hw_contexts = 8;
1780 
1781 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1782 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1783 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1784 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1785 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1786 		break;
1787 	default:
1788 		adev->gfx.config.max_shader_engines = 2;
1789 		adev->gfx.config.max_tile_pipes = 4;
1790 		adev->gfx.config.max_cu_per_sh = 2;
1791 		adev->gfx.config.max_sh_per_se = 1;
1792 		adev->gfx.config.max_backends_per_se = 2;
1793 		adev->gfx.config.max_texture_channel_caches = 4;
1794 		adev->gfx.config.max_gprs = 256;
1795 		adev->gfx.config.max_gs_threads = 32;
1796 		adev->gfx.config.max_hw_contexts = 8;
1797 
1798 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1799 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1800 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1801 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1802 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1803 		break;
1804 	}
1805 
1806 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1807 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1808 
1809 	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
1810 				MC_ARB_RAMCFG, NOOFBANK);
1811 	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
1812 				MC_ARB_RAMCFG, NOOFRANKS);
1813 
1814 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1815 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1816 	if (adev->flags & AMD_IS_APU) {
1817 		/* Get memory bank mapping mode. */
1818 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1819 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1820 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1821 
1822 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1823 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1824 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1825 
1826 		/* Validate settings in case only one DIMM is installed. */
1827 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1828 			dimm00_addr_map = 0;
1829 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1830 			dimm01_addr_map = 0;
1831 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1832 			dimm10_addr_map = 0;
1833 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1834 			dimm11_addr_map = 0;
1835 
1836 		/* If the DIMM address map is 8GB, the row size should be 2KB, otherwise 1KB. */
1837 		/* If row size(DIMM1) != row size(DIMM0), use the larger row size. */
1838 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1839 			adev->gfx.config.mem_row_size_in_kb = 2;
1840 		else
1841 			adev->gfx.config.mem_row_size_in_kb = 1;
1842 	} else {
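		/* row size = 2^(NOOFCOLS + 8) columns * 4 bytes, in KB, clamped to 4 */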
1843 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1844 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1845 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1846 			adev->gfx.config.mem_row_size_in_kb = 4;
1847 	}
1848 
1849 	adev->gfx.config.shader_engine_tile_size = 32;
1850 	adev->gfx.config.num_gpus = 1;
1851 	adev->gfx.config.multi_gpu_tile_size = 64;
1852 
1853 	/* fix up row size */
1854 	switch (adev->gfx.config.mem_row_size_in_kb) {
1855 	case 1:
1856 	default:
1857 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1858 		break;
1859 	case 2:
1860 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1861 		break;
1862 	case 4:
1863 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1864 		break;
1865 	}
1866 	adev->gfx.config.gb_addr_config = gb_addr_config;
1867 
1868 	return 0;
1869 }
1870 
1871 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1872 					int mec, int pipe, int queue)
1873 {
1874 	int r;
1875 	unsigned irq_type;
1876 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1877 	unsigned int hw_prio;
1878 
1881 	/* mec0 is me1 */
1882 	ring->me = mec + 1;
1883 	ring->pipe = pipe;
1884 	ring->queue = queue;
1885 
1886 	ring->ring_obj = NULL;
1887 	ring->use_doorbell = true;
1888 	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1889 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1890 				+ (ring_id * GFX8_MEC_HPD_SIZE);
1891 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1892 
1893 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1894 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1895 		+ ring->pipe;
1896 
1897 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1898 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
1899 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1900 	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1901 			     hw_prio, NULL);
1902 	if (r)
1903 		return r;
1904 
1906 	return 0;
1907 }
1908 
1909 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1910 
1911 static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
1912 {
1913 	int i, j, k, r, ring_id;
1914 	int xcc_id = 0;
1915 	struct amdgpu_ring *ring;
1916 	struct amdgpu_device *adev = ip_block->adev;
1917 
1918 	switch (adev->asic_type) {
1919 	case CHIP_TONGA:
1920 	case CHIP_CARRIZO:
1921 	case CHIP_FIJI:
1922 	case CHIP_POLARIS10:
1923 	case CHIP_POLARIS11:
1924 	case CHIP_POLARIS12:
1925 	case CHIP_VEGAM:
1926 		adev->gfx.mec.num_mec = 2;
1927 		break;
1928 	case CHIP_TOPAZ:
1929 	case CHIP_STONEY:
1930 	default:
1931 		adev->gfx.mec.num_mec = 1;
1932 		break;
1933 	}
1934 
1935 	adev->gfx.mec.num_pipe_per_mec = 4;
1936 	adev->gfx.mec.num_queue_per_pipe = 8;
1937 
1938 	/* EOP Event */
1939 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1940 	if (r)
1941 		return r;
1942 
1943 	/* Privileged reg */
1944 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1945 			      &adev->gfx.priv_reg_irq);
1946 	if (r)
1947 		return r;
1948 
1949 	/* Privileged inst */
1950 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1951 			      &adev->gfx.priv_inst_irq);
1952 	if (r)
1953 		return r;
1954 
1955 	/* Add CP EDC/ECC irq */
1956 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1957 			      &adev->gfx.cp_ecc_error_irq);
1958 	if (r)
1959 		return r;
1960 
1961 	/* SQ interrupts. */
1962 	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1963 			      &adev->gfx.sq_irq);
1964 	if (r) {
1965 		DRM_ERROR("amdgpu_irq_add_id() for SQ failed: %d\n", r);
1966 		return r;
1967 	}
1968 
1969 	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1970 
1971 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1972 
1973 	r = gfx_v8_0_init_microcode(adev);
1974 	if (r) {
1975 		DRM_ERROR("Failed to load gfx firmware!\n");
1976 		return r;
1977 	}
1978 
1979 	r = adev->gfx.rlc.funcs->init(adev);
1980 	if (r) {
1981 		DRM_ERROR("Failed to init rlc BOs!\n");
1982 		return r;
1983 	}
1984 
1985 	r = gfx_v8_0_mec_init(adev);
1986 	if (r) {
1987 		DRM_ERROR("Failed to init MEC BOs!\n");
1988 		return r;
1989 	}
1990 
1991 	/* set up the gfx ring */
1992 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1993 		ring = &adev->gfx.gfx_ring[i];
1994 		ring->ring_obj = NULL;
1995 		sprintf(ring->name, "gfx");
1996 		/* no gfx doorbells on iceland */
1997 		if (adev->asic_type != CHIP_TOPAZ) {
1998 			ring->use_doorbell = true;
1999 			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2000 		}
2001 
2002 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2003 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2004 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2005 		if (r)
2006 			return r;
2007 	}
2008 
2010 	/* set up the compute queues - allocate horizontally across pipes */
2011 	ring_id = 0;
2012 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2013 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2014 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2015 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2016 								     k, j))
2017 					continue;
2018 
2019 				r = gfx_v8_0_compute_ring_init(adev,
2020 								ring_id,
2021 								i, k, j);
2022 				if (r)
2023 					return r;
2024 
2025 				ring_id++;
2026 			}
2027 		}
2028 	}
2029 
2030 	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE, 0);
2031 	if (r) {
2032 		DRM_ERROR("Failed to init KIQ BOs!\n");
2033 		return r;
2034 	}
2035 
2036 	r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2037 	if (r)
2038 		return r;
2039 
2040 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2041 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation), 0);
2042 	if (r)
2043 		return r;
2044 
2045 	adev->gfx.ce_ram_size = 0x8000;
2046 
2047 	r = gfx_v8_0_gpu_early_init(adev);
2048 	if (r)
2049 		return r;
2050 
2051 	return 0;
2052 }
2053 
2054 static int gfx_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
2055 {
2056 	struct amdgpu_device *adev = ip_block->adev;
2057 	int i;
2058 
2059 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2060 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2061 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2062 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2063 
2064 	amdgpu_gfx_mqd_sw_fini(adev, 0);
2065 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2066 	amdgpu_gfx_kiq_fini(adev, 0);
2067 
2068 	gfx_v8_0_mec_fini(adev);
2069 	amdgpu_gfx_rlc_fini(adev);
2070 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2071 				&adev->gfx.rlc.clear_state_gpu_addr,
2072 				(void **)&adev->gfx.rlc.cs_ptr);
2073 	if ((adev->asic_type == CHIP_CARRIZO) ||
2074 	    (adev->asic_type == CHIP_STONEY)) {
2075 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2076 				&adev->gfx.rlc.cp_table_gpu_addr,
2077 				(void **)&adev->gfx.rlc.cp_table_ptr);
2078 	}
2079 	gfx_v8_0_free_microcode(adev);
2080 
2081 	return 0;
2082 }
2083 
2084 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2085 {
2086 	uint32_t *modearray, *mod2array;
2087 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2088 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2089 	u32 reg_offset;
2090 
2091 	modearray = adev->gfx.config.tile_mode_array;
2092 	mod2array = adev->gfx.config.macrotile_mode_array;
2093 
2094 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2095 		modearray[reg_offset] = 0;
2096 
2097 	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2098 		mod2array[reg_offset] = 0;
2099 
2100 	switch (adev->asic_type) {
2101 	case CHIP_TOPAZ:
2102 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2103 				PIPE_CONFIG(ADDR_SURF_P2) |
2104 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2105 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2106 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2107 				PIPE_CONFIG(ADDR_SURF_P2) |
2108 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2109 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2110 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2111 				PIPE_CONFIG(ADDR_SURF_P2) |
2112 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2113 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2114 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2115 				PIPE_CONFIG(ADDR_SURF_P2) |
2116 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2117 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2118 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2119 				PIPE_CONFIG(ADDR_SURF_P2) |
2120 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2121 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2122 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2123 				PIPE_CONFIG(ADDR_SURF_P2) |
2124 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2125 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2126 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2127 				PIPE_CONFIG(ADDR_SURF_P2) |
2128 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2129 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2130 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2131 				PIPE_CONFIG(ADDR_SURF_P2));
2132 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2133 				PIPE_CONFIG(ADDR_SURF_P2) |
2134 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2135 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2136 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2137 				 PIPE_CONFIG(ADDR_SURF_P2) |
2138 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2139 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2140 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2141 				 PIPE_CONFIG(ADDR_SURF_P2) |
2142 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2143 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2144 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2145 				 PIPE_CONFIG(ADDR_SURF_P2) |
2146 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2147 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2148 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2149 				 PIPE_CONFIG(ADDR_SURF_P2) |
2150 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2151 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2152 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2153 				 PIPE_CONFIG(ADDR_SURF_P2) |
2154 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2155 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2156 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2157 				 PIPE_CONFIG(ADDR_SURF_P2) |
2158 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2159 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2160 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2161 				 PIPE_CONFIG(ADDR_SURF_P2) |
2162 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2164 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2165 				 PIPE_CONFIG(ADDR_SURF_P2) |
2166 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2167 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2168 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2169 				 PIPE_CONFIG(ADDR_SURF_P2) |
2170 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2171 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2172 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2173 				 PIPE_CONFIG(ADDR_SURF_P2) |
2174 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2175 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2176 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2177 				 PIPE_CONFIG(ADDR_SURF_P2) |
2178 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2179 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2180 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2181 				 PIPE_CONFIG(ADDR_SURF_P2) |
2182 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2183 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2184 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2185 				 PIPE_CONFIG(ADDR_SURF_P2) |
2186 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2187 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2188 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2189 				 PIPE_CONFIG(ADDR_SURF_P2) |
2190 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2191 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2192 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2193 				 PIPE_CONFIG(ADDR_SURF_P2) |
2194 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2195 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2196 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2197 				 PIPE_CONFIG(ADDR_SURF_P2) |
2198 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2199 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2200 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2201 				 PIPE_CONFIG(ADDR_SURF_P2) |
2202 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2203 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2204 
2205 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2206 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2207 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2208 				NUM_BANKS(ADDR_SURF_8_BANK));
2209 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2210 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2211 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2212 				NUM_BANKS(ADDR_SURF_8_BANK));
2213 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2214 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2215 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2216 				NUM_BANKS(ADDR_SURF_8_BANK));
2217 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2218 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2219 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2220 				NUM_BANKS(ADDR_SURF_8_BANK));
2221 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2222 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2223 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2224 				NUM_BANKS(ADDR_SURF_8_BANK));
2225 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2226 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2227 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2228 				NUM_BANKS(ADDR_SURF_8_BANK));
2229 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2230 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2231 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2232 				NUM_BANKS(ADDR_SURF_8_BANK));
2233 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2234 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2235 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2236 				NUM_BANKS(ADDR_SURF_16_BANK));
2237 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2238 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2239 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2240 				NUM_BANKS(ADDR_SURF_16_BANK));
2241 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2242 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2243 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2244 				 NUM_BANKS(ADDR_SURF_16_BANK));
2245 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2246 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2247 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2248 				 NUM_BANKS(ADDR_SURF_16_BANK));
2249 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2251 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252 				 NUM_BANKS(ADDR_SURF_16_BANK));
2253 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2254 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2255 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2256 				 NUM_BANKS(ADDR_SURF_16_BANK));
2257 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2259 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2260 				 NUM_BANKS(ADDR_SURF_8_BANK));
2261 
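		/* entries 7, 12, 17 and 23 are not programmed on Topaz and keep their reset values */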
2262 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2263 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2264 			    reg_offset != 23)
2265 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2266 
2267 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2268 			if (reg_offset != 7)
2269 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2270 
2271 		break;
2272 	case CHIP_FIJI:
2273 	case CHIP_VEGAM:
2274 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2275 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2276 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2277 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2278 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2279 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2281 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2282 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2285 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2286 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2287 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2289 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2290 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2293 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2294 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2295 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2297 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2298 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2299 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2301 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2303 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2304 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2305 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2306 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2307 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2308 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2309 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2310 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2311 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2312 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2313 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2314 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2315 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2316 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2317 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2319 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2320 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2321 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2322 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2323 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2324 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2325 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2327 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2328 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2331 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2333 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2335 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2336 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2337 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2338 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2339 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2340 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2341 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2342 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2344 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2345 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2347 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2348 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2349 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2351 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2352 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2353 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2355 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2356 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2357 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2359 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2360 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2361 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2363 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2364 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2365 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2366 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2367 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2368 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2369 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2372 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2373 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2375 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2376 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2377 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2379 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2381 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2383 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2384 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2385 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2387 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2389 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2391 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2392 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2393 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2394 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2395 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2396 
2397 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2399 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400 				NUM_BANKS(ADDR_SURF_8_BANK));
2401 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2402 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2403 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2404 				NUM_BANKS(ADDR_SURF_8_BANK));
2405 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2406 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2407 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2408 				NUM_BANKS(ADDR_SURF_8_BANK));
2409 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2411 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2412 				NUM_BANKS(ADDR_SURF_8_BANK));
2413 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2415 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2416 				NUM_BANKS(ADDR_SURF_8_BANK));
2417 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2419 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2420 				NUM_BANKS(ADDR_SURF_8_BANK));
2421 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424 				NUM_BANKS(ADDR_SURF_8_BANK));
2425 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2427 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2428 				NUM_BANKS(ADDR_SURF_8_BANK));
2429 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2431 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2432 				NUM_BANKS(ADDR_SURF_8_BANK));
2433 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2435 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2436 				 NUM_BANKS(ADDR_SURF_8_BANK));
2437 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2439 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2440 				 NUM_BANKS(ADDR_SURF_8_BANK));
2441 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2443 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444 				 NUM_BANKS(ADDR_SURF_8_BANK));
2445 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2447 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448 				 NUM_BANKS(ADDR_SURF_8_BANK));
2449 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2451 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2452 				 NUM_BANKS(ADDR_SURF_4_BANK));
2453 
2454 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2455 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2456 
2457 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2458 			if (reg_offset != 7)
2459 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2460 
2461 		break;
2462 	case CHIP_TONGA:
2463 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2464 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2465 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2466 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2467 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2468 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2470 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2471 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2472 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2474 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2475 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2476 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2478 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2479 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2482 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2483 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2484 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2486 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2487 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2488 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2490 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2491 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2492 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2493 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2494 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2495 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2496 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2497 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2498 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2499 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2500 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2501 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2502 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2503 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2504 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2505 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2506 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2508 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2509 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2510 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2511 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2512 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2513 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2514 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2516 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2520 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2521 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2522 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2524 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2525 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2526 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2529 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2530 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2531 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2532 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2533 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2534 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2536 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2537 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2538 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2540 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2541 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2542 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2544 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2545 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2546 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2548 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2549 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2550 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2552 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2553 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2554 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2555 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2556 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2557 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2558 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2560 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2561 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2562 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2564 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2565 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2566 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2568 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2570 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2572 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2573 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2574 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2576 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2577 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2578 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2580 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2581 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2582 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2583 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2584 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2585 
2586 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2587 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2588 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2589 				NUM_BANKS(ADDR_SURF_16_BANK));
2590 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2591 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2592 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2593 				NUM_BANKS(ADDR_SURF_16_BANK));
2594 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2595 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2596 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2597 				NUM_BANKS(ADDR_SURF_16_BANK));
2598 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2600 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2601 				NUM_BANKS(ADDR_SURF_16_BANK));
2602 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2604 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2605 				NUM_BANKS(ADDR_SURF_16_BANK));
2606 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2608 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2609 				NUM_BANKS(ADDR_SURF_16_BANK));
2610 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2612 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2613 				NUM_BANKS(ADDR_SURF_16_BANK));
2614 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2616 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2617 				NUM_BANKS(ADDR_SURF_16_BANK));
2618 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2620 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2621 				NUM_BANKS(ADDR_SURF_16_BANK));
2622 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2624 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2625 				 NUM_BANKS(ADDR_SURF_16_BANK));
2626 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2628 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2629 				 NUM_BANKS(ADDR_SURF_16_BANK));
2630 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2632 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2633 				 NUM_BANKS(ADDR_SURF_8_BANK));
2634 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2636 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2637 				 NUM_BANKS(ADDR_SURF_4_BANK));
2638 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2641 				 NUM_BANKS(ADDR_SURF_4_BANK));
2642 
2643 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2644 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2645 
2646 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2647 			if (reg_offset != 7)
2648 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2649 
2650 		break;
2651 	case CHIP_POLARIS11:
2652 	case CHIP_POLARIS12:
2653 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2656 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2657 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2660 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2661 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2664 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2665 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2668 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2669 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2672 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2673 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2674 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2676 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2677 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2678 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2680 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2681 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2682 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2684 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2685 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2686 				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2687 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2688 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2689 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2690 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2691 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2692 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2693 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2694 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2695 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2696 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2699 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2702 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2703 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2704 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2706 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2711 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2712 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2716 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2719 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2720 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2722 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2723 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2724 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2726 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2727 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2728 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2730 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2731 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2732 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2734 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2735 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2736 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2738 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2739 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2740 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2742 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2743 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2744 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2746 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2747 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2748 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2750 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2751 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2752 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2754 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2755 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2756 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2758 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2760 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2762 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2763 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2764 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2766 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2767 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2768 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2770 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2771 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2772 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2774 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2775 
2776 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2778 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2779 				NUM_BANKS(ADDR_SURF_16_BANK));
2780 
2781 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2782 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2783 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2784 				NUM_BANKS(ADDR_SURF_16_BANK));
2785 
2786 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2788 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2789 				NUM_BANKS(ADDR_SURF_16_BANK));
2790 
2791 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2792 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2793 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2794 				NUM_BANKS(ADDR_SURF_16_BANK));
2795 
2796 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2797 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2798 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2799 				NUM_BANKS(ADDR_SURF_16_BANK));
2800 
2801 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2802 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2803 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2804 				NUM_BANKS(ADDR_SURF_16_BANK));
2805 
2806 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2807 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2808 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2809 				NUM_BANKS(ADDR_SURF_16_BANK));
2810 
2811 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2812 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2813 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2814 				NUM_BANKS(ADDR_SURF_16_BANK));
2815 
2816 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2817 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2818 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2819 				NUM_BANKS(ADDR_SURF_16_BANK));
2820 
2821 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2823 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824 				NUM_BANKS(ADDR_SURF_16_BANK));
2825 
2826 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2828 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2829 				NUM_BANKS(ADDR_SURF_16_BANK));
2830 
2831 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2833 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2834 				NUM_BANKS(ADDR_SURF_16_BANK));
2835 
2836 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2838 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2839 				NUM_BANKS(ADDR_SURF_8_BANK));
2840 
2841 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2844 				NUM_BANKS(ADDR_SURF_4_BANK));
2845 
2846 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2847 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2848 
2849 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2850 			if (reg_offset != 7)
2851 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2852 
2853 		break;
2854 	case CHIP_POLARIS10:
2855 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2857 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2858 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2859 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2861 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2862 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2863 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2864 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2865 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2866 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2867 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2869 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2870 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2871 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2874 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2875 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2876 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2878 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2879 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2880 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2882 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2883 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2885 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2886 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2887 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2888 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2889 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2890 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2891 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2892 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2895 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2896 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2897 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2898 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2900 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2901 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2902 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2904 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2905 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2906 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2912 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2913 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2914 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2916 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2918 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2920 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2921 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2923 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2924 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2925 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2926 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2928 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2929 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2930 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2932 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2933 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2934 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2936 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2937 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2938 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2940 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2941 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2942 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2944 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2945 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2946 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2947 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2948 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2949 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2950 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2953 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2954 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2956 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2957 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2958 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2960 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2962 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2966 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2969 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2970 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2973 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2974 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2975 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2976 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2977 
2978 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2979 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2980 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2981 				NUM_BANKS(ADDR_SURF_16_BANK));
2982 
2983 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986 				NUM_BANKS(ADDR_SURF_16_BANK));
2987 
2988 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2989 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2990 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991 				NUM_BANKS(ADDR_SURF_16_BANK));
2992 
2993 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2994 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2995 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2996 				NUM_BANKS(ADDR_SURF_16_BANK));
2997 
2998 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2999 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3000 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3001 				NUM_BANKS(ADDR_SURF_16_BANK));
3002 
3003 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3004 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3005 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3006 				NUM_BANKS(ADDR_SURF_16_BANK));
3007 
3008 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3011 				NUM_BANKS(ADDR_SURF_16_BANK));
3012 
3013 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3014 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3015 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3016 				NUM_BANKS(ADDR_SURF_16_BANK));
3017 
3018 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3020 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3021 				NUM_BANKS(ADDR_SURF_16_BANK));
3022 
3023 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3025 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3026 				NUM_BANKS(ADDR_SURF_16_BANK));
3027 
3028 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3030 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3031 				NUM_BANKS(ADDR_SURF_16_BANK));
3032 
3033 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3035 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3036 				NUM_BANKS(ADDR_SURF_8_BANK));
3037 
3038 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3040 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3041 				NUM_BANKS(ADDR_SURF_4_BANK));
3042 
3043 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3045 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3046 				NUM_BANKS(ADDR_SURF_4_BANK));
3047 
3048 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3049 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3050 
3051 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3052 			if (reg_offset != 7)
3053 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3054 
3055 		break;
3056 	case CHIP_STONEY:
3057 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3058 				PIPE_CONFIG(ADDR_SURF_P2) |
3059 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3060 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3061 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3062 				PIPE_CONFIG(ADDR_SURF_P2) |
3063 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3064 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3065 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3066 				PIPE_CONFIG(ADDR_SURF_P2) |
3067 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3068 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3069 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3070 				PIPE_CONFIG(ADDR_SURF_P2) |
3071 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3072 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3073 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074 				PIPE_CONFIG(ADDR_SURF_P2) |
3075 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3076 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3077 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3078 				PIPE_CONFIG(ADDR_SURF_P2) |
3079 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3080 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3081 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3082 				PIPE_CONFIG(ADDR_SURF_P2) |
3083 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3084 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3085 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3086 				PIPE_CONFIG(ADDR_SURF_P2));
3087 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3088 				PIPE_CONFIG(ADDR_SURF_P2) |
3089 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3090 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3091 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3092 				 PIPE_CONFIG(ADDR_SURF_P2) |
3093 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3094 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3095 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3096 				 PIPE_CONFIG(ADDR_SURF_P2) |
3097 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3098 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3099 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3100 				 PIPE_CONFIG(ADDR_SURF_P2) |
3101 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3102 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3103 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3104 				 PIPE_CONFIG(ADDR_SURF_P2) |
3105 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3106 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3108 				 PIPE_CONFIG(ADDR_SURF_P2) |
3109 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3110 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112 				 PIPE_CONFIG(ADDR_SURF_P2) |
3113 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3114 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3116 				 PIPE_CONFIG(ADDR_SURF_P2) |
3117 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3118 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3119 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3120 				 PIPE_CONFIG(ADDR_SURF_P2) |
3121 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3122 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3123 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3124 				 PIPE_CONFIG(ADDR_SURF_P2) |
3125 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3126 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3127 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3128 				 PIPE_CONFIG(ADDR_SURF_P2) |
3129 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3130 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3131 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3132 				 PIPE_CONFIG(ADDR_SURF_P2) |
3133 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3134 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3135 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3136 				 PIPE_CONFIG(ADDR_SURF_P2) |
3137 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3138 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3139 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3140 				 PIPE_CONFIG(ADDR_SURF_P2) |
3141 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3142 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3143 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3144 				 PIPE_CONFIG(ADDR_SURF_P2) |
3145 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3146 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3147 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3148 				 PIPE_CONFIG(ADDR_SURF_P2) |
3149 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3150 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3151 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3152 				 PIPE_CONFIG(ADDR_SURF_P2) |
3153 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3154 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3155 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3156 				 PIPE_CONFIG(ADDR_SURF_P2) |
3157 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3158 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3159 
3160 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3161 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3162 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3163 				NUM_BANKS(ADDR_SURF_8_BANK));
3164 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3165 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3166 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3167 				NUM_BANKS(ADDR_SURF_8_BANK));
3168 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3169 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3170 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3171 				NUM_BANKS(ADDR_SURF_8_BANK));
3172 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3173 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3174 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3175 				NUM_BANKS(ADDR_SURF_8_BANK));
3176 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3178 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3179 				NUM_BANKS(ADDR_SURF_8_BANK));
3180 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3182 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3183 				NUM_BANKS(ADDR_SURF_8_BANK));
3184 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187 				NUM_BANKS(ADDR_SURF_8_BANK));
3188 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3189 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3190 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3191 				NUM_BANKS(ADDR_SURF_16_BANK));
3192 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3193 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3194 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3195 				NUM_BANKS(ADDR_SURF_16_BANK));
3196 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3197 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3198 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199 				 NUM_BANKS(ADDR_SURF_16_BANK));
3200 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3201 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3202 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3203 				 NUM_BANKS(ADDR_SURF_16_BANK));
3204 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3206 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207 				 NUM_BANKS(ADDR_SURF_16_BANK));
3208 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211 				 NUM_BANKS(ADDR_SURF_16_BANK));
3212 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3214 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3215 				 NUM_BANKS(ADDR_SURF_8_BANK));
3216 
3217 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3218 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3219 			    reg_offset != 23)
3220 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3221 
3222 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3223 			if (reg_offset != 7)
3224 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3225 
3226 		break;
3227 	default:
3228 		dev_warn(adev->dev,
3229 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3230 			 adev->asic_type);
3231 		fallthrough;
3232 
3233 	case CHIP_CARRIZO:
3234 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3235 				PIPE_CONFIG(ADDR_SURF_P2) |
3236 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3237 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3238 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3239 				PIPE_CONFIG(ADDR_SURF_P2) |
3240 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3241 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3242 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3243 				PIPE_CONFIG(ADDR_SURF_P2) |
3244 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3245 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3246 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3247 				PIPE_CONFIG(ADDR_SURF_P2) |
3248 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3249 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3250 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3251 				PIPE_CONFIG(ADDR_SURF_P2) |
3252 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3253 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3254 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3255 				PIPE_CONFIG(ADDR_SURF_P2) |
3256 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3257 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3258 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3259 				PIPE_CONFIG(ADDR_SURF_P2) |
3260 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3261 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3262 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3263 				PIPE_CONFIG(ADDR_SURF_P2));
3264 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3265 				PIPE_CONFIG(ADDR_SURF_P2) |
3266 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3267 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3268 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3269 				 PIPE_CONFIG(ADDR_SURF_P2) |
3270 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3271 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3272 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3273 				 PIPE_CONFIG(ADDR_SURF_P2) |
3274 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3275 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3276 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3277 				 PIPE_CONFIG(ADDR_SURF_P2) |
3278 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3279 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3280 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3281 				 PIPE_CONFIG(ADDR_SURF_P2) |
3282 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3283 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3284 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3285 				 PIPE_CONFIG(ADDR_SURF_P2) |
3286 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3287 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3288 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3289 				 PIPE_CONFIG(ADDR_SURF_P2) |
3290 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3291 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3292 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3293 				 PIPE_CONFIG(ADDR_SURF_P2) |
3294 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3295 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3296 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3297 				 PIPE_CONFIG(ADDR_SURF_P2) |
3298 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3299 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3300 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3301 				 PIPE_CONFIG(ADDR_SURF_P2) |
3302 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3303 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3304 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3305 				 PIPE_CONFIG(ADDR_SURF_P2) |
3306 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3307 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3308 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3309 				 PIPE_CONFIG(ADDR_SURF_P2) |
3310 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3311 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3312 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3313 				 PIPE_CONFIG(ADDR_SURF_P2) |
3314 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3315 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3316 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3317 				 PIPE_CONFIG(ADDR_SURF_P2) |
3318 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3319 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3320 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3321 				 PIPE_CONFIG(ADDR_SURF_P2) |
3322 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3323 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3324 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3325 				 PIPE_CONFIG(ADDR_SURF_P2) |
3326 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3327 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3328 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3329 				 PIPE_CONFIG(ADDR_SURF_P2) |
3330 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3331 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3332 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3333 				 PIPE_CONFIG(ADDR_SURF_P2) |
3334 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3335 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3336 
3337 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3338 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3339 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3340 				NUM_BANKS(ADDR_SURF_8_BANK));
3341 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3342 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3343 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3344 				NUM_BANKS(ADDR_SURF_8_BANK));
3345 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3346 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3347 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3348 				NUM_BANKS(ADDR_SURF_8_BANK));
3349 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3350 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3351 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3352 				NUM_BANKS(ADDR_SURF_8_BANK));
3353 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3354 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3355 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3356 				NUM_BANKS(ADDR_SURF_8_BANK));
3357 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3359 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3360 				NUM_BANKS(ADDR_SURF_8_BANK));
3361 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3364 				NUM_BANKS(ADDR_SURF_8_BANK));
3365 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3366 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3367 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3368 				NUM_BANKS(ADDR_SURF_16_BANK));
3369 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3370 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3371 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3372 				NUM_BANKS(ADDR_SURF_16_BANK));
3373 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3374 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3375 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3376 				 NUM_BANKS(ADDR_SURF_16_BANK));
3377 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3378 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3379 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3380 				 NUM_BANKS(ADDR_SURF_16_BANK));
3381 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3383 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384 				 NUM_BANKS(ADDR_SURF_16_BANK));
3385 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3387 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388 				 NUM_BANKS(ADDR_SURF_16_BANK));
3389 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392 				 NUM_BANKS(ADDR_SURF_8_BANK));
3393 
3394 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3395 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3396 			    reg_offset != 23)
3397 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3398 
3399 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3400 			if (reg_offset != 7)
3401 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3402 
3403 		break;
3404 	}
3405 }
3406 
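/*
 * Steer subsequent register accesses at a specific shader engine (SE),
 * shader array (SH) and instance via GRBM_GFX_INDEX.  Passing 0xffffffff
 * for a field selects the corresponding *_BROADCAST_WRITES mode so that a
 * single write reaches every unit.  Callers in this file take
 * grbm_idx_mutex around select/write/deselect sequences.
 */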
3407 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3408 				  u32 se_num, u32 sh_num, u32 instance,
3409 				  int xcc_id)
3410 {
3411 	u32 data;
3412 
3413 	if (instance == 0xffffffff)
3414 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3415 	else
3416 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3417 
3418 	if (se_num == 0xffffffff)
3419 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3420 	else
3421 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3422 
3423 	if (sh_num == 0xffffffff)
3424 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3425 	else
3426 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3427 
3428 	WREG32(mmGRBM_GFX_INDEX, data);
3429 }
3430 
3431 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3432 				  u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
3433 {
3434 	vi_srbm_select(adev, me, pipe, q, vm);
3435 }
3436 
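/*
 * Return the bitmap of active render backends (RBs) for the currently
 * selected SE/SH: OR the fused and user-set disable registers, pull out
 * the BACKEND_DISABLE field, then invert it under a mask sized for the
 * backends of a single shader array.
 */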
3437 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3438 {
3439 	u32 data, mask;
3440 
3441 	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3442 		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3443 
3444 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3445 
3446 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3447 					 adev->gfx.config.max_sh_per_se);
3448 
3449 	return (~data) & mask;
3450 }
3451 
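/*
 * Golden PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 values for a fully
 * populated chip of each ASIC type; parts with harvested RBs get these
 * rewritten by gfx_v8_0_write_harvested_raster_configs() instead.
 */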
3452 static void
3453 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3454 {
3455 	switch (adev->asic_type) {
3456 	case CHIP_FIJI:
3457 	case CHIP_VEGAM:
3458 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3459 			  RB_XSEL2(1) | PKR_MAP(2) |
3460 			  PKR_XSEL(1) | PKR_YSEL(1) |
3461 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3462 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3463 			   SE_PAIR_YSEL(2);
3464 		break;
3465 	case CHIP_TONGA:
3466 	case CHIP_POLARIS10:
3467 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3468 			  SE_XSEL(1) | SE_YSEL(1);
3469 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3470 			   SE_PAIR_YSEL(2);
3471 		break;
3472 	case CHIP_TOPAZ:
3473 	case CHIP_CARRIZO:
3474 		*rconf |= RB_MAP_PKR0(2);
3475 		*rconf1 |= 0x0;
3476 		break;
3477 	case CHIP_POLARIS11:
3478 	case CHIP_POLARIS12:
3479 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3480 			  SE_XSEL(1) | SE_YSEL(1);
3481 		*rconf1 |= 0x0;
3482 		break;
3483 	case CHIP_STONEY:
3484 		*rconf |= 0x0;
3485 		*rconf1 |= 0x0;
3486 		break;
3487 	default:
3488 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3489 		break;
3490 	}
3491 }
3492 
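/*
 * Rewrite the raster configuration for chips with fused-off RBs: when one
 * half of an SE pair, packer pair or RB pair is absent from rb_mask, the
 * matching *_MAP field is repointed at the half that is still present,
 * and the result is programmed separately for each SE.
 */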
3493 static void
3494 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3495 					u32 raster_config, u32 raster_config_1,
3496 					unsigned rb_mask, unsigned num_rb)
3497 {
3498 	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3499 	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3500 	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3501 	unsigned rb_per_se = num_rb / num_se;
3502 	unsigned se_mask[4];
3503 	unsigned se;
3504 
3505 	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3506 	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3507 	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3508 	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3509 
3510 	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3511 	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3512 	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3513 
3514 	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3515 			     (!se_mask[2] && !se_mask[3]))) {
3516 		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3517 
3518 		if (!se_mask[0] && !se_mask[1]) {
3519 			raster_config_1 |=
3520 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3521 		} else {
3522 			raster_config_1 |=
3523 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3524 		}
3525 	}
3526 
3527 	for (se = 0; se < num_se; se++) {
3528 		unsigned raster_config_se = raster_config;
3529 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3530 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3531 		int idx = (se / 2) * 2;
3532 
3533 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3534 			raster_config_se &= ~SE_MAP_MASK;
3535 
3536 			if (!se_mask[idx]) {
3537 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3538 			} else {
3539 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3540 			}
3541 		}
3542 
3543 		pkr0_mask &= rb_mask;
3544 		pkr1_mask &= rb_mask;
3545 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3546 			raster_config_se &= ~PKR_MAP_MASK;
3547 
3548 			if (!pkr0_mask) {
3549 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3550 			} else {
3551 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3552 			}
3553 		}
3554 
3555 		if (rb_per_se >= 2) {
3556 			unsigned rb0_mask = 1 << (se * rb_per_se);
3557 			unsigned rb1_mask = rb0_mask << 1;
3558 
3559 			rb0_mask &= rb_mask;
3560 			rb1_mask &= rb_mask;
3561 			if (!rb0_mask || !rb1_mask) {
3562 				raster_config_se &= ~RB_MAP_PKR0_MASK;
3563 
3564 				if (!rb0_mask) {
3565 					raster_config_se |=
3566 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3567 				} else {
3568 					raster_config_se |=
3569 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3570 				}
3571 			}
3572 
3573 			if (rb_per_se > 2) {
3574 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3575 				rb1_mask = rb0_mask << 1;
3576 				rb0_mask &= rb_mask;
3577 				rb1_mask &= rb_mask;
3578 				if (!rb0_mask || !rb1_mask) {
3579 					raster_config_se &= ~RB_MAP_PKR1_MASK;
3580 
3581 					if (!rb0_mask) {
3582 						raster_config_se |=
3583 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3584 					} else {
3585 						raster_config_se |=
3586 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3587 					}
3588 				}
3589 			}
3590 		}
3591 
3592 		/* GRBM_GFX_INDEX has a different offset on VI */
3593 		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
3594 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3595 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3596 	}
3597 
3598 	/* GRBM_GFX_INDEX has a different offset on VI */
3599 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3600 }
3601 
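/*
 * Discover which RBs are active across all SE/SH units, program either
 * the golden or the harvested raster configuration accordingly, and cache
 * the per-SE/SH register values for later queries by userspace.
 */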
3602 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3603 {
3604 	int i, j;
3605 	u32 data;
3606 	u32 raster_config = 0, raster_config_1 = 0;
3607 	u32 active_rbs = 0;
3608 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3609 					adev->gfx.config.max_sh_per_se;
3610 	unsigned num_rb_pipes;
3611 
3612 	mutex_lock(&adev->grbm_idx_mutex);
3613 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3614 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3615 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3616 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3617 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3618 					       rb_bitmap_width_per_sh);
3619 		}
3620 	}
3621 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3622 
3623 	adev->gfx.config.backend_enable_mask = active_rbs;
3624 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3625 
3626 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3627 			     adev->gfx.config.max_shader_engines, 16);
3628 
3629 	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3630 
3631 	if (!adev->gfx.config.backend_enable_mask ||
3632 			adev->gfx.config.num_rbs >= num_rb_pipes) {
3633 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3634 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3635 	} else {
3636 		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3637 							adev->gfx.config.backend_enable_mask,
3638 							num_rb_pipes);
3639 	}
3640 
3641 	/* cache the values for userspace */
3642 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3643 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3644 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3645 			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3646 				RREG32(mmCC_RB_BACKEND_DISABLE);
3647 			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3648 				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3649 			adev->gfx.config.rb_config[i][j].raster_config =
3650 				RREG32(mmPA_SC_RASTER_CONFIG);
3651 			adev->gfx.config.rb_config[i][j].raster_config_1 =
3652 				RREG32(mmPA_SC_RASTER_CONFIG_1);
3653 		}
3654 	}
3655 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3656 	mutex_unlock(&adev->grbm_idx_mutex);
3657 }
3658 
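/*
 * 0x6000 in the SHARED_BASE/PRIVATE_BASE fields of SH_MEM_BASES
 * corresponds to an aperture base of 0x6000'0000'0000'0000, matching the
 * layout described in gfx_v8_0_init_compute_vmid() below.
 */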
3659 #define DEFAULT_SH_MEM_BASES	(0x6000)
3660 /**
3661  * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
3662  *
3663  * @adev: amdgpu_device pointer
3664  *
3665  * Initialize compute vmid sh_mem registers
3666  *
3667  */
3668 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3669 {
3670 	int i;
3671 	uint32_t sh_mem_config;
3672 	uint32_t sh_mem_bases;
3673 
3674 	/*
3675 	 * Configure apertures:
3676 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3677 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3678 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3679 	 */
3680 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3681 
3682 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3683 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3684 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3685 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3686 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3687 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3688 
3689 	mutex_lock(&adev->srbm_mutex);
3690 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3691 		vi_srbm_select(adev, 0, 0, 0, i);
3692 		/* CP and shaders */
3693 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
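		/* a base (1) above the limit (0) leaves the APE1 aperture disabled */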
3694 		WREG32(mmSH_MEM_APE1_BASE, 1);
3695 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3696 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3697 	}
3698 	vi_srbm_select(adev, 0, 0, 0, 0);
3699 	mutex_unlock(&adev->srbm_mutex);
3700 
3701 	/*
3702 	 * Initialize all compute VMIDs to have no GDS, GWS, or OA
	 * access. These should be enabled by FW for target VMIDs.
	 */
3703 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3704 		WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3705 		WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3706 		WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3707 		WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3708 	}
3709 }
3710 
3711 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3712 {
3713 	int vmid;
3714 
3715 	/*
3716 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3717 	 * access. Compute VMIDs should be enabled by FW for target VMIDs;
3718 	 * the driver can enable them for graphics. VMID0 should maintain
3719 	 * access so that HWS firmware can save/restore entries.
3720 	 */
3721 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3722 		WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3723 		WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3724 		WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3725 		WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3726 	}
3727 }
3728 
3729 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
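/*
 * Carrizo and Stoney (the VI APUs) use a single off-chip LDS buffer;
 * every other VI part enables double buffering.
 */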
3730 {
3731 	switch (adev->asic_type) {
3732 	default:
3733 		adev->gfx.config.double_offchip_lds_buf = 1;
3734 		break;
3735 	case CHIP_CARRIZO:
3736 	case CHIP_STONEY:
3737 		adev->gfx.config.double_offchip_lds_buf = 0;
3738 		break;
3739 	}
3740 }
3741 
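/*
 * One-time programming of the global graphics state: address config,
 * tiling tables, RB setup, per-VMID SH_MEM apertures, SC FIFO sizes and
 * the SPI arbitration priorities.
 */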
3742 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3743 {
3744 	u32 tmp, sh_static_mem_cfg;
3745 	int i;
3746 
3747 	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3748 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3749 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3750 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3751 
3752 	gfx_v8_0_tiling_mode_table_init(adev);
3753 	gfx_v8_0_setup_rb(adev);
3754 	gfx_v8_0_get_cu_info(adev);
3755 	gfx_v8_0_config_init(adev);
3756 
3757 	/* XXX SH_MEM regs */
3758 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3759 	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3760 				   SWIZZLE_ENABLE, 1);
3761 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3762 				   ELEMENT_SIZE, 1);
3763 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3764 				   INDEX_STRIDE, 3);
3765 	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3766 
3767 	mutex_lock(&adev->srbm_mutex);
3768 	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3769 		vi_srbm_select(adev, 0, 0, 0, i);
3770 		/* CP and shaders */
3771 		if (i == 0) {
3772 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3773 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3774 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3775 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3776 			WREG32(mmSH_MEM_CONFIG, tmp);
3777 			WREG32(mmSH_MEM_BASES, 0);
3778 		} else {
3779 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3780 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3781 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3782 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3783 			WREG32(mmSH_MEM_CONFIG, tmp);
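			/* SH_MEM_BASES holds bits [63:48] of the aperture address */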
3784 			tmp = adev->gmc.shared_aperture_start >> 48;
3785 			WREG32(mmSH_MEM_BASES, tmp);
3786 		}
3787 
3788 		WREG32(mmSH_MEM_APE1_BASE, 1);
3789 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3790 	}
3791 	vi_srbm_select(adev, 0, 0, 0, 0);
3792 	mutex_unlock(&adev->srbm_mutex);
3793 
3794 	gfx_v8_0_init_compute_vmid(adev);
3795 	gfx_v8_0_init_gds_vmid(adev);
3796 
3797 	mutex_lock(&adev->grbm_idx_mutex);
3798 	/*
3799 	 * Make sure that the following register writes are broadcast
3800 	 * to all of the shaders.
3801 	 */
3802 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3803 
3804 	WREG32(mmPA_SC_FIFO_SIZE,
3805 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3806 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3807 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3808 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3809 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3810 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3811 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3812 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3813 
3814 	tmp = RREG32(mmSPI_ARB_PRIORITY);
3815 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3816 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3817 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3818 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3819 	WREG32(mmSPI_ARB_PRIORITY, tmp);
3820 
3821 	mutex_unlock(&adev->grbm_idx_mutex);
3823 }
3824 
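/*
 * Poll (with udelay(1) between reads, for up to adev->usec_timeout
 * iterations per unit) until the RLC serdes masters report idle on every
 * SE/SH and on the non-CU masters; used before halting or reconfiguring
 * the RLC.
 */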
3825 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3826 {
3827 	u32 i, j, k;
3828 	u32 mask;
3829 
3830 	mutex_lock(&adev->grbm_idx_mutex);
3831 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3832 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3833 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3834 			for (k = 0; k < adev->usec_timeout; k++) {
3835 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3836 					break;
3837 				udelay(1);
3838 			}
3839 			if (k == adev->usec_timeout) {
3840 				gfx_v8_0_select_se_sh(adev, 0xffffffff,
3841 						      0xffffffff, 0xffffffff, 0);
3842 				mutex_unlock(&adev->grbm_idx_mutex);
3843 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3844 					 i, j);
3845 				return;
3846 			}
3847 		}
3848 	}
3849 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3850 	mutex_unlock(&adev->grbm_idx_mutex);
3851 
3852 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3853 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3854 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3855 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3856 	for (k = 0; k < adev->usec_timeout; k++) {
3857 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3858 			break;
3859 		udelay(1);
3860 	}
3861 }
3862 
3863 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3864 					       bool enable)
3865 {
3866 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3867 
3868 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3869 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3870 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3871 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3872 
3873 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3874 }
3875 
3876 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
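/*
 * Regenerate the clear-state indirect buffer (CSIB) and point the RLC at
 * it; the low half of the GPU address is masked to dword alignment.
 */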
3877 {
3878 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3879 	/* csib */
3880 	WREG32(mmRLC_CSIB_ADDR_HI,
3881 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3882 	WREG32(mmRLC_CSIB_ADDR_LO,
3883 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3884 	WREG32(mmRLC_CSIB_LENGTH,
3885 			adev->gfx.rlc.clear_state_size);
3886 }
3887 
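/*
 * Preprocess the RLC register_list_format blob: record where each
 * 0xFFFFFFFF-terminated entry starts, gather the distinct index register
 * offsets embedded in the entries into unique_indices[], and rewrite each
 * occurrence in place with its slot number so it matches the
 * RLC_SRM_INDEX_CNTL slots programmed by
 * gfx_v8_0_init_save_restore_list().
 */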
3888 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3889 				int ind_offset,
3890 				int list_size,
3891 				int *unique_indices,
3892 				int *indices_count,
3893 				int max_indices,
3894 				int *ind_start_offsets,
3895 				int *offset_count,
3896 				int max_offset)
3897 {
3898 	int indices;
3899 	bool new_entry = true;
3900 
3901 	for (; ind_offset < list_size; ind_offset++) {
3903 		if (new_entry) {
3904 			new_entry = false;
3905 			ind_start_offsets[*offset_count] = ind_offset;
3906 			*offset_count = *offset_count + 1;
3907 			BUG_ON(*offset_count >= max_offset);
3908 		}
3909 
3910 		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3911 			new_entry = true;
3912 			continue;
3913 		}
3914 
3915 		ind_offset += 2;
3916 
3917 		/* look for the matching index */
3918 		for (indices = 0;
3919 			indices < *indices_count;
3920 			indices++) {
3921 			if (unique_indices[indices] ==
3922 				register_list_format[ind_offset])
3923 				break;
3924 		}
3925 
3926 		if (indices >= *indices_count) {
3927 			unique_indices[*indices_count] =
3928 				register_list_format[ind_offset];
3929 			indices = *indices_count;
3930 			*indices_count = *indices_count + 1;
3931 			BUG_ON(*indices_count >= max_indices);
3932 		}
3933 
3934 		register_list_format[ind_offset] = indices;
3935 	}
3936 }
3937 
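/*
 * Upload the RLC save/restore machinery: the direct restore list goes
 * into ARAM, the preprocessed format list and its metadata into GPM
 * scratch, and the unique index registers into the
 * RLC_SRM_INDEX_CNTL_ADDR/DATA pairs.  A kmemdup'd copy is parsed because
 * gfx_v8_0_parse_ind_reg_list() rewrites the list in place.
 */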
3938 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3939 {
3940 	int i, temp, data;
3941 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3942 	int indices_count = 0;
3943 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3944 	int offset_count = 0;
3945 
3946 	int list_size;
3947 	unsigned int *register_list_format =
3948 		kmemdup(adev->gfx.rlc.register_list_format,
3949 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3950 	if (!register_list_format)
3951 		return -ENOMEM;
3952 
3953 	gfx_v8_0_parse_ind_reg_list(register_list_format,
3954 				RLC_FormatDirectRegListLength,
3955 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3956 				unique_indices,
3957 				&indices_count,
3958 				ARRAY_SIZE(unique_indices),
3959 				indirect_start_offsets,
3960 				&offset_count,
3961 				ARRAY_SIZE(indirect_start_offsets));
3962 
3963 	/* save and restore list */
3964 	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3965 
3966 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3967 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3968 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3969 
3970 	/* indirect list */
3971 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3972 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3973 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3974 
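	/*
	 * The restore list appears to hold (offset, value) dword pairs, so
	 * the length handed to the RLC is half the raw dword count.
	 */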
3975 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3976 	list_size = list_size >> 1;
3977 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3978 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3979 
3980 	/* starting offsets of the indirect-list entries */
3981 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3982 		adev->gfx.rlc.starting_offsets_start);
3983 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3984 		WREG32(mmRLC_GPM_SCRATCH_DATA,
3985 				indirect_start_offsets[i]);
3986 
3987 	/* unique indices */
3988 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3989 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3990 	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3991 		if (unique_indices[i] != 0) {
3992 			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3993 			WREG32(data + i, unique_indices[i] >> 20);
3994 		}
3995 	}
3996 	kfree(register_list_format);
3997 
3998 	return 0;
3999 }
4000 
4001 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4002 {
4003 	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4004 }
4005 
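/*
 * Program the power-gating timing parameters: the WPTR idle poll count,
 * the RLC_PG_DELAY power up/down, command-propagation and memory-sleep
 * delays, the serdes command delay, and the GFX idle threshold that
 * triggers a GRBM register save.
 */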
4006 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4007 {
4008 	uint32_t data;
4009 
4010 	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4011 
4012 	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4013 	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4014 	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4015 	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4016 	WREG32(mmRLC_PG_DELAY, data);
4017 
4018 	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4019 	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4020 
4022 
4023 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4024 						bool enable)
4025 {
4026 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4027 }
4028 
4029 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4030 						  bool enable)
4031 {
4032 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4033 }
4034 
4035 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4036 {
4037 	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4038 }
4039 
4040 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
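/*
 * Per-ASIC power-gating bring-up: PG-capable parts get the CSB and the
 * save/restore machine initialized; the APUs additionally need the CP
 * jump table address and the always-on CU mask programmed.
 */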
4041 {
4042 	if ((adev->asic_type == CHIP_CARRIZO) ||
4043 	    (adev->asic_type == CHIP_STONEY)) {
4044 		gfx_v8_0_init_csb(adev);
4045 		gfx_v8_0_init_save_restore_list(adev);
4046 		gfx_v8_0_enable_save_restore_machine(adev);
4047 		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4048 		gfx_v8_0_init_power_gating(adev);
4049 		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4050 	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4051 		   (adev->asic_type == CHIP_POLARIS12) ||
4052 		   (adev->asic_type == CHIP_VEGAM)) {
4053 		gfx_v8_0_init_csb(adev);
4054 		gfx_v8_0_init_save_restore_list(adev);
4055 		gfx_v8_0_enable_save_restore_machine(adev);
4056 		gfx_v8_0_init_power_gating(adev);
4057 	}
4059 }
4060 
4061 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4062 {
4063 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4064 
4065 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4066 	gfx_v8_0_wait_for_rlc_serdes(adev);
4067 }
4068 
4069 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4070 {
4071 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4072 	udelay(50);
4073 
4074 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4075 	udelay(50);
4076 }
4077 
4078 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4079 {
4080 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4081 
4082 	/* on APUs such as Carrizo, the CP interrupt is enabled only after the CP is initialized */
4083 	if (!(adev->flags & AMD_IS_APU))
4084 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4085 
4086 	udelay(50);
4087 }
4088 
4089 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4090 {
4091 	if (amdgpu_sriov_vf(adev)) {
4092 		gfx_v8_0_init_csb(adev);
4093 		return 0;
4094 	}
4095 
4096 	adev->gfx.rlc.funcs->stop(adev);
4097 	adev->gfx.rlc.funcs->reset(adev);
4098 	gfx_v8_0_init_pg(adev);
4099 	adev->gfx.rlc.funcs->start(adev);
4100 
4101 	return 0;
4102 }
4103 
4104 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4105 {
4106 	u32 tmp = RREG32(mmCP_ME_CNTL);
4107 
4108 	if (enable) {
4109 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4110 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4111 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4112 	} else {
4113 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4114 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4115 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4116 	}
4117 	WREG32(mmCP_ME_CNTL, tmp);
4118 	udelay(50);
4119 }
4120 
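/*
 * Count the dwords the clear state buffer will occupy: preamble begin/end,
 * context control, each SECT_CONTEXT extent from vi_cs_data (2 dwords of
 * packet overhead plus the register payload), PA_SC_RASTER_CONFIG/1 and the
 * final CLEAR_STATE packet.
 */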
4121 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4122 {
4123 	u32 count = 0;
4124 	const struct cs_section_def *sect = NULL;
4125 	const struct cs_extent_def *ext = NULL;
4126 
4127 	/* begin clear state */
4128 	count += 2;
4129 	/* context control state */
4130 	count += 3;
4131 
4132 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4133 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4134 			if (sect->id == SECT_CONTEXT)
4135 				count += 2 + ext->reg_count;
4136 			else
4137 				return 0;
4138 		}
4139 	}
4140 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4141 	count += 4;
4142 	/* end clear state */
4143 	count += 2;
4144 	/* clear state */
4145 	count += 2;
4146 
4147 	return count;
4148 }
4149 
4150 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4151 {
4152 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4153 	const struct cs_section_def *sect = NULL;
4154 	const struct cs_extent_def *ext = NULL;
4155 	int r, i;
4156 
4157 	/* init the CP */
4158 	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4159 	WREG32(mmCP_ENDIAN_SWAP, 0);
4160 	WREG32(mmCP_DEVICE_ID, 1);
4161 
4162 	gfx_v8_0_cp_gfx_enable(adev, true);
4163 
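	/* the extra 4 dwords cover the SET_BASE packet that initializes the
	 * CE partitions at the end of this function
	 */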
4164 	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4165 	if (r) {
4166 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4167 		return r;
4168 	}
4169 
4170 	/* clear state buffer */
4171 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4172 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4173 
4174 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4175 	amdgpu_ring_write(ring, 0x80000000);
4176 	amdgpu_ring_write(ring, 0x80000000);
4177 
4178 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4179 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4180 			if (sect->id == SECT_CONTEXT) {
4181 				amdgpu_ring_write(ring,
4182 				       PACKET3(PACKET3_SET_CONTEXT_REG,
4183 					       ext->reg_count));
4184 				amdgpu_ring_write(ring,
4185 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4186 				for (i = 0; i < ext->reg_count; i++)
4187 					amdgpu_ring_write(ring, ext->extent[i]);
4188 			}
4189 		}
4190 	}
4191 
4192 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4193 	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4194 	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4195 	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4196 
4197 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4198 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4199 
4200 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4201 	amdgpu_ring_write(ring, 0);
4202 
4203 	/* init the CE partitions */
4204 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4205 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4206 	amdgpu_ring_write(ring, 0x8000);
4207 	amdgpu_ring_write(ring, 0x8000);
4208 
4209 	amdgpu_ring_commit(ring);
4210 
4211 	return 0;
4212 }

4213 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4214 {
4215 	u32 tmp;
4216 	/* no gfx doorbells on iceland */
4217 	if (adev->asic_type == CHIP_TOPAZ)
4218 		return;
4219 
4220 	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4221 
4222 	if (ring->use_doorbell) {
4223 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4224 				DOORBELL_OFFSET, ring->doorbell_index);
4225 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4226 						DOORBELL_HIT, 0);
4227 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4228 					    DOORBELL_EN, 1);
4229 	} else {
4230 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4231 	}
4232 
4233 	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4234 
4235 	if (adev->flags & AMD_IS_APU)
4236 		return;
4237 
4238 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4239 					DOORBELL_RANGE_LOWER,
4240 					adev->doorbell_index.gfx_ring0);
4241 	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4242 
4243 	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4244 		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4245 }
4246 
4247 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4248 {
4249 	struct amdgpu_ring *ring;
4250 	u32 tmp;
4251 	u32 rb_bufsz;
4252 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4253 
4254 	/* Set the write pointer delay */
4255 	WREG32(mmCP_RB_WPTR_DELAY, 0);
4256 
4257 	/* set the RB to use vmid 0 */
4258 	WREG32(mmCP_RB_VMID, 0);
4259 
4260 	/* Set ring buffer size */
4261 	ring = &adev->gfx.gfx_ring[0];
4262 	rb_bufsz = order_base_2(ring->ring_size / 8);
4263 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4264 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4265 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4266 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4267 #ifdef __BIG_ENDIAN
4268 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4269 #endif
4270 	WREG32(mmCP_RB0_CNTL, tmp);
4271 
4272 	/* Initialize the ring buffer's read and write pointers */
4273 	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4274 	ring->wptr = 0;
4275 	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4276 
4277 	/* set the wb address whether it's enabled or not */
4278 	rptr_addr = ring->rptr_gpu_addr;
4279 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4280 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4281 
4282 	wptr_gpu_addr = ring->wptr_gpu_addr;
4283 	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4284 	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4285 	mdelay(1);
4286 	WREG32(mmCP_RB0_CNTL, tmp);
4287 
4288 	rb_addr = ring->gpu_addr >> 8;
4289 	WREG32(mmCP_RB0_BASE, rb_addr);
4290 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4291 
4292 	gfx_v8_0_set_cpg_door_bell(adev, ring);
4293 	/* start the ring */
4294 	amdgpu_ring_clear_ring(ring);
4295 	gfx_v8_0_cp_gfx_start(adev);
4296 
4297 	return 0;
4298 }
4299 
4300 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4301 {
4302 	if (enable) {
4303 		WREG32(mmCP_MEC_CNTL, 0);
4304 	} else {
4305 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4306 		adev->gfx.kiq[0].ring.sched.ready = false;
4307 	}
4308 	udelay(50);
4309 }
4310 
4311 /* KIQ functions */
4312 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4313 {
4314 	uint32_t tmp;
4315 	struct amdgpu_device *adev = ring->adev;
4316 
4317 	/* tell the RLC which queue is the KIQ */
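	/*
	 * The queue is packed as me[6:5] | pipe[4:3] | queue[2:0]; bit 7
	 * presumably marks the entry as valid for the RLC scheduler.
	 */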
4318 	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4319 	tmp &= 0xffffff00;
4320 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4321 	WREG32(mmRLC_CP_SCHEDULERS, tmp | 0x80);
4322 }
4323 
4324 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4325 {
4326 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
4327 	uint64_t queue_mask = 0;
4328 	int r, i;
4329 
4330 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4331 		if (!test_bit(i, adev->gfx.mec_bitmap[0].queue_bitmap))
4332 			continue;
4333 
4334 		/* This situation may be hit in the future if a new HW
4335 		 * generation exposes more than 64 queues. If so, the
4336 		 * definition of queue_mask needs updating */
4337 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4338 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4339 			break;
4340 		}
4341 
4342 		queue_mask |= (1ull << i);
4343 	}
4344 
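	/* 8 dwords for the SET_RESOURCES packet plus 8 per compute ring for
	 * its MAP_QUEUES packet (7 dwords of which are used below)
	 */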
4345 	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4346 	if (r) {
4347 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4348 		return r;
4349 	}
4350 	/* set resources */
4351 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4352 	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4353 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
4354 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
4355 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
4356 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
4357 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
4358 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
4359 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4360 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4361 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4362 		uint64_t wptr_addr = ring->wptr_gpu_addr;
4363 
4364 		/* map queues */
4365 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4366 		/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
4367 		amdgpu_ring_write(kiq_ring,
4368 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4369 		amdgpu_ring_write(kiq_ring,
4370 				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4371 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4372 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4373 				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4374 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4375 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4376 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4377 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4378 	}
4379 
4380 	amdgpu_ring_commit(kiq_ring);
4381 
4382 	return 0;
4383 }
4384 
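/*
 * Ask the CP to dequeue the currently selected HQD ("req" selects the
 * dequeue mode; callers here pass 2) and busy-wait up to usec_timeout for
 * CP_HQD_ACTIVE to clear.  The caller is expected to have selected the
 * queue with vi_srbm_select() under adev->srbm_mutex.
 */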
4385 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4386 {
4387 	int i, r = 0;
4388 
4389 	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4390 		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4391 		for (i = 0; i < adev->usec_timeout; i++) {
4392 			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4393 				break;
4394 			udelay(1);
4395 		}
4396 		if (i == adev->usec_timeout)
4397 			r = -ETIMEDOUT;
4398 	}
4399 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4400 	WREG32(mmCP_HQD_PQ_RPTR, 0);
4401 	WREG32(mmCP_HQD_PQ_WPTR, 0);
4402 
4403 	return r;
4404 }
4405 
4406 static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
4407 {
4408 	struct amdgpu_device *adev = ring->adev;
4409 
4410 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4411 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
4412 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
4413 			mqd->cp_hqd_queue_priority =
4414 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
4415 		}
4416 	}
4417 }
4418 
4419 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4420 {
4421 	struct amdgpu_device *adev = ring->adev;
4422 	struct vi_mqd *mqd = ring->mqd_ptr;
4423 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4424 	uint32_t tmp;
4425 
4426 	mqd->header = 0xC0310800;
4427 	mqd->compute_pipelinestat_enable = 0x00000001;
4428 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4429 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4430 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4431 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4432 	mqd->compute_misc_reserved = 0x00000003;
4433 	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4434 						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4435 	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4436 						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4437 	eop_base_addr = ring->eop_gpu_addr >> 8;
4438 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4439 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4440 
4441 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
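	/*
	 * e.g. GFX8_MEC_HPD_SIZE = 4096 bytes = 1024 dwords, so EOP_SIZE =
	 * order_base_2(1024) - 1 = 9 and the hardware decodes 2^(9+1) = 1024
	 * dwords.
	 */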
4442 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4443 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4444 			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4445 
4446 	mqd->cp_hqd_eop_control = tmp;
4447 
4448 	/* enable the doorbell if the ring uses one */
4449 	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4450 			    CP_HQD_PQ_DOORBELL_CONTROL,
4451 			    DOORBELL_EN,
4452 			    ring->use_doorbell ? 1 : 0);
4453 
4454 	mqd->cp_hqd_pq_doorbell_control = tmp;
4455 
4456 	/* set the pointer to the MQD */
4457 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4458 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4459 
4460 	/* set MQD vmid to 0 */
4461 	tmp = RREG32(mmCP_MQD_CONTROL);
4462 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4463 	mqd->cp_mqd_control = tmp;
4464 
4465 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4466 	hqd_gpu_addr = ring->gpu_addr >> 8;
4467 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4468 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4469 
4470 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4471 	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4472 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4473 			    (order_base_2(ring->ring_size / 4) - 1));
4474 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4475 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4476 #ifdef __BIG_ENDIAN
4477 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4478 #endif
4479 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4480 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4481 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4482 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4483 	mqd->cp_hqd_pq_control = tmp;
4484 
4485 	/* set the wb address whether it's enabled or not */
4486 	wb_gpu_addr = ring->rptr_gpu_addr;
4487 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4488 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4489 		upper_32_bits(wb_gpu_addr) & 0xffff;
4490 
4491 	/* only used if CP_PQ_WPTR_POLL_CNTL.EN = 1 */
4492 	wb_gpu_addr = ring->wptr_gpu_addr;
4493 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4494 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4495 
4496 	tmp = 0;
4497 	/* enable the doorbell if requested */
4498 	if (ring->use_doorbell) {
4499 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4500 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4501 				DOORBELL_OFFSET, ring->doorbell_index);
4502 
4503 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4504 					 DOORBELL_EN, 1);
4505 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4506 					 DOORBELL_SOURCE, 0);
4507 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4508 					 DOORBELL_HIT, 0);
4509 	}
4510 
4511 	mqd->cp_hqd_pq_doorbell_control = tmp;
4512 
4513 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4514 	ring->wptr = 0;
4515 	mqd->cp_hqd_pq_wptr = ring->wptr;
4516 	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4517 
4518 	/* set the vmid for the queue */
4519 	mqd->cp_hqd_vmid = 0;
4520 
4521 	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4522 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4523 	mqd->cp_hqd_persistent_state = tmp;
4524 
4525 	/* set MTYPE */
4526 	tmp = RREG32(mmCP_HQD_IB_CONTROL);
4527 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4528 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4529 	mqd->cp_hqd_ib_control = tmp;
4530 
4531 	tmp = RREG32(mmCP_HQD_IQ_TIMER);
4532 	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4533 	mqd->cp_hqd_iq_timer = tmp;
4534 
4535 	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4536 	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4537 	mqd->cp_hqd_ctx_save_control = tmp;
4538 
4539 	/* defaults */
4540 	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4541 	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4542 	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4543 	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4544 	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4545 	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4546 	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4547 	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4548 	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4549 	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4550 	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4551 	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4552 
4553 	/* set static priority for a queue/ring */
4554 	gfx_v8_0_mqd_set_priority(ring, mqd);
4555 	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4556 
4557 	/* the map_queues packet doesn't need to activate the queue,
4558 	 * so only the KIQ needs to set this field.
4559 	 */
4560 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4561 		mqd->cp_hqd_active = 1;
4562 
4563 	return 0;
4564 }
4565 
4566 static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4567 			struct vi_mqd *mqd)
4568 {
4569 	uint32_t mqd_reg;
4570 	uint32_t *mqd_data;
4571 
4572 	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
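	/*
	 * The vi_mqd fields from cp_mqd_base_addr_lo onward mirror the HQD
	 * register file, so mqd_data[reg - mmCP_MQD_BASE_ADDR] is the value
	 * for register "reg".
	 */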
4573 	mqd_data = &mqd->cp_mqd_base_addr_lo;
4574 
4575 	/* disable wptr polling */
4576 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4577 
4578 	/* program all HQD registers */
4579 	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4580 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4581 
4582 	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4583 	 * This is safe since EOP RPTR==WPTR for any inactive HQD
4584 	 * on ASICs that do not support context-save.
4585 	 * EOP writes/reads can start anywhere in the ring.
4586 	 */
4587 	if (adev->asic_type != CHIP_TONGA) {
4588 		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4589 		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4590 		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4591 	}
4592 
4593 	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4594 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4595 
4596 	/* activate the HQD */
4597 	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4598 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4599 
4600 	return 0;
4601 }
4602 
4603 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4604 {
4605 	struct amdgpu_device *adev = ring->adev;
4606 	struct vi_mqd *mqd = ring->mqd_ptr;
4607 
4608 	gfx_v8_0_kiq_setting(ring);
4609 
4610 	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4611 		/* reset MQD to a clean status */
4612 		if (adev->gfx.kiq[0].mqd_backup)
4613 			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct vi_mqd_allocation));
4614 
4615 		/* reset ring buffer */
4616 		ring->wptr = 0;
4617 		amdgpu_ring_clear_ring(ring);
4618 		mutex_lock(&adev->srbm_mutex);
4619 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4620 		gfx_v8_0_mqd_commit(adev, mqd);
4621 		vi_srbm_select(adev, 0, 0, 0, 0);
4622 		mutex_unlock(&adev->srbm_mutex);
4623 	} else {
4624 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4625 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4626 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4627 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4628 			amdgpu_ring_clear_ring(ring);
4629 		mutex_lock(&adev->srbm_mutex);
4630 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4631 		gfx_v8_0_mqd_init(ring);
4632 		gfx_v8_0_mqd_commit(adev, mqd);
4633 		vi_srbm_select(adev, 0, 0, 0, 0);
4634 		mutex_unlock(&adev->srbm_mutex);
4635 
4636 		if (adev->gfx.kiq[0].mqd_backup)
4637 			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct vi_mqd_allocation));
4638 	}
4639 
4640 	return 0;
4641 }
4642 
4643 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4644 {
4645 	struct amdgpu_device *adev = ring->adev;
4646 	struct vi_mqd *mqd = ring->mqd_ptr;
4647 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4648 
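	/* on first init, build the MQD from scratch and back it up; on
	 * reset/resume, restore the backup and just reset the ring buffer
	 */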
4649 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4650 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4651 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4652 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4653 		mutex_lock(&adev->srbm_mutex);
4654 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4655 		gfx_v8_0_mqd_init(ring);
4656 		vi_srbm_select(adev, 0, 0, 0, 0);
4657 		mutex_unlock(&adev->srbm_mutex);
4658 
4659 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4660 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4661 	} else {
4662 		/* restore MQD to a clean status */
4663 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4664 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4665 		/* reset ring buffer */
4666 		ring->wptr = 0;
4667 		amdgpu_ring_clear_ring(ring);
4668 	}
4669 	return 0;
4670 }
4671 
4672 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4673 {
4674 	if (adev->asic_type > CHIP_TONGA) {
4675 		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4676 		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4677 	}
4678 	/* enable doorbells */
4679 	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4680 }
4681 
4682 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4683 {
4684 	gfx_v8_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
4685 	return 0;
4686 }
4687 
4688 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4689 {
4690 	int i, r;
4691 
4692 	gfx_v8_0_cp_compute_enable(adev, true);
4693 
4694 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4695 		r = gfx_v8_0_kcq_init_queue(&adev->gfx.compute_ring[i]);
4696 		if (r)
4697 			return r;
4698 	}
4699 
4700 	gfx_v8_0_set_mec_doorbell_range(adev);
4701 
4702 	return gfx_v8_0_kiq_kcq_enable(adev);
4703 }
4704 
4705 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4706 {
4707 	int r, i;
4708 	struct amdgpu_ring *ring;
4709 
4710 	/* collect all the ring tests here: gfx, kiq, compute */
4711 	ring = &adev->gfx.gfx_ring[0];
4712 	r = amdgpu_ring_test_helper(ring);
4713 	if (r)
4714 		return r;
4715 
4716 	ring = &adev->gfx.kiq[0].ring;
4717 	r = amdgpu_ring_test_helper(ring);
4718 	if (r)
4719 		return r;
4720 
4721 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4722 		ring = &adev->gfx.compute_ring[i];
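		/* failures are recorded in ring->sched.ready by the helper */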
4723 		amdgpu_ring_test_helper(ring);
4724 	}
4725 
4726 	return 0;
4727 }
4728 
4729 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4730 {
4731 	int r;
4732 
4733 	if (!(adev->flags & AMD_IS_APU))
4734 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4735 
4736 	r = gfx_v8_0_kiq_resume(adev);
4737 	if (r)
4738 		return r;
4739 
4740 	r = gfx_v8_0_cp_gfx_resume(adev);
4741 	if (r)
4742 		return r;
4743 
4744 	r = gfx_v8_0_kcq_resume(adev);
4745 	if (r)
4746 		return r;
4747 
4748 	r = gfx_v8_0_cp_test_all_rings(adev);
4749 	if (r)
4750 		return r;
4751 
4752 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4753 
4754 	return 0;
4755 }
4756 
4757 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4758 {
4759 	gfx_v8_0_cp_gfx_enable(adev, enable);
4760 	gfx_v8_0_cp_compute_enable(adev, enable);
4761 }
4762 
4763 static int gfx_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
4764 {
4765 	int r;
4766 	struct amdgpu_device *adev = ip_block->adev;
4767 
4768 	gfx_v8_0_init_golden_registers(adev);
4769 	gfx_v8_0_constants_init(adev);
4770 
4771 	r = adev->gfx.rlc.funcs->resume(adev);
4772 	if (r)
4773 		return r;
4774 
4775 	r = gfx_v8_0_cp_resume(adev);
4776 
4777 	return r;
4778 }
4779 
4780 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4781 {
4782 	int r, i;
4783 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
4784 
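	/* each UNMAP_QUEUES packet below is 6 dwords: header plus 5 payload */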
4785 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4786 	if (r)
4787 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4788 
4789 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4790 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4791 
4792 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4793 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4794 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4795 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4796 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4797 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4798 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4799 		amdgpu_ring_write(kiq_ring, 0);
4800 		amdgpu_ring_write(kiq_ring, 0);
4801 		amdgpu_ring_write(kiq_ring, 0);
4802 	}
4803 	/* submit the UNMAP_QUEUES packets */
4804 	amdgpu_ring_commit(kiq_ring);
4805 	/*
4806 	 * The ring test does a basic scratch register change check.  Run it
4807 	 * here to ensure the unmap-queues packets submitted above have been
4808 	 * processed before we return.
4809 	 */
4810 	r = amdgpu_ring_test_helper(kiq_ring);
4811 	if (r)
4812 		DRM_ERROR("KCQ disable failed\n");
4813 
4814 	return r;
4815 }
4816 
4817 static bool gfx_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
4818 {
4819 	struct amdgpu_device *adev = ip_block->adev;
4820 
4821 	return !REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE) &&
4822 	       RREG32(mmGRBM_STATUS2) == 0x8;
4826 }
4827 
4828 static bool gfx_v8_0_rlc_is_idle(void *handle)
4829 {
4830 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4831 
4832 	return RREG32(mmGRBM_STATUS2) == 0x8;
4836 }
4837 
4838 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4839 {
4840 	unsigned int i;
4841 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4842 
4843 	for (i = 0; i < adev->usec_timeout; i++) {
4844 		if (gfx_v8_0_rlc_is_idle(handle))
4845 			return 0;
4846 
4847 		udelay(1);
4848 	}
4849 	return -ETIMEDOUT;
4850 }
4851 
4852 static int gfx_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4853 {
4854 	unsigned int i;
4855 	struct amdgpu_device *adev = ip_block->adev;
4856 
4857 	for (i = 0; i < adev->usec_timeout; i++) {
4858 		if (gfx_v8_0_is_idle(ip_block))
4859 			return 0;
4860 
4861 		udelay(1);
4862 	}
4863 	return -ETIMEDOUT;
4864 }
4865 
4866 static int gfx_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
4867 {
4868 	struct amdgpu_device *adev = ip_block->adev;
4869 
4870 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4871 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4872 
4873 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4874 
4875 	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4876 
4877 	/* disable the KCQs so the CPC stops touching memory that may become invalid */
4878 	gfx_v8_0_kcq_disable(adev);
4879 
4880 	if (amdgpu_sriov_vf(adev)) {
4881 		pr_debug("For SRIOV client, shouldn't do anything.\n");
4882 		return 0;
4883 	}
4884 
4885 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4886 	if (!gfx_v8_0_wait_for_idle(ip_block))
4887 		gfx_v8_0_cp_enable(adev, false);
4888 	else
4889 		pr_err("cp is busy, skip halt cp\n");
4890 	if (!gfx_v8_0_wait_for_rlc_idle(adev))
4891 		adev->gfx.rlc.funcs->stop(adev);
4892 	else
4893 		pr_err("rlc is busy, skip halt rlc\n");
4894 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4895 
4896 	return 0;
4897 }
4898 
4899 static int gfx_v8_0_suspend(struct amdgpu_ip_block *ip_block)
4900 {
4901 	return gfx_v8_0_hw_fini(ip_block);
4902 }
4903 
4904 static int gfx_v8_0_resume(struct amdgpu_ip_block *ip_block)
4905 {
4906 	return gfx_v8_0_hw_init(ip_block);
4907 }
4908 
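/*
 * Check GRBM/SRBM status and, if any GFX block is hung, latch the required
 * soft-reset masks into adev->gfx.{grbm,srbm}_soft_reset for the
 * pre/soft/post reset callbacks below.
 */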
4909 static bool gfx_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
4910 {
4911 	struct amdgpu_device *adev = ip_block->adev;
4912 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4913 	u32 tmp;
4914 
4915 	/* GRBM_STATUS */
4916 	tmp = RREG32(mmGRBM_STATUS);
4917 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4918 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4919 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4920 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4921 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4922 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4923 		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4924 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4925 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4926 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4927 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4928 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4929 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4930 	}
4931 
4932 	/* GRBM_STATUS2 */
4933 	tmp = RREG32(mmGRBM_STATUS2);
4934 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4935 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4936 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4937 
4938 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4939 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4940 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4941 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4942 						SOFT_RESET_CPF, 1);
4943 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4944 						SOFT_RESET_CPC, 1);
4945 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4946 						SOFT_RESET_CPG, 1);
4947 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4948 						SOFT_RESET_GRBM, 1);
4949 	}
4950 
4951 	/* SRBM_STATUS */
4952 	tmp = RREG32(mmSRBM_STATUS);
4953 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4954 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4955 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4956 	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4957 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4958 						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4959 
4960 	if (grbm_soft_reset || srbm_soft_reset) {
4961 		adev->gfx.grbm_soft_reset = grbm_soft_reset;
4962 		adev->gfx.srbm_soft_reset = srbm_soft_reset;
4963 		return true;
4964 	} else {
4965 		adev->gfx.grbm_soft_reset = 0;
4966 		adev->gfx.srbm_soft_reset = 0;
4967 		return false;
4968 	}
4969 }
4970 
4971 static int gfx_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
4972 {
4973 	struct amdgpu_device *adev = ip_block->adev;
4974 	u32 grbm_soft_reset = 0;
4975 
4976 	if ((!adev->gfx.grbm_soft_reset) &&
4977 	    (!adev->gfx.srbm_soft_reset))
4978 		return 0;
4979 
4980 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
4981 
4982 	/* stop the rlc */
4983 	adev->gfx.rlc.funcs->stop(adev);
4984 
4985 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4986 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4987 		/* Disable GFX parsing/prefetching */
4988 		gfx_v8_0_cp_gfx_enable(adev, false);
4989 
4990 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4991 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4992 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4993 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4994 		int i;
4995 
4996 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4997 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4998 
4999 			mutex_lock(&adev->srbm_mutex);
5000 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5001 			gfx_v8_0_deactivate_hqd(adev, 2);
5002 			vi_srbm_select(adev, 0, 0, 0, 0);
5003 			mutex_unlock(&adev->srbm_mutex);
5004 		}
5005 		/* Disable MEC parsing/prefetching */
5006 		gfx_v8_0_cp_compute_enable(adev, false);
5007 	}
5008 
5009 	return 0;
5010 }
5011 
5012 static int gfx_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
5013 {
5014 	struct amdgpu_device *adev = ip_block->adev;
5015 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5016 	u32 tmp;
5017 
5018 	if ((!adev->gfx.grbm_soft_reset) &&
5019 	    (!adev->gfx.srbm_soft_reset))
5020 		return 0;
5021 
5022 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5023 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5024 
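	/* presumably this stalls and clears the memory controller's GFX path
	 * (GMCON_DEBUG.GFX_STALL/GFX_CLEAR) while the resets are toggled
	 */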
5025 	if (grbm_soft_reset || srbm_soft_reset) {
5026 		tmp = RREG32(mmGMCON_DEBUG);
5027 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5028 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5029 		WREG32(mmGMCON_DEBUG, tmp);
5030 		udelay(50);
5031 	}
5032 
5033 	if (grbm_soft_reset) {
5034 		tmp = RREG32(mmGRBM_SOFT_RESET);
5035 		tmp |= grbm_soft_reset;
5036 		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5037 		WREG32(mmGRBM_SOFT_RESET, tmp);
5038 		tmp = RREG32(mmGRBM_SOFT_RESET);
5039 
5040 		udelay(50);
5041 
5042 		tmp &= ~grbm_soft_reset;
5043 		WREG32(mmGRBM_SOFT_RESET, tmp);
5044 		tmp = RREG32(mmGRBM_SOFT_RESET);
5045 	}
5046 
5047 	if (srbm_soft_reset) {
5048 		tmp = RREG32(mmSRBM_SOFT_RESET);
5049 		tmp |= srbm_soft_reset;
5050 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5051 		WREG32(mmSRBM_SOFT_RESET, tmp);
5052 		tmp = RREG32(mmSRBM_SOFT_RESET);
5053 
5054 		udelay(50);
5055 
5056 		tmp &= ~srbm_soft_reset;
5057 		WREG32(mmSRBM_SOFT_RESET, tmp);
5058 		tmp = RREG32(mmSRBM_SOFT_RESET);
5059 	}
5060 
5061 	if (grbm_soft_reset || srbm_soft_reset) {
5062 		tmp = RREG32(mmGMCON_DEBUG);
5063 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5064 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5065 		WREG32(mmGMCON_DEBUG, tmp);
5066 	}
5067 
5068 	/* Wait a little for things to settle down */
5069 	udelay(50);
5070 
5071 	return 0;
5072 }
5073 
5074 static int gfx_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
5075 {
5076 	struct amdgpu_device *adev = ip_block->adev;
5077 	u32 grbm_soft_reset = 0;
5078 
5079 	if ((!adev->gfx.grbm_soft_reset) &&
5080 	    (!adev->gfx.srbm_soft_reset))
5081 		return 0;
5082 
5083 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5084 
5085 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5086 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5087 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5088 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5089 		int i;
5090 
5091 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5092 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5093 
5094 			mutex_lock(&adev->srbm_mutex);
5095 			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5096 			gfx_v8_0_deactivate_hqd(adev, 2);
5097 			vi_srbm_select(adev, 0, 0, 0, 0);
5098 			mutex_unlock(&adev->srbm_mutex);
5099 		}
5100 		gfx_v8_0_kiq_resume(adev);
5101 		gfx_v8_0_kcq_resume(adev);
5102 	}
5103 
5104 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5105 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5106 		gfx_v8_0_cp_gfx_resume(adev);
5107 
5108 	gfx_v8_0_cp_test_all_rings(adev);
5109 
5110 	adev->gfx.rlc.funcs->start(adev);
5111 
5112 	return 0;
5113 }
5114 
5115 /**
5116  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5117  *
5118  * @adev: amdgpu_device pointer
5119  *
5120  * Fetches a GPU clock counter snapshot.
5121  * Returns the 64 bit clock counter snapshot.
5122  */
5123 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5124 {
5125 	uint64_t clock;
5126 
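	/*
	 * Writing the capture register latches the free-running counter, so
	 * the LSB/MSB reads below form a coherent 64-bit snapshot; the mutex
	 * serializes concurrent captures.
	 */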
5127 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5128 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5129 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5130 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5131 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5132 	return clock;
5133 }
5134 
5135 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5136 					  uint32_t vmid,
5137 					  uint32_t gds_base, uint32_t gds_size,
5138 					  uint32_t gws_base, uint32_t gws_size,
5139 					  uint32_t oa_base, uint32_t oa_size)
5140 {
5141 	/* GDS Base */
5142 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5143 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5144 				WRITE_DATA_DST_SEL(0)));
5145 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5146 	amdgpu_ring_write(ring, 0);
5147 	amdgpu_ring_write(ring, gds_base);
5148 
5149 	/* GDS Size */
5150 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5151 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5152 				WRITE_DATA_DST_SEL(0)));
5153 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5154 	amdgpu_ring_write(ring, 0);
5155 	amdgpu_ring_write(ring, gds_size);
5156 
5157 	/* GWS */
5158 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5159 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5160 				WRITE_DATA_DST_SEL(0)));
5161 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5162 	amdgpu_ring_write(ring, 0);
5163 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5164 
5165 	/* OA */
5166 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5167 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5168 				WRITE_DATA_DST_SEL(0)));
5169 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5170 	amdgpu_ring_write(ring, 0);
5171 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5172 }
5173 
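/*
 * Read one dword of per-wave state: SQ_IND_INDEX selects (simd, wave,
 * address) and SQ_IND_DATA returns the value; FORCE_READ presumably allows
 * reading even from an inactive wave.
 */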
5174 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5175 {
5176 	WREG32(mmSQ_IND_INDEX,
5177 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5178 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5179 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5180 		(SQ_IND_INDEX__FORCE_READ_MASK));
5181 	return RREG32(mmSQ_IND_DATA);
5182 }
5183 
5184 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5185 			   uint32_t wave, uint32_t thread,
5186 			   uint32_t regno, uint32_t num, uint32_t *out)
5187 {
5188 	WREG32(mmSQ_IND_INDEX,
5189 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5190 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5191 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5192 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5193 		(SQ_IND_INDEX__FORCE_READ_MASK) |
5194 		(SQ_IND_INDEX__AUTO_INCR_MASK));
5195 	while (num--)
5196 		*(out++) = RREG32(mmSQ_IND_DATA);
5197 }
5198 
5199 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5200 {
5201 	/* type 0 wave data */
5202 	dst[(*no_fields)++] = 0;
5203 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5204 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5205 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5206 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5207 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5208 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5209 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5210 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5211 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5212 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5213 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5214 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5215 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5216 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5217 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5218 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5219 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5220 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5221 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
5222 }
5223 
5224 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
5225 				     uint32_t wave, uint32_t start,
5226 				     uint32_t size, uint32_t *dst)
5227 {
5228 	wave_read_regs(
5229 		adev, simd, wave, 0,
5230 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5231 }
5232 
5233 
5234 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5235 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5236 	.select_se_sh = &gfx_v8_0_select_se_sh,
5237 	.read_wave_data = &gfx_v8_0_read_wave_data,
5238 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5239 	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5240 };
5241 
5242 static int gfx_v8_0_early_init(struct amdgpu_ip_block *ip_block)
5243 {
5244 	struct amdgpu_device *adev = ip_block->adev;
5245 
5246 	adev->gfx.xcc_mask = 1;
5247 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5248 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5249 					  AMDGPU_MAX_COMPUTE_RINGS);
5250 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5251 	gfx_v8_0_set_ring_funcs(adev);
5252 	gfx_v8_0_set_irq_funcs(adev);
5253 	gfx_v8_0_set_gds_init(adev);
5254 	gfx_v8_0_set_rlc_funcs(adev);
5255 
5256 	return 0;
5257 }
5258 
5259 static int gfx_v8_0_late_init(struct amdgpu_ip_block *ip_block)
5260 {
5261 	struct amdgpu_device *adev = ip_block->adev;
5262 	int r;
5263 
5264 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5265 	if (r)
5266 		return r;
5267 
5268 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5269 	if (r)
5270 		return r;
5271 
5272 	/* this requires IBs, so run it in late init once the IB pool is initialized */
5273 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5274 	if (r)
5275 		return r;
5276 
5277 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5278 	if (r) {
5279 		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5280 		return r;
5281 	}
5282 
5283 	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5284 	if (r) {
5285 		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n", r);
5288 		return r;
5289 	}
5290 
5291 	return 0;
5292 }
5293 
5294 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5295 						       bool enable)
5296 {
5297 	if ((adev->asic_type == CHIP_POLARIS11) ||
5298 	    (adev->asic_type == CHIP_POLARIS12) ||
5299 	    (adev->asic_type == CHIP_VEGAM))
5300 		/* Send msg to SMU via Powerplay */
5301 		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable, 0);
5302 
5303 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5304 }
5305 
5306 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5307 							bool enable)
5308 {
5309 	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5310 }
5311 
5312 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5313 		bool enable)
5314 {
5315 	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5316 }
5317 
5318 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5319 					  bool enable)
5320 {
5321 	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5322 }
5323 
5324 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5325 						bool enable)
5326 {
5327 	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5328 
5329 	/* Read any GFX register to wake up GFX. */
5330 	if (!enable)
5331 		RREG32(mmDB_RENDER_CONTROL);
5332 }
5333 
5334 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5335 					  bool enable)
5336 {
5337 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5338 		cz_enable_gfx_cg_power_gating(adev, true);
5339 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5340 			cz_enable_gfx_pipeline_power_gating(adev, true);
5341 	} else {
5342 		cz_enable_gfx_cg_power_gating(adev, false);
5343 		cz_enable_gfx_pipeline_power_gating(adev, false);
5344 	}
5345 }
5346 
5347 static int gfx_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
5348 					  enum amd_powergating_state state)
5349 {
5350 	struct amdgpu_device *adev = ip_block->adev;
5351 	bool enable = (state == AMD_PG_STATE_GATE);
5352 
5353 	if (amdgpu_sriov_vf(adev))
5354 		return 0;
5355 
5356 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5357 				AMD_PG_SUPPORT_RLC_SMU_HS |
5358 				AMD_PG_SUPPORT_CP |
5359 				AMD_PG_SUPPORT_GFX_DMG))
5360 		amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5361 	switch (adev->asic_type) {
5362 	case CHIP_CARRIZO:
5363 	case CHIP_STONEY:
5365 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5366 			cz_enable_sck_slow_down_on_power_up(adev, true);
5367 			cz_enable_sck_slow_down_on_power_down(adev, true);
5368 		} else {
5369 			cz_enable_sck_slow_down_on_power_up(adev, false);
5370 			cz_enable_sck_slow_down_on_power_down(adev, false);
5371 		}
5372 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5373 			cz_enable_cp_power_gating(adev, true);
5374 		else
5375 			cz_enable_cp_power_gating(adev, false);
5376 
5377 		cz_update_gfx_cg_power_gating(adev, enable);
5378 
5379 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5380 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5381 		else
5382 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5383 
5384 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5385 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5386 		else
5387 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5388 		break;
5389 	case CHIP_POLARIS11:
5390 	case CHIP_POLARIS12:
5391 	case CHIP_VEGAM:
5392 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5393 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5394 		else
5395 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5396 
5397 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5398 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5399 		else
5400 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5401 
5402 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5403 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5404 		else
5405 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5406 		break;
5407 	default:
5408 		break;
5409 	}
5410 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5411 				AMD_PG_SUPPORT_RLC_SMU_HS |
5412 				AMD_PG_SUPPORT_CP |
5413 				AMD_PG_SUPPORT_GFX_DMG))
5414 		amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5415 	return 0;
5416 }
5417 
5418 static void gfx_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
5419 {
5420 	struct amdgpu_device *adev = ip_block->adev;
5421 	int data;
5422 
5423 	if (amdgpu_sriov_vf(adev))
5424 		*flags = 0;
5425 
5426 	/* AMD_CG_SUPPORT_GFX_MGCG */
5427 	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5428 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5429 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5430 
5431 	/* AMD_CG_SUPPORT_GFX_CGCG */
5432 	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5433 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5434 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5435 
5436 	/* AMD_CG_SUPPORT_GFX_CGLS */
5437 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5438 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5439 
5440 	/* AMD_CG_SUPPORT_GFX_CGTS */
5441 	data = RREG32(mmCGTS_SM_CTRL_REG);
5442 	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5443 		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5444 
5445 	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5446 	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5447 		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5448 
5449 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5450 	data = RREG32(mmRLC_MEM_SLP_CNTL);
5451 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5452 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5453 
5454 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5455 	data = RREG32(mmCP_MEM_SLP_CNTL);
5456 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5457 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5458 }
5459 
5460 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5461 				     uint32_t reg_addr, uint32_t cmd)
5462 {
5463 	uint32_t data;
5464 
5465 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
5466 
5467 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5468 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5469 
5470 	data = RREG32(mmRLC_SERDES_WR_CTRL);
5471 	if (adev->asic_type == CHIP_STONEY)
5472 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5473 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5474 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5475 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5476 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5477 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5478 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5479 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5480 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5481 	else
5482 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5483 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5484 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5485 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5486 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5487 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5488 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5489 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5490 			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5491 			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5492 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5493 	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5494 		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5495 		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5496 		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5497 
5498 	WREG32(mmRLC_SERDES_WR_CTRL, data);
5499 }
5500 
5501 #define MSG_ENTER_RLC_SAFE_MODE     1
5502 #define MSG_EXIT_RLC_SAFE_MODE      0
5503 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5504 #define RLC_GPR_REG2__REQ__SHIFT 0
5505 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5506 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5507 
5508 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5509 {
5510 	return !!(RREG32(mmRLC_CNTL) & RLC_CNTL__RLC_ENABLE_F32_MASK);
5517 }
5518 
5519 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
5520 {
5521 	uint32_t data;
5522 	unsigned int i;

5523 	data = RREG32(mmRLC_CNTL);
5524 	data |= RLC_SAFE_MODE__CMD_MASK;
5525 	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5526 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5527 	WREG32(mmRLC_SAFE_MODE, data);
5528 
5529 	/* wait for RLC_SAFE_MODE */
5530 	for (i = 0; i < adev->usec_timeout; i++) {
5531 		if ((RREG32(mmRLC_GPM_STAT) &
5532 		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5533 		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5534 		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5535 		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5536 			break;
5537 		udelay(1);
5538 	}
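	/* then wait for the RLC to acknowledge the request (CMD clears) */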
5539 	for (i = 0; i < adev->usec_timeout; i++) {
5540 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5541 			break;
5542 		udelay(1);
5543 	}
5544 }
5545 
5546 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
5547 {
5548 	uint32_t data;
5549 	unsigned int i;
5550 
5551 	data = RREG32(mmRLC_CNTL);
5552 	data |= RLC_SAFE_MODE__CMD_MASK;
5553 	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5554 	WREG32(mmRLC_SAFE_MODE, data);
5555 
5556 	for (i = 0; i < adev->usec_timeout; i++) {
5557 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5558 			break;
5559 		udelay(1);
5560 	}
5561 }
5562 
5563 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
5564 {
5565 	u32 data;
5566 
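	/* keep GFXOFF disabled while we read-modify-write RLC_SPM_VMID */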
5567 	amdgpu_gfx_off_ctrl(adev, false);
5568 
5569 	if (amdgpu_sriov_is_pp_one_vf(adev))
5570 		data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
5571 	else
5572 		data = RREG32(mmRLC_SPM_VMID);
5573 
5574 	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
5575 	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
5576 
5577 	if (amdgpu_sriov_is_pp_one_vf(adev))
5578 		WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
5579 	else
5580 		WREG32(mmRLC_SPM_VMID, data);
5581 
5582 	amdgpu_gfx_off_ctrl(adev, true);
5583 }
5584 
5585 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5586 	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5587 	.set_safe_mode = gfx_v8_0_set_safe_mode,
5588 	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
5589 	.init = gfx_v8_0_rlc_init,
5590 	.get_csb_size = gfx_v8_0_get_csb_size,
5591 	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
5592 	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5593 	.resume = gfx_v8_0_rlc_resume,
5594 	.stop = gfx_v8_0_rlc_stop,
5595 	.reset = gfx_v8_0_rlc_reset,
5596 	.start = gfx_v8_0_rlc_start,
5597 	.update_spm_vmid = gfx_v8_0_update_spm_vmid
5598 };
5599 
5600 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5601 						      bool enable)
5602 {
5603 	uint32_t temp, data;
5604 
5605 	/* It is disabled by HW by default */
5606 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5607 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5608 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5609 				/* 1 - RLC memory Light sleep */
5610 				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5611 
5612 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5613 				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5614 		}
5615 
5616 		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
5617 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5618 		if (adev->flags & AMD_IS_APU)
5619 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5620 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5621 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5622 		else
5623 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5624 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5625 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5626 				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5627 
5628 		if (temp != data)
5629 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5630 
5631 		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5632 		gfx_v8_0_wait_for_rlc_serdes(adev);
5633 
5634 		/* 5 - clear mgcg override */
5635 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5636 
5637 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5638 			/* 6 - Enable CGTS (Tree Shade) MGCG/MGLS */
5639 			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5640 			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5641 			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5642 			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5643 			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5644 			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5645 			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5646 				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5647 			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5648 			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5649 			if (temp != data)
5650 				WREG32(mmCGTS_SM_CTRL_REG, data);
5651 		}
5652 		udelay(50);
5653 
5654 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5655 		gfx_v8_0_wait_for_rlc_serdes(adev);
5656 	} else {
5657 		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5658 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5659 		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5660 				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5661 				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5662 				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5663 		if (temp != data)
5664 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5665 
5666 		/* 2 - disable MGLS in RLC */
5667 		data = RREG32(mmRLC_MEM_SLP_CNTL);
5668 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5669 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5670 			WREG32(mmRLC_MEM_SLP_CNTL, data);
5671 		}
5672 
5673 		/* 3 - disable MGLS in CP */
5674 		data = RREG32(mmCP_MEM_SLP_CNTL);
5675 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5676 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5677 			WREG32(mmCP_MEM_SLP_CNTL, data);
5678 		}
5679 
5680 		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5681 		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5682 		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5683 				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5684 		if (temp != data)
5685 			WREG32(mmCGTS_SM_CTRL_REG, data);
5686 
5687 		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5688 		gfx_v8_0_wait_for_rlc_serdes(adev);
5689 
5690 		/* 6 - set mgcg override */
5691 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5692 
5693 		udelay(50);
5694 
5695 		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5696 		gfx_v8_0_wait_for_rlc_serdes(adev);
5697 	}
5698 }
5699 
5700 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5701 						      bool enable)
5702 {
5703 	uint32_t temp, temp1, data, data1;
5704 
5705 	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5706 
5707 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5708 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5709 		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5710 		if (temp1 != data1)
5711 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5712 
5713 		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5714 		gfx_v8_0_wait_for_rlc_serdes(adev);
5715 
5716 		/* 2 - clear cgcg override */
5717 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5718 
5719 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5720 		gfx_v8_0_wait_for_rlc_serdes(adev);
5721 
5722 		/* 3 - write cmd to set CGLS */
5723 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5724 
5725 		/* 4 - enable cgcg */
5726 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5727 
5728 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5729 			/* enable cgls*/
5730 			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5731 
5732 			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5733 			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5734 
5735 			if (temp1 != data1)
5736 				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5737 		} else {
5738 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5739 		}
5740 
5741 		if (temp != data)
5742 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5743 
5744 		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5745 		 * Cmp_busy/GFX_Idle interrupts
5746 		 */
5747 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5748 	} else {
5749 		/* disable cntx_empty_int_enable & GFX Idle interrupt */
5750 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5751 
5752 		/* TEST CGCG */
5753 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5754 		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5755 				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5756 		if (temp1 != data1)
5757 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5758 
5759 		/* read gfx register to wake up cgcg */
5760 		RREG32(mmCB_CGTT_SCLK_CTRL);
5761 		RREG32(mmCB_CGTT_SCLK_CTRL);
5762 		RREG32(mmCB_CGTT_SCLK_CTRL);
5763 		RREG32(mmCB_CGTT_SCLK_CTRL);
5764 
5765 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5766 		gfx_v8_0_wait_for_rlc_serdes(adev);
5767 
5768 		/* write cmd to Set CGCG Override */
5769 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5770 
5771 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5772 		gfx_v8_0_wait_for_rlc_serdes(adev);
5773 
5774 		/* write cmd to Clear CGLS */
5775 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5776 
5777 		/* disable cgcg, cgls should be disabled too. */
5778 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5779 			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5780 		if (temp != data)
5781 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5782 		/* enable interrupts again for PG */
5783 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5784 	}
5785 
5786 	gfx_v8_0_wait_for_rlc_serdes(adev);
5787 }

5788 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5789 					    bool enable)
5790 {
5791 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5792 
5793 	if (enable) {
5794 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5795 		 * ===  MGCG + MGLS + TS(CG/LS) ===
5796 		 */
5797 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5798 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5799 	} else {
5800 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5801 		 * ===  CGCG + CGLS ===
5802 		 */
5803 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5804 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5805 	}
5806 
5807 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5808 	return 0;
5809 }
5810 
5811 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5812 					  enum amd_clockgating_state state)
5813 {
5814 	uint32_t msg_id, pp_state = 0;
5815 	uint32_t pp_support_state = 0;
5816 
5817 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5818 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5819 			pp_support_state = PP_STATE_SUPPORT_LS;
5820 			pp_state = PP_STATE_LS;
5821 		}
5822 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5823 			pp_support_state |= PP_STATE_SUPPORT_CG;
5824 			pp_state |= PP_STATE_CG;
5825 		}
5826 		if (state == AMD_CG_STATE_UNGATE)
5827 			pp_state = 0;
5828 
5829 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5830 				PP_BLOCK_GFX_CG,
5831 				pp_support_state,
5832 				pp_state);
5833 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5834 	}
5835 
5836 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5837 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5838 			pp_support_state = PP_STATE_SUPPORT_LS;
5839 			pp_state = PP_STATE_LS;
5840 		}
5841 
5842 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5843 			pp_support_state |= PP_STATE_SUPPORT_CG;
5844 			pp_state |= PP_STATE_CG;
5845 		}
5846 
5847 		if (state == AMD_CG_STATE_UNGATE)
5848 			pp_state = 0;
5849 
5850 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5851 				PP_BLOCK_GFX_MG,
5852 				pp_support_state,
5853 				pp_state);
5854 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5855 	}
5856 
5857 	return 0;
5858 }
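
/*
 * Worked example of the logic above: with both AMD_CG_SUPPORT_GFX_CGCG
 * and AMD_CG_SUPPORT_GFX_CGLS set and state == AMD_CG_STATE_GATE, the
 * first block builds
 *
 *	pp_support_state = PP_STATE_SUPPORT_LS | PP_STATE_SUPPORT_CG;
 *	pp_state         = PP_STATE_LS | PP_STATE_CG;
 *	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, PP_BLOCK_GFX_CG,
 *			      pp_support_state, pp_state);
 *
 * while AMD_CG_STATE_UNGATE keeps pp_support_state but forces pp_state
 * to 0, asking the SMU to ungate the feature.
 */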
5859 
5860 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5861 					  enum amd_clockgating_state state)
5862 {
5864 	uint32_t msg_id, pp_state = 0;
5865 	uint32_t pp_support_state = 0;
5866 
5867 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5868 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5869 			pp_support_state = PP_STATE_SUPPORT_LS;
5870 			pp_state = PP_STATE_LS;
5871 		}
5872 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5873 			pp_support_state |= PP_STATE_SUPPORT_CG;
5874 			pp_state |= PP_STATE_CG;
5875 		}
5876 		if (state == AMD_CG_STATE_UNGATE)
5877 			pp_state = 0;
5878 
5879 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5880 				PP_BLOCK_GFX_CG,
5881 				pp_support_state,
5882 				pp_state);
5883 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5884 	}
5885 
5886 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5887 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5888 			pp_support_state = PP_STATE_SUPPORT_LS;
5889 			pp_state = PP_STATE_LS;
5890 		}
5891 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5892 			pp_support_state |= PP_STATE_SUPPORT_CG;
5893 			pp_state |= PP_STATE_CG;
5894 		}
5895 		if (state == AMD_CG_STATE_UNGATE)
5896 			pp_state = 0;
5897 
5898 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5899 				PP_BLOCK_GFX_3D,
5900 				pp_support_state,
5901 				pp_state);
5902 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5903 	}
5904 
5905 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5906 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5907 			pp_support_state = PP_STATE_SUPPORT_LS;
5908 			pp_state = PP_STATE_LS;
5909 		}
5910 
5911 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5912 			pp_support_state |= PP_STATE_SUPPORT_CG;
5913 			pp_state |= PP_STATE_CG;
5914 		}
5915 
5916 		if (state == AMD_CG_STATE_UNGATE)
5917 			pp_state = 0;
5918 
5919 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5920 				PP_BLOCK_GFX_MG,
5921 				pp_support_state,
5922 				pp_state);
5923 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5924 	}
5925 
5926 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5927 		pp_support_state = PP_STATE_SUPPORT_LS;
5928 
5929 		if (state == AMD_CG_STATE_UNGATE)
5930 			pp_state = 0;
5931 		else
5932 			pp_state = PP_STATE_LS;
5933 
5934 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5935 				PP_BLOCK_GFX_RLC,
5936 				pp_support_state,
5937 				pp_state);
5938 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5939 	}
5940 
5941 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5942 		pp_support_state = PP_STATE_SUPPORT_LS;
5943 
5944 		if (state == AMD_CG_STATE_UNGATE)
5945 			pp_state = 0;
5946 		else
5947 			pp_state = PP_STATE_LS;
5948 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5949 			PP_BLOCK_GFX_CP,
5950 			pp_support_state,
5951 			pp_state);
5952 		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5953 	}
5954 
5955 	return 0;
5956 }
5957 
5958 static int gfx_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
5959 					  enum amd_clockgating_state state)
5960 {
5961 	struct amdgpu_device *adev = ip_block->adev;
5962 
5963 	if (amdgpu_sriov_vf(adev))
5964 		return 0;
5965 
5966 	switch (adev->asic_type) {
5967 	case CHIP_FIJI:
5968 	case CHIP_CARRIZO:
5969 	case CHIP_STONEY:
5970 		gfx_v8_0_update_gfx_clock_gating(adev,
5971 						 state == AMD_CG_STATE_GATE);
5972 		break;
5973 	case CHIP_TONGA:
5974 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5975 		break;
5976 	case CHIP_POLARIS10:
5977 	case CHIP_POLARIS11:
5978 	case CHIP_POLARIS12:
5979 	case CHIP_VEGAM:
5980 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5981 		break;
5982 	default:
5983 		break;
5984 	}
5985 	return 0;
5986 }
5987 
5988 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
5989 {
5990 	return *ring->rptr_cpu_addr;
5991 }
5992 
5993 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5994 {
5995 	struct amdgpu_device *adev = ring->adev;
5996 
5997 	if (ring->use_doorbell)
5998 		/* XXX check if swapping is necessary on BE */
5999 		return *ring->wptr_cpu_addr;
6000 	else
6001 		return RREG32(mmCP_RB0_WPTR);
6002 }
6003 
6004 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6005 {
6006 	struct amdgpu_device *adev = ring->adev;
6007 
6008 	if (ring->use_doorbell) {
6009 		/* XXX check if swapping is necessary on BE */
6010 		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6011 		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6012 	} else {
6013 		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6014 		(void)RREG32(mmCP_RB0_WPTR);
6015 	}
6016 }
6017 
6018 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6019 {
6020 	u32 ref_and_mask, reg_mem_engine;
6021 
6022 	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6023 	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6024 		switch (ring->me) {
6025 		case 1:
6026 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6027 			break;
6028 		case 2:
6029 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6030 			break;
6031 		default:
6032 			return;
6033 		}
6034 		reg_mem_engine = 0;
6035 	} else {
6036 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6037 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6038 	}
6039 
6040 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6041 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6042 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6043 				 reg_mem_engine));
6044 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6045 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6046 	amdgpu_ring_write(ring, ref_and_mask);
6047 	amdgpu_ring_write(ring, ref_and_mask);
6048 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6049 }
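
/*
 * Note on ref_and_mask above: the per-pipe "flush done" flags in
 * GPU_HDP_FLUSH_DONE appear to occupy consecutive bits, so shifting
 * CP2_MASK (MEC1) or CP6_MASK (MEC2) left by ring->pipe selects the flag
 * for that specific pipe, e.g. MEC1 pipe 2 polls CP2_MASK << 2.
 */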
6050 
6051 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6052 {
6053 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6054 	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6055 		EVENT_INDEX(4));
6056 
6057 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6058 	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6059 		EVENT_INDEX(0));
6060 }
6061 
6062 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6063 					struct amdgpu_job *job,
6064 					struct amdgpu_ib *ib,
6065 					uint32_t flags)
6066 {
6067 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6068 	u32 header, control = 0;
6069 
6070 	if (ib->flags & AMDGPU_IB_FLAG_CE)
6071 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6072 	else
6073 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6074 
6075 	control |= ib->length_dw | (vmid << 24);
6076 
6077 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6078 		control |= INDIRECT_BUFFER_PRE_ENB(1);
6079 
6080 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
6081 			gfx_v8_0_ring_emit_de_meta(ring);
6082 	}
6083 
6084 	amdgpu_ring_write(ring, header);
6085 	amdgpu_ring_write(ring,
6086 #ifdef __BIG_ENDIAN
6087 			  (2 << 0) |
6088 #endif
6089 			  (ib->gpu_addr & 0xFFFFFFFC));
6090 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6091 	amdgpu_ring_write(ring, control);
6092 }
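
/*
 * Worked example of the control word above: an IB of 16 dwords submitted
 * with vmid 3 yields control = 16 | (3 << 24) = 0x03000010 (ignoring the
 * SRIOV preemption bit).
 */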
6093 
6094 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6095 					  struct amdgpu_job *job,
6096 					  struct amdgpu_ib *ib,
6097 					  uint32_t flags)
6098 {
6099 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6100 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6101 
6102 	/* Currently there is a high likelihood of a wave ID mismatch
6103 	 * between ME and GDS, leading to a HW deadlock, because ME generates
6104 	 * different wave IDs than the GDS expects. This happens
6105 	 * randomly when at least 5 compute pipes use GDS ordered append.
6106 	 * The wave IDs generated by ME are also wrong after suspend/resume.
6107 	 * Those are probably bugs somewhere else in the kernel driver.
6108 	 *
6109 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6110 	 * GDS to 0 for this ring (me/pipe).
6111 	 */
6112 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6113 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6114 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6115 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6116 	}
6117 
6118 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6119 	amdgpu_ring_write(ring,
6120 #ifdef __BIG_ENDIAN
6121 				(2 << 0) |
6122 #endif
6123 				(ib->gpu_addr & 0xFFFFFFFC));
6124 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6125 	amdgpu_ring_write(ring, control);
6126 }
6127 
6128 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6129 					 u64 seq, unsigned flags)
6130 {
6131 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6132 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6133 	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
6134 
6135 	/* Workaround for cache flush problems: first send a dummy EOP
6136 	 * event down the pipe with a sequence number one below the real one.
6137 	 */
6138 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6139 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6140 				 EOP_TC_ACTION_EN |
6141 				 EOP_TC_WB_ACTION_EN |
6142 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6143 				 EVENT_INDEX(5)));
6144 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6145 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6146 				DATA_SEL(1) | INT_SEL(0));
6147 	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6148 	amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6149 
6150 	/* Then send the real EOP event down the pipe:
6151 	 * EVENT_WRITE_EOP - flush caches, send int */
6152 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6153 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6154 				 EOP_TC_ACTION_EN |
6155 				 EOP_TC_WB_ACTION_EN |
6156 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6157 				 EVENT_INDEX(5) |
6158 				 (exec ? EOP_EXEC : 0)));
6159 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6160 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6161 			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6162 	amdgpu_ring_write(ring, lower_32_bits(seq));
6163 	amdgpu_ring_write(ring, upper_32_bits(seq));
6165 }
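
/*
 * Why the dummy EOP with seq - 1 is harmless: fence waiters treat a fence
 * as signaled once the write-back value reaches their sequence number, so
 * a write of seq - 1 can never wake a waiter for seq. A minimal,
 * hypothetical polling sketch of that comparison (wraparound-safe, not
 * the driver's actual wait path):
 */
#if 0
static bool fence_reached(const volatile u32 *wb, u32 seq)
{
	/* signed distance handles 32-bit sequence wraparound */
	return (s32)(*wb - seq) >= 0;
}
#endif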
6166 
6167 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6168 {
6169 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6170 	uint32_t seq = ring->fence_drv.sync_seq;
6171 	uint64_t addr = ring->fence_drv.gpu_addr;
6172 
6173 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6174 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6175 				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6176 				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6177 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6178 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6179 	amdgpu_ring_write(ring, seq);
6180 	amdgpu_ring_write(ring, 0xffffffff);
6181 	amdgpu_ring_write(ring, 4); /* poll interval */
6182 }
6183 
6184 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6185 					unsigned vmid, uint64_t pd_addr)
6186 {
6187 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6188 
6189 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6190 
6191 	/* wait for the invalidate to complete */
6192 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6193 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6194 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6195 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6196 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6197 	amdgpu_ring_write(ring, 0);
6198 	amdgpu_ring_write(ring, 0); /* ref */
6199 	amdgpu_ring_write(ring, 0); /* mask */
6200 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6201 
6202 	/* compute doesn't have PFP */
6203 	if (usepfp) {
6204 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6205 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6206 		amdgpu_ring_write(ring, 0x0);
6207 	}
6208 }
6209 
6210 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6211 {
6212 	return *ring->wptr_cpu_addr;
6213 }
6214 
6215 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6216 {
6217 	struct amdgpu_device *adev = ring->adev;
6218 
6219 	/* XXX check if swapping is necessary on BE */
6220 	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
6221 	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6222 }
6223 
6224 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6225 					     u64 addr, u64 seq,
6226 					     unsigned flags)
6227 {
6228 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6229 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6230 
6231 	/* RELEASE_MEM - flush caches, send int */
6232 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6233 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6234 				 EOP_TC_ACTION_EN |
6235 				 EOP_TC_WB_ACTION_EN |
6236 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6237 				 EVENT_INDEX(5)));
6238 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6239 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6240 	amdgpu_ring_write(ring, upper_32_bits(addr));
6241 	amdgpu_ring_write(ring, lower_32_bits(seq));
6242 	amdgpu_ring_write(ring, upper_32_bits(seq));
6243 }
6244 
6245 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6246 					 u64 seq, unsigned int flags)
6247 {
6248 	/* we only allocate 32 bits for each seq wb address */
6249 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6250 
6251 	/* write fence seq to the "addr" */
6252 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6253 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6254 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6255 	amdgpu_ring_write(ring, lower_32_bits(addr));
6256 	amdgpu_ring_write(ring, upper_32_bits(addr));
6257 	amdgpu_ring_write(ring, lower_32_bits(seq));
6258 
6259 	if (flags & AMDGPU_FENCE_FLAG_INT) {
6260 		/* set register to trigger INT */
6261 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6262 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6263 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6264 		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6265 		amdgpu_ring_write(ring, 0);
6266 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6267 	}
6268 }
6269 
6270 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6271 {
6272 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6273 	amdgpu_ring_write(ring, 0);
6274 }
6275 
6276 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6277 {
6278 	uint32_t dw2 = 0;
6279 
6280 	if (amdgpu_sriov_vf(ring->adev))
6281 		gfx_v8_0_ring_emit_ce_meta(ring);
6282 
6283 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6284 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6285 		gfx_v8_0_ring_emit_vgt_flush(ring);
6286 		/* set load_global_config & load_global_uconfig */
6287 		dw2 |= 0x8001;
6288 		/* set load_cs_sh_regs */
6289 		dw2 |= 0x01000000;
6290 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6291 		dw2 |= 0x10002;
6292 
6293 		/* set load_ce_ram if preamble presented */
6294 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6295 			dw2 |= 0x10000000;
6296 	} else {
6297 		/* still load_ce_ram if this is the first time the preamble is
6298 		 * presented, even though no context switch happens.
6299 		 */
6300 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6301 			dw2 |= 0x10000000;
6302 	}
6303 
6304 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6305 	amdgpu_ring_write(ring, dw2);
6306 	amdgpu_ring_write(ring, 0);
6307 }
6308 
6309 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
6310 						  uint64_t addr)
6311 {
6312 	unsigned ret;
6313 
6314 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6315 	amdgpu_ring_write(ring, lower_32_bits(addr));
6316 	amdgpu_ring_write(ring, upper_32_bits(addr));
6317 	/* discard following DWs if *cond_exec_gpu_addr==0 */
6318 	amdgpu_ring_write(ring, 0);
6319 	ret = ring->wptr & ring->buf_mask;
6320 	/* patch dummy value later */
6321 	amdgpu_ring_write(ring, 0);
6322 	return ret;
6323 }
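
/*
 * The dword written last above is only a placeholder: the caller records
 * the returned ring offset and later patches that slot with the number of
 * dwords the CP should conditionally skip when the value at addr is 0.
 */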
6324 
6325 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6326 				    uint32_t reg_val_offs)
6327 {
6328 	struct amdgpu_device *adev = ring->adev;
6329 
6330 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6331 	amdgpu_ring_write(ring, 0 |	/* src: register */
6332 				(5 << 8) |	/* dst: memory */
6333 				(1 << 20));	/* write confirm */
6334 	amdgpu_ring_write(ring, reg);
6335 	amdgpu_ring_write(ring, 0);
6336 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6337 				reg_val_offs * 4));
6338 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6339 				reg_val_offs * 4));
6340 }
6341 
6342 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6343 				  uint32_t val)
6344 {
6345 	uint32_t cmd;
6346 
6347 	switch (ring->funcs->type) {
6348 	case AMDGPU_RING_TYPE_GFX:
6349 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6350 		break;
6351 	case AMDGPU_RING_TYPE_KIQ:
6352 		cmd = 1 << 16; /* no inc addr */
6353 		break;
6354 	default:
6355 		cmd = WR_CONFIRM;
6356 		break;
6357 	}
6358 
6359 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6360 	amdgpu_ring_write(ring, cmd);
6361 	amdgpu_ring_write(ring, reg);
6362 	amdgpu_ring_write(ring, 0);
6363 	amdgpu_ring_write(ring, val);
6364 }
6365 
6366 static void gfx_v8_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
6367 				  int mem_space, int opt, uint32_t addr0,
6368 				  uint32_t addr1, uint32_t ref, uint32_t mask,
6369 				  uint32_t inv)
6370 {
6371 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6372 	amdgpu_ring_write(ring,
6373 			  /* memory (1) or register (0) */
6374 			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
6375 			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
6376 			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
6377 			   WAIT_REG_MEM_ENGINE(eng_sel)));
6378 
6379 	if (mem_space)
6380 		BUG_ON(addr0 & 0x3); /* Dword align */
6381 	amdgpu_ring_write(ring, addr0);
6382 	amdgpu_ring_write(ring, addr1);
6383 	amdgpu_ring_write(ring, ref);
6384 	amdgpu_ring_write(ring, mask);
6385 	amdgpu_ring_write(ring, inv); /* poll interval */
6386 }
6387 
6388 static void gfx_v8_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
6389 					uint32_t val, uint32_t mask)
6390 {
6391 	gfx_v8_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
6392 }
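
/*
 * Usage example from this file: gfx_v8_0_reset_kgq() below emits
 * gfx_v8_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff) to make
 * the ring wait until the requested VMID reset bits read back as zero
 * before clearing the register.
 */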
6393 
6394 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6395 {
6396 	struct amdgpu_device *adev = ring->adev;
6397 	uint32_t value = 0;
6398 
6399 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6400 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6401 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6402 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6403 	WREG32(mmSQ_CMD, value);
6404 }
6405 
6406 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6407 						 enum amdgpu_interrupt_state state)
6408 {
6409 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6410 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6411 }
6412 
6413 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6414 						     int me, int pipe,
6415 						     enum amdgpu_interrupt_state state)
6416 {
6417 	u32 mec_int_cntl, mec_int_cntl_reg;
6418 
6419 	/*
6420 	 * amdgpu controls only the first MEC. That's why this function only
6421 	 * handles the setting of interrupts for this specific MEC. All other
6422 	 * pipes' interrupts are set by amdkfd.
6423 	 */
6424 
6425 	if (me == 1) {
6426 		switch (pipe) {
6427 		case 0:
6428 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6429 			break;
6430 		case 1:
6431 			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6432 			break;
6433 		case 2:
6434 			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6435 			break;
6436 		case 3:
6437 			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6438 			break;
6439 		default:
6440 			DRM_DEBUG("invalid pipe %d\n", pipe);
6441 			return;
6442 		}
6443 	} else {
6444 		DRM_DEBUG("invalid me %d\n", me);
6445 		return;
6446 	}
6447 
6448 	switch (state) {
6449 	case AMDGPU_IRQ_STATE_DISABLE:
6450 		mec_int_cntl = RREG32(mec_int_cntl_reg);
6451 		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6452 		WREG32(mec_int_cntl_reg, mec_int_cntl);
6453 		break;
6454 	case AMDGPU_IRQ_STATE_ENABLE:
6455 		mec_int_cntl = RREG32(mec_int_cntl_reg);
6456 		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6457 		WREG32(mec_int_cntl_reg, mec_int_cntl);
6458 		break;
6459 	default:
6460 		break;
6461 	}
6462 }
6463 
6464 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6465 					     struct amdgpu_irq_src *source,
6466 					     unsigned type,
6467 					     enum amdgpu_interrupt_state state)
6468 {
6469 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6470 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6471 
6472 	return 0;
6473 }
6474 
6475 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6476 					      struct amdgpu_irq_src *source,
6477 					      unsigned type,
6478 					      enum amdgpu_interrupt_state state)
6479 {
6480 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6481 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6482 
6483 	return 0;
6484 }
6485 
6486 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6487 					    struct amdgpu_irq_src *src,
6488 					    unsigned type,
6489 					    enum amdgpu_interrupt_state state)
6490 {
6491 	switch (type) {
6492 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6493 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6494 		break;
6495 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6496 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6497 		break;
6498 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6499 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6500 		break;
6501 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6502 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6503 		break;
6504 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6505 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6506 		break;
6507 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6508 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6509 		break;
6510 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6511 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6512 		break;
6513 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6514 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6515 		break;
6516 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6517 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6518 		break;
6519 	default:
6520 		break;
6521 	}
6522 	return 0;
6523 }
6524 
6525 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6526 					 struct amdgpu_irq_src *source,
6527 					 unsigned int type,
6528 					 enum amdgpu_interrupt_state state)
6529 {
6530 	int enable_flag;
6531 
6532 	switch (state) {
6533 	case AMDGPU_IRQ_STATE_DISABLE:
6534 		enable_flag = 0;
6535 		break;
6536 
6537 	case AMDGPU_IRQ_STATE_ENABLE:
6538 		enable_flag = 1;
6539 		break;
6540 
6541 	default:
6542 		return -EINVAL;
6543 	}
6544 
6545 	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6546 	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6547 	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6548 	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6549 	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6550 	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6551 		     enable_flag);
6552 	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6553 		     enable_flag);
6554 	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6555 		     enable_flag);
6556 	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6557 		     enable_flag);
6558 	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6559 		     enable_flag);
6560 	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6561 		     enable_flag);
6562 	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6563 		     enable_flag);
6564 	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6565 		     enable_flag);
6566 
6567 	return 0;
6568 }
6569 
6570 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6571 				     struct amdgpu_irq_src *source,
6572 				     unsigned int type,
6573 				     enum amdgpu_interrupt_state state)
6574 {
6575 	int enable_flag;
6576 
6577 	switch (state) {
6578 	case AMDGPU_IRQ_STATE_DISABLE:
6579 		enable_flag = 1;
6580 		break;
6581 
6582 	case AMDGPU_IRQ_STATE_ENABLE:
6583 		enable_flag = 0;
6584 		break;
6585 
6586 	default:
6587 		return -EINVAL;
6588 	}
6589 
6590 	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6591 		     enable_flag);
6592 
6593 	return 0;
6594 }
6595 
6596 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6597 			    struct amdgpu_irq_src *source,
6598 			    struct amdgpu_iv_entry *entry)
6599 {
6600 	int i;
6601 	u8 me_id, pipe_id, queue_id;
6602 	struct amdgpu_ring *ring;
6603 
6604 	DRM_DEBUG("IH: CP EOP\n");
6605 	me_id = (entry->ring_id & 0x0c) >> 2;
6606 	pipe_id = (entry->ring_id & 0x03) >> 0;
6607 	queue_id = (entry->ring_id & 0x70) >> 4;
6608 
6609 	switch (me_id) {
6610 	case 0:
6611 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6612 		break;
6613 	case 1:
6614 	case 2:
6615 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6616 			ring = &adev->gfx.compute_ring[i];
6617 			/* Per-queue interrupt is supported for MEC starting from VI.
6618 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6619 			 */
6620 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6621 				amdgpu_fence_process(ring);
6622 		}
6623 		break;
6624 	}
6625 	return 0;
6626 }
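
/*
 * Worked example of the ring_id decode above: ring_id 0x25 gives
 * me_id = (0x25 & 0x0c) >> 2 = 1, pipe_id = 0x25 & 0x03 = 1 and
 * queue_id = (0x25 & 0x70) >> 4 = 2, i.e. MEC1, pipe 1, queue 2.
 */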
6627 
6628 static void gfx_v8_0_fault(struct amdgpu_device *adev,
6629 			   struct amdgpu_iv_entry *entry)
6630 {
6631 	u8 me_id, pipe_id, queue_id;
6632 	struct amdgpu_ring *ring;
6633 	int i;
6634 
6635 	me_id = (entry->ring_id & 0x0c) >> 2;
6636 	pipe_id = (entry->ring_id & 0x03) >> 0;
6637 	queue_id = (entry->ring_id & 0x70) >> 4;
6638 
6639 	switch (me_id) {
6640 	case 0:
6641 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6642 		break;
6643 	case 1:
6644 	case 2:
6645 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6646 			ring = &adev->gfx.compute_ring[i];
6647 			if (ring->me == me_id && ring->pipe == pipe_id &&
6648 			    ring->queue == queue_id)
6649 				drm_sched_fault(&ring->sched);
6650 		}
6651 		break;
6652 	}
6653 }
6654 
6655 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6656 				 struct amdgpu_irq_src *source,
6657 				 struct amdgpu_iv_entry *entry)
6658 {
6659 	DRM_ERROR("Illegal register access in command stream\n");
6660 	gfx_v8_0_fault(adev, entry);
6661 	return 0;
6662 }
6663 
6664 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6665 				  struct amdgpu_irq_src *source,
6666 				  struct amdgpu_iv_entry *entry)
6667 {
6668 	DRM_ERROR("Illegal instruction in command stream\n");
6669 	gfx_v8_0_fault(adev, entry);
6670 	return 0;
6671 }
6672 
6673 static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6674 				     struct amdgpu_irq_src *source,
6675 				     struct amdgpu_iv_entry *entry)
6676 {
6677 	DRM_ERROR("CP EDC/ECC error detected.\n");
6678 	return 0;
6679 }
6680 
6681 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
6682 				  bool from_wq)
6683 {
6684 	u32 enc, se_id, sh_id, cu_id;
6685 	char type[20];
6686 	int sq_edc_source = -1;
6687 
6688 	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6689 	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6690 
6691 	switch (enc) {
6692 		case 0:
6693 			DRM_INFO("SQ general purpose intr detected: "
6694 					"se_id %d, immed_overflow %d, host_reg_overflow %d, "
6695 					"host_cmd_overflow %d, cmd_timestamp %d, "
6696 					"reg_timestamp %d, thread_trace_buff_full %d, "
6697 					"wlt %d, thread_trace %d.\n",
6698 					se_id,
6699 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6700 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6701 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6702 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6703 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6704 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6705 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6706 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6707 					);
6708 			break;
6709 		case 1:
6710 		case 2:
6711 
6712 			cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6713 			sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6714 
6715 			/*
6716 			 * This function can be called either directly from ISR
6717 			 * or from BH in which case we can access SQ_EDC_INFO
6718 			 * instance
6719 			 */
6720 			if (from_wq) {
6721 				mutex_lock(&adev->grbm_idx_mutex);
6722 				gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0);
6723 
6724 				sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6725 
6726 				gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6727 				mutex_unlock(&adev->grbm_idx_mutex);
6728 			}
6729 
6730 			if (enc == 1)
6731 				sprintf(type, "instruction intr");
6732 			else
6733 				sprintf(type, "EDC/ECC error");
6734 
6735 			DRM_INFO(
6736 				"SQ %s detected: "
6737 					"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6738 					"trap %s, sq_edc_info.source %s.\n",
6739 					type, se_id, sh_id, cu_id,
6740 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6741 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6742 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6743 					REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6744 					(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6745 				);
6746 			break;
6747 		default:
6748 			DRM_ERROR("SQ invalid encoding type.\n");
6749 	}
6750 }
6751 
6752 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6753 {
6755 	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6756 	struct sq_work *sq_work = container_of(work, struct sq_work, work);
6757 
6758 	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
6759 }
6760 
6761 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6762 			   struct amdgpu_irq_src *source,
6763 			   struct amdgpu_iv_entry *entry)
6764 {
6765 	unsigned ih_data = entry->src_data[0];
6766 
6767 	/*
6768 	 * Try to submit work so SQ_EDC_INFO can be accessed from
6769 	 * BH. If previous work submission hasn't finished yet
6770 	 * just print whatever info is possible directly from the ISR.
6771 	 */
6772 	if (work_pending(&adev->gfx.sq_work.work)) {
6773 		gfx_v8_0_parse_sq_irq(adev, ih_data, false);
6774 	} else {
6775 		adev->gfx.sq_work.ih_data = ih_data;
6776 		schedule_work(&adev->gfx.sq_work.work);
6777 	}
6778 
6779 	return 0;
6780 }
6781 
6782 static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
6783 {
6784 	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
6785 	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6786 			  PACKET3_TC_ACTION_ENA |
6787 			  PACKET3_SH_KCACHE_ACTION_ENA |
6788 			  PACKET3_SH_ICACHE_ACTION_ENA |
6789 			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6790 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6791 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */
6792 	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
6793 }
6794 
6795 static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
6796 {
6797 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6798 	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
6799 			  PACKET3_TC_ACTION_ENA |
6800 			  PACKET3_SH_KCACHE_ACTION_ENA |
6801 			  PACKET3_SH_ICACHE_ACTION_ENA |
6802 			  PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */
6803 	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
6804 	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
6805 	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
6806 	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
6807 	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
6808 }
6809 
6810 
6811 /* the mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
6812 #define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT	0x0000007f
6813 static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6814 					uint32_t pipe, bool enable)
6815 {
6816 	uint32_t val;
6817 	uint32_t wcl_cs_reg;
6818 
6819 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;
6820 
6821 	switch (pipe) {
6822 	case 0:
6823 		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
6824 		break;
6825 	case 1:
6826 		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
6827 		break;
6828 	case 2:
6829 		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
6830 		break;
6831 	case 3:
6832 		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
6833 		break;
6834 	default:
6835 		DRM_DEBUG("invalid pipe %d\n", pipe);
6836 		return;
6837 	}
6838 
6839 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6841 }
6842 
6843 #define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT	0x07ffffff
6844 static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6845 {
6846 	struct amdgpu_device *adev = ring->adev;
6847 	uint32_t val;
6848 	int i;
6849 
6850 	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register that limits
6851 	 * the number of gfx waves. Setting the low 5 bits (0x1f) makes sure gfx
6852 	 * only gets around 25% of GPU resources.
6853 	 */
6854 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6855 	amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
6856 
6857 	/* Restrict waves for normal/low priority compute queues as well
6858 	 * to get best QoS for high priority compute jobs.
6859 	 *
6860 	 * amdgpu controls only the 1st ME (CS pipes 0-3).
6861 	 */
6862 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6863 		if (i != ring->pipe)
6864 			gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
6866 	}
6868 }
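
/*
 * Arithmetic behind the "around 25%" figure above, assuming the low 7
 * bits form the multiplier field: 0x1f out of the full scale 0x7f is
 * 31/127, i.e. roughly a quarter of the default wave budget.
 */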
6869 
6870 static int gfx_v8_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
6871 {
6872 	struct amdgpu_device *adev = ring->adev;
6873 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
6874 	struct amdgpu_ring *kiq_ring = &kiq->ring;
6875 	unsigned long flags;
6876 	u32 tmp;
6877 	int r;
6878 
6879 	if (amdgpu_sriov_vf(adev))
6880 		return -EINVAL;
6881 
6882 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
6883 		return -EINVAL;
6884 
6885 	spin_lock_irqsave(&kiq->ring_lock, flags);
6886 
6887 	if (amdgpu_ring_alloc(kiq_ring, 5)) {
6888 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
6889 		return -ENOMEM;
6890 	}
6891 
6892 	tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
6893 	gfx_v8_0_ring_emit_wreg(kiq_ring, mmCP_VMID_RESET, tmp);
6894 	amdgpu_ring_commit(kiq_ring);
6895 
6896 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
6897 
6898 	r = amdgpu_ring_test_ring(kiq_ring);
6899 	if (r)
6900 		return r;
6901 
6902 	if (amdgpu_ring_alloc(ring, 7 + 12 + 5))
6903 		return -ENOMEM;
6904 	gfx_v8_0_ring_emit_fence_gfx(ring, ring->fence_drv.gpu_addr,
6905 				     ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
6906 	gfx_v8_0_ring_emit_reg_wait(ring, mmCP_VMID_RESET, 0, 0xffff);
6907 	gfx_v8_0_ring_emit_wreg(ring, mmCP_VMID_RESET, 0);
6908 
6909 	return amdgpu_ring_test_ring(ring);
6910 }
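
/*
 * The amdgpu_ring_alloc(ring, 7 + 12 + 5) budget above matches what is
 * emitted next: a two-packet fence (12 dwords), a WAIT_REG_MEM (7 dwords)
 * and a WRITE_DATA (5 dwords), mirroring the per-packet sizes listed in
 * the emit_frame_size accounting below.
 */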
6911 
6912 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6913 	.name = "gfx_v8_0",
6914 	.early_init = gfx_v8_0_early_init,
6915 	.late_init = gfx_v8_0_late_init,
6916 	.sw_init = gfx_v8_0_sw_init,
6917 	.sw_fini = gfx_v8_0_sw_fini,
6918 	.hw_init = gfx_v8_0_hw_init,
6919 	.hw_fini = gfx_v8_0_hw_fini,
6920 	.suspend = gfx_v8_0_suspend,
6921 	.resume = gfx_v8_0_resume,
6922 	.is_idle = gfx_v8_0_is_idle,
6923 	.wait_for_idle = gfx_v8_0_wait_for_idle,
6924 	.check_soft_reset = gfx_v8_0_check_soft_reset,
6925 	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
6926 	.soft_reset = gfx_v8_0_soft_reset,
6927 	.post_soft_reset = gfx_v8_0_post_soft_reset,
6928 	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
6929 	.set_powergating_state = gfx_v8_0_set_powergating_state,
6930 	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
6931 };
6932 
6933 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6934 	.type = AMDGPU_RING_TYPE_GFX,
6935 	.align_mask = 0xff,
6936 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6937 	.support_64bit_ptrs = false,
6938 	.get_rptr = gfx_v8_0_ring_get_rptr,
6939 	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6940 	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6941 	.emit_frame_size = /* maximum 215 dwords when counting 16 IBs in */
6942 		5 +  /* COND_EXEC */
6943 		7 +  /* PIPELINE_SYNC */
6944 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6945 		12 +  /* FENCE for VM_FLUSH */
6946 		20 + /* GDS switch */
6947 		4 + /* double SWITCH_BUFFER,
6948 		       the first COND_EXEC jumps to the place just
6949 		       prior to this double SWITCH_BUFFER */
6950 		5 + /* COND_EXEC */
6951 		7 +	 /*	HDP_flush */
6952 		4 +	 /*	VGT_flush */
6953 		14 + /*	CE_META */
6954 		31 + /*	DE_META */
6955 		3 + /* CNTX_CTRL */
6956 		5 + /* HDP_INVL */
6957 		12 + 12 + /* FENCE x2 */
6958 		2 + /* SWITCH_BUFFER */
6959 		5, /* SURFACE_SYNC */
6960 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
6961 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6962 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6963 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6964 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6965 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6966 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6967 	.test_ring = gfx_v8_0_ring_test_ring,
6968 	.test_ib = gfx_v8_0_ring_test_ib,
6969 	.insert_nop = amdgpu_ring_insert_nop,
6970 	.pad_ib = amdgpu_ring_generic_pad_ib,
6971 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
6972 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6973 	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6974 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
6975 	.soft_recovery = gfx_v8_0_ring_soft_recovery,
6976 	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
6977 	.reset = gfx_v8_0_reset_kgq,
6978 };
6979 
6980 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6981 	.type = AMDGPU_RING_TYPE_COMPUTE,
6982 	.align_mask = 0xff,
6983 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6984 	.support_64bit_ptrs = false,
6985 	.get_rptr = gfx_v8_0_ring_get_rptr,
6986 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
6987 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
6988 	.emit_frame_size =
6989 		20 + /* gfx_v8_0_ring_emit_gds_switch */
6990 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
6991 		5 + /* hdp_invalidate */
6992 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6993 		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6994 		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6995 		7 + /* gfx_v8_0_emit_mem_sync_compute */
6996 		5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6997 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6998 	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
6999 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
7000 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
7001 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7002 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7003 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7004 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7005 	.test_ring = gfx_v8_0_ring_test_ring,
7006 	.test_ib = gfx_v8_0_ring_test_ib,
7007 	.insert_nop = amdgpu_ring_insert_nop,
7008 	.pad_ib = amdgpu_ring_generic_pad_ib,
7009 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7010 	.soft_recovery = gfx_v8_0_ring_soft_recovery,
7011 	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
7012 	.emit_wave_limit = gfx_v8_0_emit_wave_limit,
7013 };
7014 
7015 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7016 	.type = AMDGPU_RING_TYPE_KIQ,
7017 	.align_mask = 0xff,
7018 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7019 	.support_64bit_ptrs = false,
7020 	.get_rptr = gfx_v8_0_ring_get_rptr,
7021 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
7022 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
7023 	.emit_frame_size =
7024 		20 + /* gfx_v8_0_ring_emit_gds_switch */
7025 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
7026 		5 + /* hdp_invalidate */
7027 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7028 		17 + /* gfx_v8_0_ring_emit_vm_flush */
7029 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7030 	.emit_ib_size =	7, /* gfx_v8_0_ring_emit_ib_compute */
7031 	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7032 	.test_ring = gfx_v8_0_ring_test_ring,
7033 	.insert_nop = amdgpu_ring_insert_nop,
7034 	.pad_ib = amdgpu_ring_generic_pad_ib,
7035 	.emit_rreg = gfx_v8_0_ring_emit_rreg,
7036 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7037 };
7038 
7039 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7040 {
7041 	int i;
7042 
7043 	adev->gfx.kiq[0].ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7044 
7045 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7046 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7047 
7048 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7049 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7050 }
7051 
7052 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7053 	.set = gfx_v8_0_set_eop_interrupt_state,
7054 	.process = gfx_v8_0_eop_irq,
7055 };
7056 
7057 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7058 	.set = gfx_v8_0_set_priv_reg_fault_state,
7059 	.process = gfx_v8_0_priv_reg_irq,
7060 };
7061 
7062 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7063 	.set = gfx_v8_0_set_priv_inst_fault_state,
7064 	.process = gfx_v8_0_priv_inst_irq,
7065 };
7066 
7067 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
7068 	.set = gfx_v8_0_set_cp_ecc_int_state,
7069 	.process = gfx_v8_0_cp_ecc_error_irq,
7070 };
7071 
7072 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
7073 	.set = gfx_v8_0_set_sq_int_state,
7074 	.process = gfx_v8_0_sq_irq,
7075 };
7076 
7077 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7078 {
7079 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7080 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7081 
7082 	adev->gfx.priv_reg_irq.num_types = 1;
7083 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7084 
7085 	adev->gfx.priv_inst_irq.num_types = 1;
7086 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7087 
7088 	adev->gfx.cp_ecc_error_irq.num_types = 1;
7089 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
7090 
7091 	adev->gfx.sq_irq.num_types = 1;
7092 	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
7093 }
7094 
7095 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7096 {
7097 	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7098 }
7099 
7100 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7101 {
7102 	/* init asic gds info */
7103 	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
7104 	adev->gds.gws_size = 64;
7105 	adev->gds.oa_size = 16;
7106 	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
7107 }
7108 
7109 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7110 						 u32 bitmap)
7111 {
7112 	u32 data;
7113 
7114 	if (!bitmap)
7115 		return;
7116 
7117 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7118 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7119 
7120 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7121 }
7122 
7123 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7124 {
7125 	u32 data, mask;
7126 
7127 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7128 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7129 
7130 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7131 
7132 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7133 }
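
/*
 * Worked example: with max_cu_per_sh = 8 the created bitmask is 0xff; if
 * the INACTIVE_CUS field reads 0x03 (CUs 0 and 1 fused off or disabled),
 * the function returns ~0x03 & 0xff = 0xfc, i.e. CUs 2-7 active.
 */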
7134 
7135 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7136 {
7137 	int i, j, k, counter, active_cu_number = 0;
7138 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7139 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7140 	unsigned disable_masks[4 * 2];
7141 	u32 ao_cu_num;
7142 
7143 	memset(cu_info, 0, sizeof(*cu_info));
7144 
7145 	if (adev->flags & AMD_IS_APU)
7146 		ao_cu_num = 2;
7147 	else
7148 		ao_cu_num = adev->gfx.config.max_cu_per_sh;
7149 
7150 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7151 
7152 	mutex_lock(&adev->grbm_idx_mutex);
7153 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7154 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7155 			mask = 1;
7156 			ao_bitmap = 0;
7157 			counter = 0;
7158 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
7159 			if (i < 4 && j < 2)
7160 				gfx_v8_0_set_user_cu_inactive_bitmap(
7161 					adev, disable_masks[i * 2 + j]);
7162 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7163 			cu_info->bitmap[0][i][j] = bitmap;
7164 
7165 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7166 				if (bitmap & mask) {
7167 					if (counter < ao_cu_num)
7168 						ao_bitmap |= mask;
7169 					counter++;
7170 				}
7171 				mask <<= 1;
7172 			}
7173 			active_cu_number += counter;
7174 			if (i < 2 && j < 2)
7175 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7176 			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7177 		}
7178 	}
7179 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7180 	mutex_unlock(&adev->grbm_idx_mutex);
7181 
7182 	cu_info->number = active_cu_number;
7183 	cu_info->ao_cu_mask = ao_cu_mask;
7184 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7185 	cu_info->max_waves_per_simd = 10;
7186 	cu_info->max_scratch_slots_per_cu = 32;
7187 	cu_info->wave_front_size = 64;
7188 	cu_info->lds_size = 64;
7189 }
7190 
7191 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7192 {
7193 	.type = AMD_IP_BLOCK_TYPE_GFX,
7194 	.major = 8,
7195 	.minor = 0,
7196 	.rev = 0,
7197 	.funcs = &gfx_v8_0_ip_funcs,
7198 };
7199 
7200 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7201 {
7202 	.type = AMD_IP_BLOCK_TYPE_GFX,
7203 	.major = 8,
7204 	.minor = 1,
7205 	.rev = 0,
7206 	.funcs = &gfx_v8_0_ip_funcs,
7207 };
7208 
7209 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7210 {
7211 	uint64_t ce_payload_addr;
7212 	int cnt_ce;
7213 	union {
7214 		struct vi_ce_ib_state regular;
7215 		struct vi_ce_ib_state_chained_ib chained;
7216 	} ce_payload = {};
7217 
7218 	if (ring->adev->virt.chained_ib_support) {
7219 		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7220 			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7221 		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7222 	} else {
7223 		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7224 			offsetof(struct vi_gfx_meta_data, ce_payload);
7225 		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7226 	}
7227 
7228 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7229 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7230 				WRITE_DATA_DST_SEL(8) |
7231 				WR_CONFIRM) |
7232 				WRITE_DATA_CACHE_POLICY(0));
7233 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7234 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7235 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7236 }
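
/*
 * On the cnt_ce arithmetic above: the WRITE_DATA body is three control
 * dwords plus the payload, and the PACKET3 count field is the body length
 * minus one, hence count = (payload dwords) + 4 - 2 = payload + 2. The
 * same accounting is used for the DE metadata below.
 */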
7237 
7238 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7239 {
7240 	uint64_t de_payload_addr, gds_addr, csa_addr;
7241 	int cnt_de;
7242 	union {
7243 		struct vi_de_ib_state regular;
7244 		struct vi_de_ib_state_chained_ib chained;
7245 	} de_payload = {};
7246 
7247 	csa_addr = amdgpu_csa_vaddr(ring->adev);
7248 	gds_addr = csa_addr + 4096;
7249 	if (ring->adev->virt.chained_ib_support) {
7250 		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7251 		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7252 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7253 		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7254 	} else {
7255 		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7256 		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7257 		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7258 		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7259 	}
7260 
7261 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7262 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7263 				WRITE_DATA_DST_SEL(8) |
7264 				WR_CONFIRM) |
7265 				WRITE_DATA_CACHE_POLICY(0));
7266 	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7267 	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7268 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7269 }
7270