xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 7edbb0d389ccad68a75a2dcdbeb682014f1ccffe)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
31 
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
34 
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37 
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
40 
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
45 
46 #include "dce/dce_10_0_d.h"
47 #include "dce/dce_10_0_sh_mask.h"
48 
/* Ring topology for GFX8: one GFX ring, eight compute (MEC) rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Golden GB_ADDR_CONFIG values per ASIC family; programmed through the
 * *_golden_common_all tables below.
 */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field builders for composing GB_TILE_MODEn / GB_MACROTILE_MODEn values. */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Local field masks for RLC_CGTT_MGCG_OVERRIDE (clock-gating override bits). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};
86 
/* CP (ce/pfp/me/mec/mec2) and RLC firmware images for each supported VI ASIC;
 * declared so the module loader knows which blobs this driver may request.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
119 
/* Per-VMID GDS register offsets: {base, size, gws, oa} for VMIDs 0..15. */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
139 
/* Tonga A11 golden tweaks; rows appear to be {reg, and-mask, or-value} triples
 * consumed by amdgpu_program_register_sequence() in init_golden_registers.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
158 
/* Tonga common golden registers (raster config, GB_ADDR_CONFIG, SPI CU
 * reservations); same triple format as the other golden tables.
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
170 
/* Tonga medium/coarse-grain clock-gating init sequence (CGTT/CGTS per-CU
 * settings); same triple format as the other golden tables.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
249 
/* Fiji common golden registers; same triple format as the other tables. */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
263 
/* Fiji A10 golden tweaks; same triple format as the other tables. */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
278 
/* Fiji clock-gating init sequence; same triple format as the other tables. */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
317 
/* Iceland (Topaz) A11 golden tweaks; same triple format as the other tables. */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
336 
/* Iceland (Topaz) common golden registers; same triple format as above. */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
348 
/* Iceland clock-gating init sequence (CU0..CU5 only on this part); same
 * triple format as the other tables.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
416 
/* Carrizo A11 golden tweaks; same triple format as the other tables. */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
430 
/* Carrizo common golden registers; same triple format as above. */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
442 
/* Carrizo clock-gating init sequence; same triple format as the other tables. */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
521 
522 static const u32 stoney_golden_settings_a11[] =
523 {
524 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
525 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
526 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
527 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
528 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
529 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
530   	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
531 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
532 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
533 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
534 };
535 
/* Stoney common golden registers; same triple format as above. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
547 
/* Stoney clock-gating init sequence; same triple format as the other tables. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
557 
558 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
559 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
560 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
561 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
562 
563 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
564 {
565 	switch (adev->asic_type) {
566 	case CHIP_TOPAZ:
567 		amdgpu_program_register_sequence(adev,
568 						 iceland_mgcg_cgcg_init,
569 						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
570 		amdgpu_program_register_sequence(adev,
571 						 golden_settings_iceland_a11,
572 						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
573 		amdgpu_program_register_sequence(adev,
574 						 iceland_golden_common_all,
575 						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
576 		break;
577 	case CHIP_FIJI:
578 		amdgpu_program_register_sequence(adev,
579 						 fiji_mgcg_cgcg_init,
580 						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
581 		amdgpu_program_register_sequence(adev,
582 						 golden_settings_fiji_a10,
583 						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
584 		amdgpu_program_register_sequence(adev,
585 						 fiji_golden_common_all,
586 						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
587 		break;
588 
589 	case CHIP_TONGA:
590 		amdgpu_program_register_sequence(adev,
591 						 tonga_mgcg_cgcg_init,
592 						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
593 		amdgpu_program_register_sequence(adev,
594 						 golden_settings_tonga_a11,
595 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
596 		amdgpu_program_register_sequence(adev,
597 						 tonga_golden_common_all,
598 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
599 		break;
600 	case CHIP_CARRIZO:
601 		amdgpu_program_register_sequence(adev,
602 						 cz_mgcg_cgcg_init,
603 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
604 		amdgpu_program_register_sequence(adev,
605 						 cz_golden_settings_a11,
606 						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
607 		amdgpu_program_register_sequence(adev,
608 						 cz_golden_common_all,
609 						 (const u32)ARRAY_SIZE(cz_golden_common_all));
610 		break;
611 	case CHIP_STONEY:
612 		amdgpu_program_register_sequence(adev,
613 						 stoney_mgcg_cgcg_init,
614 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
615 		amdgpu_program_register_sequence(adev,
616 						 stoney_golden_settings_a11,
617 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
618 		amdgpu_program_register_sequence(adev,
619 						 stoney_golden_common_all,
620 						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
621 		break;
622 	default:
623 		break;
624 	}
625 }
626 
627 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
628 {
629 	int i;
630 
631 	adev->gfx.scratch.num_reg = 7;
632 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
633 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
634 		adev->gfx.scratch.free[i] = true;
635 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
636 	}
637 }
638 
639 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
640 {
641 	struct amdgpu_device *adev = ring->adev;
642 	uint32_t scratch;
643 	uint32_t tmp = 0;
644 	unsigned i;
645 	int r;
646 
647 	r = amdgpu_gfx_scratch_get(adev, &scratch);
648 	if (r) {
649 		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
650 		return r;
651 	}
652 	WREG32(scratch, 0xCAFEDEAD);
653 	r = amdgpu_ring_alloc(ring, 3);
654 	if (r) {
655 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
656 			  ring->idx, r);
657 		amdgpu_gfx_scratch_free(adev, scratch);
658 		return r;
659 	}
660 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
661 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
662 	amdgpu_ring_write(ring, 0xDEADBEEF);
663 	amdgpu_ring_commit(ring);
664 
665 	for (i = 0; i < adev->usec_timeout; i++) {
666 		tmp = RREG32(scratch);
667 		if (tmp == 0xDEADBEEF)
668 			break;
669 		DRM_UDELAY(1);
670 	}
671 	if (i < adev->usec_timeout) {
672 		DRM_INFO("ring test on %d succeeded in %d usecs\n",
673 			 ring->idx, i);
674 	} else {
675 		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
676 			  ring->idx, scratch, tmp);
677 		r = -EINVAL;
678 	}
679 	amdgpu_gfx_scratch_free(adev, scratch);
680 	return r;
681 }
682 
683 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
684 {
685 	struct amdgpu_device *adev = ring->adev;
686 	struct amdgpu_ib ib;
687 	struct fence *f = NULL;
688 	uint32_t scratch;
689 	uint32_t tmp = 0;
690 	unsigned i;
691 	int r;
692 
693 	r = amdgpu_gfx_scratch_get(adev, &scratch);
694 	if (r) {
695 		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
696 		return r;
697 	}
698 	WREG32(scratch, 0xCAFEDEAD);
699 	memset(&ib, 0, sizeof(ib));
700 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
701 	if (r) {
702 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
703 		goto err1;
704 	}
705 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
706 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
707 	ib.ptr[2] = 0xDEADBEEF;
708 	ib.length_dw = 3;
709 
710 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
711 	if (r)
712 		goto err2;
713 
714 	r = fence_wait(f, false);
715 	if (r) {
716 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
717 		goto err2;
718 	}
719 	for (i = 0; i < adev->usec_timeout; i++) {
720 		tmp = RREG32(scratch);
721 		if (tmp == 0xDEADBEEF)
722 			break;
723 		DRM_UDELAY(1);
724 	}
725 	if (i < adev->usec_timeout) {
726 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
727 			 ring->idx, i);
728 		goto err2;
729 	} else {
730 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
731 			  scratch, tmp);
732 		r = -EINVAL;
733 	}
734 err2:
735 	fence_put(f);
736 	amdgpu_ib_free(adev, &ib, NULL);
737 	fence_put(f);
738 err1:
739 	amdgpu_gfx_scratch_free(adev, scratch);
740 	return r;
741 }
742 
743 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
744 {
745 	const char *chip_name;
746 	char fw_name[30];
747 	int err;
748 	struct amdgpu_firmware_info *info = NULL;
749 	const struct common_firmware_header *header = NULL;
750 	const struct gfx_firmware_header_v1_0 *cp_hdr;
751 
752 	DRM_DEBUG("\n");
753 
754 	switch (adev->asic_type) {
755 	case CHIP_TOPAZ:
756 		chip_name = "topaz";
757 		break;
758 	case CHIP_TONGA:
759 		chip_name = "tonga";
760 		break;
761 	case CHIP_CARRIZO:
762 		chip_name = "carrizo";
763 		break;
764 	case CHIP_FIJI:
765 		chip_name = "fiji";
766 		break;
767 	case CHIP_STONEY:
768 		chip_name = "stoney";
769 		break;
770 	default:
771 		BUG();
772 	}
773 
774 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
775 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
776 	if (err)
777 		goto out;
778 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
779 	if (err)
780 		goto out;
781 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
782 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
783 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
784 
785 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
786 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
787 	if (err)
788 		goto out;
789 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
790 	if (err)
791 		goto out;
792 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
793 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
794 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
795 
796 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
797 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
798 	if (err)
799 		goto out;
800 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
801 	if (err)
802 		goto out;
803 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
804 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
805 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
806 
807 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
808 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
809 	if (err)
810 		goto out;
811 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
812 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
813 	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
814 	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
815 
816 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
817 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
818 	if (err)
819 		goto out;
820 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
821 	if (err)
822 		goto out;
823 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
824 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
825 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
826 
827 	if ((adev->asic_type != CHIP_STONEY) &&
828 	    (adev->asic_type != CHIP_TOPAZ)) {
829 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
830 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
831 		if (!err) {
832 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
833 			if (err)
834 				goto out;
835 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
836 				adev->gfx.mec2_fw->data;
837 			adev->gfx.mec2_fw_version =
838 				le32_to_cpu(cp_hdr->header.ucode_version);
839 			adev->gfx.mec2_feature_version =
840 				le32_to_cpu(cp_hdr->ucode_feature_version);
841 		} else {
842 			err = 0;
843 			adev->gfx.mec2_fw = NULL;
844 		}
845 	}
846 
847 	if (adev->firmware.smu_load) {
848 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
849 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
850 		info->fw = adev->gfx.pfp_fw;
851 		header = (const struct common_firmware_header *)info->fw->data;
852 		adev->firmware.fw_size +=
853 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
854 
855 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
856 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
857 		info->fw = adev->gfx.me_fw;
858 		header = (const struct common_firmware_header *)info->fw->data;
859 		adev->firmware.fw_size +=
860 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
861 
862 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
863 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
864 		info->fw = adev->gfx.ce_fw;
865 		header = (const struct common_firmware_header *)info->fw->data;
866 		adev->firmware.fw_size +=
867 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
868 
869 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
870 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
871 		info->fw = adev->gfx.rlc_fw;
872 		header = (const struct common_firmware_header *)info->fw->data;
873 		adev->firmware.fw_size +=
874 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
875 
876 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
877 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
878 		info->fw = adev->gfx.mec_fw;
879 		header = (const struct common_firmware_header *)info->fw->data;
880 		adev->firmware.fw_size +=
881 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
882 
883 		if (adev->gfx.mec2_fw) {
884 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
885 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
886 			info->fw = adev->gfx.mec2_fw;
887 			header = (const struct common_firmware_header *)info->fw->data;
888 			adev->firmware.fw_size +=
889 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
890 		}
891 
892 	}
893 
894 out:
895 	if (err) {
896 		dev_err(adev->dev,
897 			"gfx8: Failed to load firmware \"%s\"\n",
898 			fw_name);
899 		release_firmware(adev->gfx.pfp_fw);
900 		adev->gfx.pfp_fw = NULL;
901 		release_firmware(adev->gfx.me_fw);
902 		adev->gfx.me_fw = NULL;
903 		release_firmware(adev->gfx.ce_fw);
904 		adev->gfx.ce_fw = NULL;
905 		release_firmware(adev->gfx.rlc_fw);
906 		adev->gfx.rlc_fw = NULL;
907 		release_firmware(adev->gfx.mec_fw);
908 		adev->gfx.mec_fw = NULL;
909 		release_firmware(adev->gfx.mec2_fw);
910 		adev->gfx.mec2_fw = NULL;
911 	}
912 	return err;
913 }
914 
/* Tear down the MEC HPD EOP buffer object created by gfx_v8_0_mec_init():
 * unpin, release the reference, and clear the pointer.  Safe to call when
 * the BO was never created.  A failed reserve is only warned about; the
 * unpin/unref still proceed, matching the init path's best-effort teardown.
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}
930 
#define MEC_HPD_SIZE 2048

/* Allocate, pin (in GTT) and zero the HPD EOP buffer used by the compute
 * micro engine.  Only one MEC/pipe is claimed here; the remaining pipes
 * belong to KFD (see the comment below).  On any failure after the BO
 * exists, gfx_v8_0_mec_fini() undoes the partial setup.
 * NOTE(review): the warning strings say "HDP EOP" but the object is the
 * HPD EOP bo — looks like a typo in the messages.
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	/* clear the whole buffer (same size expression as the allocation) */
	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
984 
/* Raw GCN shader binary loaded by gfx_v8_0_do_edc_gpr_workarounds() to
 * touch VGPRs as part of the Carrizo EDC workaround.  Opaque machine
 * code — do not edit by hand.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1021 
/* Raw GCN shader binary loaded by gfx_v8_0_do_edc_gpr_workarounds() to
 * touch SGPRs (used for both SGPR1 and SGPR2 dispatches).  Opaque
 * machine code — do not edit by hand.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1046 
/* Register/value pairs programmed before the VGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Consumed two entries at a time
 * (offset, value) via SET_SH_REG packets.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1066 
/* Register/value pairs for the first SGPR-init dispatch (SE mask 0x0f)
 * in gfx_v8_0_do_edc_gpr_workarounds().  Same (offset, value) layout as
 * vgpr_init_regs.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1086 
/* Register/value pairs for the second SGPR-init dispatch (SE mask 0xf0)
 * in gfx_v8_0_do_edc_gpr_workarounds().  Differs from sgpr1_init_regs
 * only in the static thread-management SE mask.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1106 
/* SEC/DED (single-error-correct / double-error-detect) counter registers
 * read back at the end of gfx_v8_0_do_edc_gpr_workarounds() to clear the
 * error counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1135 
1136 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1137 {
1138 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1139 	struct amdgpu_ib ib;
1140 	struct fence *f = NULL;
1141 	int r, i;
1142 	u32 tmp;
1143 	unsigned total_size, vgpr_offset, sgpr_offset;
1144 	u64 gpu_addr;
1145 
1146 	/* only supported on CZ */
1147 	if (adev->asic_type != CHIP_CARRIZO)
1148 		return 0;
1149 
1150 	/* bail if the compute ring is not ready */
1151 	if (!ring->ready)
1152 		return 0;
1153 
1154 	tmp = RREG32(mmGB_EDC_MODE);
1155 	WREG32(mmGB_EDC_MODE, 0);
1156 
1157 	total_size =
1158 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1159 	total_size +=
1160 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1161 	total_size +=
1162 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1163 	total_size = ALIGN(total_size, 256);
1164 	vgpr_offset = total_size;
1165 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1166 	sgpr_offset = total_size;
1167 	total_size += sizeof(sgpr_init_compute_shader);
1168 
1169 	/* allocate an indirect buffer to put the commands in */
1170 	memset(&ib, 0, sizeof(ib));
1171 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1172 	if (r) {
1173 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1174 		return r;
1175 	}
1176 
1177 	/* load the compute shaders */
1178 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1179 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1180 
1181 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1182 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1183 
1184 	/* init the ib length to 0 */
1185 	ib.length_dw = 0;
1186 
1187 	/* VGPR */
1188 	/* write the register state for the compute dispatch */
1189 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1190 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1191 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1192 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1193 	}
1194 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1195 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1196 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1197 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1198 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1199 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1200 
1201 	/* write dispatch packet */
1202 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1203 	ib.ptr[ib.length_dw++] = 8; /* x */
1204 	ib.ptr[ib.length_dw++] = 1; /* y */
1205 	ib.ptr[ib.length_dw++] = 1; /* z */
1206 	ib.ptr[ib.length_dw++] =
1207 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1208 
1209 	/* write CS partial flush packet */
1210 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1211 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1212 
1213 	/* SGPR1 */
1214 	/* write the register state for the compute dispatch */
1215 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1216 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1217 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1218 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1219 	}
1220 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1221 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1222 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1223 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1224 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1225 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1226 
1227 	/* write dispatch packet */
1228 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1229 	ib.ptr[ib.length_dw++] = 8; /* x */
1230 	ib.ptr[ib.length_dw++] = 1; /* y */
1231 	ib.ptr[ib.length_dw++] = 1; /* z */
1232 	ib.ptr[ib.length_dw++] =
1233 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1234 
1235 	/* write CS partial flush packet */
1236 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1237 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1238 
1239 	/* SGPR2 */
1240 	/* write the register state for the compute dispatch */
1241 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1242 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1243 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1244 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1245 	}
1246 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1247 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1248 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1249 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1250 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1251 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1252 
1253 	/* write dispatch packet */
1254 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1255 	ib.ptr[ib.length_dw++] = 8; /* x */
1256 	ib.ptr[ib.length_dw++] = 1; /* y */
1257 	ib.ptr[ib.length_dw++] = 1; /* z */
1258 	ib.ptr[ib.length_dw++] =
1259 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1260 
1261 	/* write CS partial flush packet */
1262 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1263 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1264 
1265 	/* shedule the ib on the ring */
1266 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1267 	if (r) {
1268 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1269 		goto fail;
1270 	}
1271 
1272 	/* wait for the GPU to finish processing the IB */
1273 	r = fence_wait(f, false);
1274 	if (r) {
1275 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1276 		goto fail;
1277 	}
1278 
1279 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1280 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1281 	WREG32(mmGB_EDC_MODE, tmp);
1282 
1283 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1284 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1285 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1286 
1287 
1288 	/* read back registers to clear the counters */
1289 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1290 		RREG32(sec_ded_counter_registers[i]);
1291 
1292 fail:
1293 	fence_put(f);
1294 	amdgpu_ib_free(adev, &ib, NULL);
1295 	fence_put(f);
1296 
1297 	return r;
1298 }
1299 
/* Fill adev->gfx.config with the per-ASIC shader-engine/pipe/cache limits
 * and derive the golden GB_ADDR_CONFIG value, including the memory row
 * size (read from the fuses on APUs, from MC_ARB_RAMCFG otherwise).
 * Called from sw_init once the MMIO aperture is usable.
 */
static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count depends on the Carrizo SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count depends on the Stoney SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never used below;
	 * presumably the register read itself is intentional — confirm */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;
}
1525 
/* IP-block sw_init hook: register the EOP / privileged-reg / privileged-inst
 * interrupt sources, load firmware, set up the MEC BOs, create the gfx and
 * compute rings, reserve the GDS/GWS/OA partitions, and run the early GPU
 * config.  Returns 0 on success or a negative error code.
 * NOTE(review): on failure after init_microcode(), the loaded firmware is
 * not released here — presumably sw_fini / driver teardown handles it.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
			NULL, &adev->gds.gds_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
		PAGE_SIZE, true,
		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
		NULL, &adev->gds.gws_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
			NULL, &adev->gds.oa_gfx_bo);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	gfx_v8_0_gpu_early_init(adev);

	return 0;
}
1637 
1638 static int gfx_v8_0_sw_fini(void *handle)
1639 {
1640 	int i;
1641 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1642 
1643 	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1644 	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1645 	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1646 
1647 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1648 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1649 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1650 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1651 
1652 	gfx_v8_0_mec_fini(adev);
1653 
1654 	return 0;
1655 }
1656 
1657 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1658 {
1659 	uint32_t *modearray, *mod2array;
1660 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1661 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1662 	u32 reg_offset;
1663 
1664 	modearray = adev->gfx.config.tile_mode_array;
1665 	mod2array = adev->gfx.config.macrotile_mode_array;
1666 
1667 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1668 		modearray[reg_offset] = 0;
1669 
1670 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1671 		mod2array[reg_offset] = 0;
1672 
1673 	switch (adev->asic_type) {
1674 	case CHIP_TOPAZ:
1675 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1676 				PIPE_CONFIG(ADDR_SURF_P2) |
1677 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1678 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1679 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1680 				PIPE_CONFIG(ADDR_SURF_P2) |
1681 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1682 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1683 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1684 				PIPE_CONFIG(ADDR_SURF_P2) |
1685 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1686 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1687 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1688 				PIPE_CONFIG(ADDR_SURF_P2) |
1689 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1690 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1691 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1692 				PIPE_CONFIG(ADDR_SURF_P2) |
1693 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1694 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1695 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1696 				PIPE_CONFIG(ADDR_SURF_P2) |
1697 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1698 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1699 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1700 				PIPE_CONFIG(ADDR_SURF_P2) |
1701 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1702 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1703 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1704 				PIPE_CONFIG(ADDR_SURF_P2));
1705 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1706 				PIPE_CONFIG(ADDR_SURF_P2) |
1707 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1708 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1709 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1710 				 PIPE_CONFIG(ADDR_SURF_P2) |
1711 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1712 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1713 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1714 				 PIPE_CONFIG(ADDR_SURF_P2) |
1715 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1716 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1717 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1718 				 PIPE_CONFIG(ADDR_SURF_P2) |
1719 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1720 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1721 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1722 				 PIPE_CONFIG(ADDR_SURF_P2) |
1723 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1724 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1725 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1726 				 PIPE_CONFIG(ADDR_SURF_P2) |
1727 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1728 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1729 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1730 				 PIPE_CONFIG(ADDR_SURF_P2) |
1731 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1732 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1733 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1734 				 PIPE_CONFIG(ADDR_SURF_P2) |
1735 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1736 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1737 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1738 				 PIPE_CONFIG(ADDR_SURF_P2) |
1739 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1740 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1741 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1742 				 PIPE_CONFIG(ADDR_SURF_P2) |
1743 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1744 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1745 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1746 				 PIPE_CONFIG(ADDR_SURF_P2) |
1747 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1748 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1749 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1750 				 PIPE_CONFIG(ADDR_SURF_P2) |
1751 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1752 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1753 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1754 				 PIPE_CONFIG(ADDR_SURF_P2) |
1755 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1756 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1757 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1758 				 PIPE_CONFIG(ADDR_SURF_P2) |
1759 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1760 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1761 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1762 				 PIPE_CONFIG(ADDR_SURF_P2) |
1763 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1764 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1765 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1766 				 PIPE_CONFIG(ADDR_SURF_P2) |
1767 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1768 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1769 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1770 				 PIPE_CONFIG(ADDR_SURF_P2) |
1771 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1772 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1773 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1774 				 PIPE_CONFIG(ADDR_SURF_P2) |
1775 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1776 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1777 
1778 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1779 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1780 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1781 				NUM_BANKS(ADDR_SURF_8_BANK));
1782 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1783 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1784 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1785 				NUM_BANKS(ADDR_SURF_8_BANK));
1786 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1787 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1788 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1789 				NUM_BANKS(ADDR_SURF_8_BANK));
1790 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1791 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1792 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1793 				NUM_BANKS(ADDR_SURF_8_BANK));
1794 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1796 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1797 				NUM_BANKS(ADDR_SURF_8_BANK));
1798 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1799 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1800 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1801 				NUM_BANKS(ADDR_SURF_8_BANK));
1802 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1803 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1804 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1805 				NUM_BANKS(ADDR_SURF_8_BANK));
1806 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1807 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1808 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1809 				NUM_BANKS(ADDR_SURF_16_BANK));
1810 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1811 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1812 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1813 				NUM_BANKS(ADDR_SURF_16_BANK));
1814 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1815 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1816 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1817 				 NUM_BANKS(ADDR_SURF_16_BANK));
1818 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1819 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1820 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1821 				 NUM_BANKS(ADDR_SURF_16_BANK));
1822 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1823 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1824 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1825 				 NUM_BANKS(ADDR_SURF_16_BANK));
1826 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1827 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1828 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1829 				 NUM_BANKS(ADDR_SURF_16_BANK));
1830 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1831 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1832 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1833 				 NUM_BANKS(ADDR_SURF_8_BANK));
1834 
1835 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1836 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1837 			    reg_offset != 23)
1838 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1839 
1840 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1841 			if (reg_offset != 7)
1842 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1843 
1844 		break;
1845 	case CHIP_FIJI:
1846 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1847 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1848 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1849 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1850 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1851 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1852 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1853 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1854 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1855 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1856 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1857 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1858 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1860 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1861 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1862 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1863 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1864 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1865 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1866 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1867 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1868 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1869 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1870 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1871 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1872 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1873 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1874 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1875 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1876 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1877 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1878 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1879 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1880 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1881 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1882 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1883 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1884 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1885 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1886 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1887 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1888 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1889 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1890 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1892 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1893 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1894 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1895 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1896 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1897 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1898 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1899 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1900 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1901 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1902 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1903 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1904 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1905 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1906 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1907 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1908 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1909 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1910 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1911 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1912 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1913 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1914 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1915 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1916 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1917 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1918 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1919 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1920 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1921 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1922 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1923 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1924 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1925 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1926 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1927 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1928 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1929 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1930 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1931 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1932 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1933 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1934 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1935 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1936 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1937 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1938 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1939 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1940 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1941 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1942 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1943 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1944 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1945 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1946 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1947 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1948 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1949 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1950 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1951 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1952 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1953 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1954 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1955 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1956 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1957 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1958 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1959 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1960 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1961 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1962 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1963 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1964 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1965 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1966 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1967 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1968 
1969 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1970 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1971 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1972 				NUM_BANKS(ADDR_SURF_8_BANK));
1973 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1974 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1975 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1976 				NUM_BANKS(ADDR_SURF_8_BANK));
1977 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1978 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1979 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1980 				NUM_BANKS(ADDR_SURF_8_BANK));
1981 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1982 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1983 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1984 				NUM_BANKS(ADDR_SURF_8_BANK));
1985 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1986 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1987 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1988 				NUM_BANKS(ADDR_SURF_8_BANK));
1989 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1990 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1991 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1992 				NUM_BANKS(ADDR_SURF_8_BANK));
1993 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1994 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1995 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1996 				NUM_BANKS(ADDR_SURF_8_BANK));
1997 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1998 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1999 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2000 				NUM_BANKS(ADDR_SURF_8_BANK));
2001 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2002 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2003 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2004 				NUM_BANKS(ADDR_SURF_8_BANK));
2005 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2006 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2007 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2008 				 NUM_BANKS(ADDR_SURF_8_BANK));
2009 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2010 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2011 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2012 				 NUM_BANKS(ADDR_SURF_8_BANK));
2013 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2014 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2015 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2016 				 NUM_BANKS(ADDR_SURF_8_BANK));
2017 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2018 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2019 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2020 				 NUM_BANKS(ADDR_SURF_8_BANK));
2021 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2022 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2023 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2024 				 NUM_BANKS(ADDR_SURF_4_BANK));
2025 
2026 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2027 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2028 
2029 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2030 			if (reg_offset != 7)
2031 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2032 
2033 		break;
2034 	case CHIP_TONGA:
2035 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2036 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2037 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2038 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2039 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2040 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2041 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2042 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2043 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2044 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2045 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2046 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2047 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2049 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2050 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2051 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2052 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2053 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2054 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2055 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2056 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2057 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2058 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2059 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2060 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2061 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2062 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2063 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2064 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2065 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2066 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2067 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2068 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2069 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2070 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2071 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2072 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2073 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2074 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2075 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2076 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2078 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2079 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2080 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2081 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2082 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2083 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2084 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2085 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2086 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2087 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2088 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2089 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2090 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2091 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2092 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2094 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2095 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2096 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2097 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2098 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2099 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2100 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2101 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2102 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2103 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2104 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2105 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2106 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2107 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2108 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2109 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2110 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2111 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2112 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2113 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2114 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2115 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2116 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2117 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2118 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2119 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2120 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2121 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2122 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2123 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2124 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2125 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2126 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2127 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2128 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2129 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2130 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2131 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2132 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2133 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2134 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2135 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2136 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2137 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2138 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2139 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2140 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2141 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2142 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2143 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2144 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2145 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2146 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2147 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2148 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2150 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2151 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2152 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2153 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2154 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2155 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2156 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2157 
2158 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2159 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2160 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2161 				NUM_BANKS(ADDR_SURF_16_BANK));
2162 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2163 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2164 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2165 				NUM_BANKS(ADDR_SURF_16_BANK));
2166 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2167 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2168 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2169 				NUM_BANKS(ADDR_SURF_16_BANK));
2170 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2172 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2173 				NUM_BANKS(ADDR_SURF_16_BANK));
2174 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2175 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2176 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2177 				NUM_BANKS(ADDR_SURF_16_BANK));
2178 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2179 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2180 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2181 				NUM_BANKS(ADDR_SURF_16_BANK));
2182 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2184 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2185 				NUM_BANKS(ADDR_SURF_16_BANK));
2186 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2187 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2188 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2189 				NUM_BANKS(ADDR_SURF_16_BANK));
2190 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2191 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2192 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2193 				NUM_BANKS(ADDR_SURF_16_BANK));
2194 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2195 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2196 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2197 				 NUM_BANKS(ADDR_SURF_16_BANK));
2198 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2199 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2200 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2201 				 NUM_BANKS(ADDR_SURF_16_BANK));
2202 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2203 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2204 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2205 				 NUM_BANKS(ADDR_SURF_8_BANK));
2206 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2208 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2209 				 NUM_BANKS(ADDR_SURF_4_BANK));
2210 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2211 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2212 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2213 				 NUM_BANKS(ADDR_SURF_4_BANK));
2214 
2215 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2216 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2217 
2218 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2219 			if (reg_offset != 7)
2220 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2221 
2222 		break;
2223 	case CHIP_STONEY:
2224 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225 				PIPE_CONFIG(ADDR_SURF_P2) |
2226 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2227 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2228 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229 				PIPE_CONFIG(ADDR_SURF_P2) |
2230 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2231 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233 				PIPE_CONFIG(ADDR_SURF_P2) |
2234 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2235 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 				PIPE_CONFIG(ADDR_SURF_P2) |
2238 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2239 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2240 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241 				PIPE_CONFIG(ADDR_SURF_P2) |
2242 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2243 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2244 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2245 				PIPE_CONFIG(ADDR_SURF_P2) |
2246 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2247 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2249 				PIPE_CONFIG(ADDR_SURF_P2) |
2250 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2251 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2253 				PIPE_CONFIG(ADDR_SURF_P2));
2254 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2255 				PIPE_CONFIG(ADDR_SURF_P2) |
2256 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2257 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2258 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259 				 PIPE_CONFIG(ADDR_SURF_P2) |
2260 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2261 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2263 				 PIPE_CONFIG(ADDR_SURF_P2) |
2264 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2265 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2266 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2267 				 PIPE_CONFIG(ADDR_SURF_P2) |
2268 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2269 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2270 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2271 				 PIPE_CONFIG(ADDR_SURF_P2) |
2272 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2275 				 PIPE_CONFIG(ADDR_SURF_P2) |
2276 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279 				 PIPE_CONFIG(ADDR_SURF_P2) |
2280 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2282 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2283 				 PIPE_CONFIG(ADDR_SURF_P2) |
2284 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2286 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2287 				 PIPE_CONFIG(ADDR_SURF_P2) |
2288 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2289 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2291 				 PIPE_CONFIG(ADDR_SURF_P2) |
2292 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2295 				 PIPE_CONFIG(ADDR_SURF_P2) |
2296 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2299 				 PIPE_CONFIG(ADDR_SURF_P2) |
2300 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2303 				 PIPE_CONFIG(ADDR_SURF_P2) |
2304 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2305 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2307 				 PIPE_CONFIG(ADDR_SURF_P2) |
2308 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2309 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2311 				 PIPE_CONFIG(ADDR_SURF_P2) |
2312 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2313 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2315 				 PIPE_CONFIG(ADDR_SURF_P2) |
2316 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2317 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319 				 PIPE_CONFIG(ADDR_SURF_P2) |
2320 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323 				 PIPE_CONFIG(ADDR_SURF_P2) |
2324 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2326 
2327 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2329 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2330 				NUM_BANKS(ADDR_SURF_8_BANK));
2331 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2333 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2334 				NUM_BANKS(ADDR_SURF_8_BANK));
2335 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2337 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2338 				NUM_BANKS(ADDR_SURF_8_BANK));
2339 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2341 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342 				NUM_BANKS(ADDR_SURF_8_BANK));
2343 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346 				NUM_BANKS(ADDR_SURF_8_BANK));
2347 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350 				NUM_BANKS(ADDR_SURF_8_BANK));
2351 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354 				NUM_BANKS(ADDR_SURF_8_BANK));
2355 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2356 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2357 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2358 				NUM_BANKS(ADDR_SURF_16_BANK));
2359 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2360 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2361 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2362 				NUM_BANKS(ADDR_SURF_16_BANK));
2363 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2364 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2366 				 NUM_BANKS(ADDR_SURF_16_BANK));
2367 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2368 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2369 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370 				 NUM_BANKS(ADDR_SURF_16_BANK));
2371 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2374 				 NUM_BANKS(ADDR_SURF_16_BANK));
2375 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2377 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378 				 NUM_BANKS(ADDR_SURF_16_BANK));
2379 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2382 				 NUM_BANKS(ADDR_SURF_8_BANK));
2383 
2384 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2385 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2386 			    reg_offset != 23)
2387 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2388 
2389 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2390 			if (reg_offset != 7)
2391 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2392 
2393 		break;
2394 	default:
2395 		dev_warn(adev->dev,
2396 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2397 			 adev->asic_type);
2398 
2399 	case CHIP_CARRIZO:
2400 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401 				PIPE_CONFIG(ADDR_SURF_P2) |
2402 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2403 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2404 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405 				PIPE_CONFIG(ADDR_SURF_P2) |
2406 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2407 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409 				PIPE_CONFIG(ADDR_SURF_P2) |
2410 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2411 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2412 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413 				PIPE_CONFIG(ADDR_SURF_P2) |
2414 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2415 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 				PIPE_CONFIG(ADDR_SURF_P2) |
2418 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2419 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421 				PIPE_CONFIG(ADDR_SURF_P2) |
2422 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2423 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425 				PIPE_CONFIG(ADDR_SURF_P2) |
2426 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2427 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2429 				PIPE_CONFIG(ADDR_SURF_P2));
2430 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2431 				PIPE_CONFIG(ADDR_SURF_P2) |
2432 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2433 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435 				 PIPE_CONFIG(ADDR_SURF_P2) |
2436 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2437 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439 				 PIPE_CONFIG(ADDR_SURF_P2) |
2440 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2441 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2442 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2443 				 PIPE_CONFIG(ADDR_SURF_P2) |
2444 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2445 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447 				 PIPE_CONFIG(ADDR_SURF_P2) |
2448 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2449 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2450 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2451 				 PIPE_CONFIG(ADDR_SURF_P2) |
2452 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2453 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2455 				 PIPE_CONFIG(ADDR_SURF_P2) |
2456 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2457 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2458 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2459 				 PIPE_CONFIG(ADDR_SURF_P2) |
2460 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2462 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2463 				 PIPE_CONFIG(ADDR_SURF_P2) |
2464 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2465 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2466 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2467 				 PIPE_CONFIG(ADDR_SURF_P2) |
2468 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2469 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2470 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2471 				 PIPE_CONFIG(ADDR_SURF_P2) |
2472 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2473 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2474 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2475 				 PIPE_CONFIG(ADDR_SURF_P2) |
2476 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2477 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2478 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2479 				 PIPE_CONFIG(ADDR_SURF_P2) |
2480 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2481 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2482 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2483 				 PIPE_CONFIG(ADDR_SURF_P2) |
2484 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2485 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2486 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2487 				 PIPE_CONFIG(ADDR_SURF_P2) |
2488 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2489 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2491 				 PIPE_CONFIG(ADDR_SURF_P2) |
2492 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2493 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2494 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495 				 PIPE_CONFIG(ADDR_SURF_P2) |
2496 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2497 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2498 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499 				 PIPE_CONFIG(ADDR_SURF_P2) |
2500 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2501 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2502 
2503 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2506 				NUM_BANKS(ADDR_SURF_8_BANK));
2507 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2509 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510 				NUM_BANKS(ADDR_SURF_8_BANK));
2511 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2513 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2514 				NUM_BANKS(ADDR_SURF_8_BANK));
2515 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518 				NUM_BANKS(ADDR_SURF_8_BANK));
2519 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522 				NUM_BANKS(ADDR_SURF_8_BANK));
2523 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2526 				NUM_BANKS(ADDR_SURF_8_BANK));
2527 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2530 				NUM_BANKS(ADDR_SURF_8_BANK));
2531 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2532 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2533 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2534 				NUM_BANKS(ADDR_SURF_16_BANK));
2535 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2536 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2537 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538 				NUM_BANKS(ADDR_SURF_16_BANK));
2539 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2540 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2542 				 NUM_BANKS(ADDR_SURF_16_BANK));
2543 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2544 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2545 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2546 				 NUM_BANKS(ADDR_SURF_16_BANK));
2547 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2550 				 NUM_BANKS(ADDR_SURF_16_BANK));
2551 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2553 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2554 				 NUM_BANKS(ADDR_SURF_16_BANK));
2555 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2558 				 NUM_BANKS(ADDR_SURF_8_BANK));
2559 
2560 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2561 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2562 			    reg_offset != 23)
2563 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2564 
2565 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2566 			if (reg_offset != 7)
2567 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2568 
2569 		break;
2570 	}
2571 }
2572 
2573 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2574 {
2575 	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2576 
2577 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2578 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2579 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2580 	} else if (se_num == 0xffffffff) {
2581 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2582 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2583 	} else if (sh_num == 0xffffffff) {
2584 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2585 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2586 	} else {
2587 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2588 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2589 	}
2590 	WREG32(mmGRBM_GFX_INDEX, data);
2591 }
2592 
2593 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2594 {
2595 	return (u32)((1ULL << bit_width) - 1);
2596 }
2597 
/* Return the bitmap of active render backends (RBs) for the SE/SH pair
 * currently selected via GRBM_GFX_INDEX. */
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	/* OR together the fuse-level (CC) and user-level (GC_USER) RB
	 * disable registers; a backend is off if either disables it. */
	data = RREG32(mmCC_RB_BACKEND_DISABLE);
	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	/* NOTE(review): the mask comes from the CC register and the shift
	 * from the GC_USER register — presumably both fields share the same
	 * layout, but confirm against the gmc register headers. */
	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	/* Limit the result to the RBs belonging to a single shader array. */
	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	/* Disable bits are active-high, so invert to get the active map. */
	return (~data) & mask;
}
2613 
2614 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
2615 {
2616 	int i, j;
2617 	u32 data;
2618 	u32 active_rbs = 0;
2619 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2620 					adev->gfx.config.max_sh_per_se;
2621 
2622 	mutex_lock(&adev->grbm_idx_mutex);
2623 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2624 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2625 			gfx_v8_0_select_se_sh(adev, i, j);
2626 			data = gfx_v8_0_get_rb_active_bitmap(adev);
2627 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2628 					       rb_bitmap_width_per_sh);
2629 		}
2630 	}
2631 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2632 	mutex_unlock(&adev->grbm_idx_mutex);
2633 
2634 	adev->gfx.config.backend_enable_mask = active_rbs;
2635 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2636 }
2637 
2638 /**
2639  * gfx_v8_0_init_compute_vmid - gart enable
2640  *
2641  * @rdev: amdgpu_device pointer
2642  *
2643  * Initialize compute vmid sh_mem registers
2644  *
2645  */
2646 #define DEFAULT_SH_MEM_BASES	(0x6000)
2647 #define FIRST_COMPUTE_VMID	(8)
2648 #define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* Same 16-bit base packed into both halves of SH_MEM_BASES. */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* HSA64 addressing, unaligned access allowed, MTYPE_CC default,
	 * and the PRIVATE_ATC bit set. */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* Select each compute VMID in turn via SRBM and program its
	 * SH_MEM registers. */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* Return the SRBM selection to VMID 0. */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
2682 
/* One-time GFX block bring-up: GRBM timeout, address config, tiling
 * tables, render backend discovery, per-VMID SH_MEM setup and the
 * scan-converter FIFO sizes.  The ordering of these steps is the
 * hardware init sequence — do not reorder. */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	/* Give GRBM register reads a generous timeout. */
	tmp = RREG32(mmGRBM_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
	WREG32(mmGRBM_CNTL, tmp);

	/* Mirror the board's gb_addr_config into the blocks that need it. */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);

	gfx_v8_0_setup_rb(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel/CP) uses uncached default mtype. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* User VMIDs use non-coherently-cached mtype. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Compute VMIDs (8-15) get their own aperture setup. */
	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	/* Program the scan converter FIFO sizes from the per-ASIC config. */
	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
2748 
2749 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2750 {
2751 	u32 i, j, k;
2752 	u32 mask;
2753 
2754 	mutex_lock(&adev->grbm_idx_mutex);
2755 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2756 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2757 			gfx_v8_0_select_se_sh(adev, i, j);
2758 			for (k = 0; k < adev->usec_timeout; k++) {
2759 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2760 					break;
2761 				udelay(1);
2762 			}
2763 		}
2764 	}
2765 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2766 	mutex_unlock(&adev->grbm_idx_mutex);
2767 
2768 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2769 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2770 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2771 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2772 	for (k = 0; k < adev->usec_timeout; k++) {
2773 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2774 			break;
2775 		udelay(1);
2776 	}
2777 }
2778 
2779 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2780 					       bool enable)
2781 {
2782 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2783 
2784 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2785 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2786 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2787 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2788 
2789 	WREG32(mmCP_INT_CNTL_RING0, tmp);
2790 }
2791 
2792 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2793 {
2794 	u32 tmp = RREG32(mmRLC_CNTL);
2795 
2796 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2797 	WREG32(mmRLC_CNTL, tmp);
2798 
2799 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2800 
2801 	gfx_v8_0_wait_for_rlc_serdes(adev);
2802 }
2803 
2804 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2805 {
2806 	u32 tmp = RREG32(mmGRBM_SOFT_RESET);
2807 
2808 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2809 	WREG32(mmGRBM_SOFT_RESET, tmp);
2810 	udelay(50);
2811 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2812 	WREG32(mmGRBM_SOFT_RESET, tmp);
2813 	udelay(50);
2814 }
2815 
2816 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2817 {
2818 	u32 tmp = RREG32(mmRLC_CNTL);
2819 
2820 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2821 	WREG32(mmRLC_CNTL, tmp);
2822 
2823 	/* carrizo do enable cp interrupt after cp inited */
2824 	if (!(adev->flags & AMD_IS_APU))
2825 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2826 
2827 	udelay(50);
2828 }
2829 
/* Upload the RLC GPM microcode image into the RLC's ucode memory.
 * Returns 0 on success, -EINVAL when the firmware was never fetched. */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	/* Firmware must have been requested earlier during init. */
	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* The ucode payload starts at the byte offset declared in the
	 * header; sizes are in bytes, the upload loop is per dword. */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	/* Stream the image in starting at offset 0, then leave the fw
	 * version number in the address register. */
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
2853 
/* Full RLC restart: stop, disable clock/power gating, reset, (re)load
 * the microcode if this driver owns fw loading, and start the RLC.
 * Returns 0 on success or a negative error code. */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	/* When powerplay is enabled it owns firmware loading; otherwise
	 * either load the ucode directly (legacy) or wait for the SMU to
	 * finish loading it. */
	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
2886 
2887 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2888 {
2889 	int i;
2890 	u32 tmp = RREG32(mmCP_ME_CNTL);
2891 
2892 	if (enable) {
2893 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2894 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2895 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2896 	} else {
2897 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2898 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2899 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2900 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2901 			adev->gfx.gfx_ring[i].ready = false;
2902 	}
2903 	WREG32(mmCP_ME_CNTL, tmp);
2904 	udelay(50);
2905 }
2906 
2907 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2908 {
2909 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2910 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2911 	const struct gfx_firmware_header_v1_0 *me_hdr;
2912 	const __le32 *fw_data;
2913 	unsigned i, fw_size;
2914 
2915 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2916 		return -EINVAL;
2917 
2918 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2919 		adev->gfx.pfp_fw->data;
2920 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2921 		adev->gfx.ce_fw->data;
2922 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2923 		adev->gfx.me_fw->data;
2924 
2925 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2926 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2927 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2928 
2929 	gfx_v8_0_cp_gfx_enable(adev, false);
2930 
2931 	/* PFP */
2932 	fw_data = (const __le32 *)
2933 		(adev->gfx.pfp_fw->data +
2934 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2935 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2936 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
2937 	for (i = 0; i < fw_size; i++)
2938 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2939 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2940 
2941 	/* CE */
2942 	fw_data = (const __le32 *)
2943 		(adev->gfx.ce_fw->data +
2944 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2945 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2946 	WREG32(mmCP_CE_UCODE_ADDR, 0);
2947 	for (i = 0; i < fw_size; i++)
2948 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2949 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2950 
2951 	/* ME */
2952 	fw_data = (const __le32 *)
2953 		(adev->gfx.me_fw->data +
2954 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2955 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2956 	WREG32(mmCP_ME_RAM_WADDR, 0);
2957 	for (i = 0; i < fw_size; i++)
2958 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2959 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2960 
2961 	return 0;
2962 }
2963 
2964 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
2965 {
2966 	u32 count = 0;
2967 	const struct cs_section_def *sect = NULL;
2968 	const struct cs_extent_def *ext = NULL;
2969 
2970 	/* begin clear state */
2971 	count += 2;
2972 	/* context control state */
2973 	count += 3;
2974 
2975 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
2976 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2977 			if (sect->id == SECT_CONTEXT)
2978 				count += 2 + ext->reg_count;
2979 			else
2980 				return 0;
2981 		}
2982 	}
2983 	/* pa_sc_raster_config/pa_sc_raster_config1 */
2984 	count += 4;
2985 	/* end clear state */
2986 	count += 2;
2987 	/* clear state */
2988 	count += 2;
2989 
2990 	return count;
2991 }
2992 
/* Bring up the gfx CP and submit the clear-state packet stream on the
 * gfx ring.  The packet count must match gfx_v8_0_get_csb_size().
 * Returns 0 on success or the amdgpu_ring_alloc() error. */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords beyond the CSB for the SET_BASE packet at the end. */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* Emit every SECT_CONTEXT extent of the golden clear state as a
	 * SET_CONTEXT_REG packet. */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* Per-ASIC PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG1 values. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
3075 
/* gfx_v8_0_cp_gfx_resume - bring up the gfx ring (CP ring buffer 0)
 *
 * Programs CP_RB0 size/control, resets the read/write pointers, points
 * the hardware at the ring's GPU address and the writeback rptr slot,
 * optionally sets up doorbells (not available on Topaz), then starts
 * the ring via gfx_v8_0_cp_gfx_start() and runs a ring test.
 *
 * Returns 0 on success or the ring-test error code on failure.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size (RB_BUFSZ is in units of 2^n qwords) */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA is held while the pointers are reset */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* let the pointer reset settle before re-enabling normal operation */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* restrict the doorbell aperture on Tonga */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
3156 
3157 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3158 {
3159 	int i;
3160 
3161 	if (enable) {
3162 		WREG32(mmCP_MEC_CNTL, 0);
3163 	} else {
3164 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3165 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3166 			adev->gfx.compute_ring[i].ready = false;
3167 	}
3168 	udelay(50);
3169 }
3170 
3171 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3172 {
3173 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3174 	const __le32 *fw_data;
3175 	unsigned i, fw_size;
3176 
3177 	if (!adev->gfx.mec_fw)
3178 		return -EINVAL;
3179 
3180 	gfx_v8_0_cp_compute_enable(adev, false);
3181 
3182 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3183 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3184 
3185 	fw_data = (const __le32 *)
3186 		(adev->gfx.mec_fw->data +
3187 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3188 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3189 
3190 	/* MEC1 */
3191 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3192 	for (i = 0; i < fw_size; i++)
3193 		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3194 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3195 
3196 	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3197 	if (adev->gfx.mec2_fw) {
3198 		const struct gfx_firmware_header_v1_0 *mec2_hdr;
3199 
3200 		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3201 		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3202 
3203 		fw_data = (const __le32 *)
3204 			(adev->gfx.mec2_fw->data +
3205 			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3206 		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3207 
3208 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3209 		for (i = 0; i < fw_size; i++)
3210 			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3211 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3212 	}
3213 
3214 	return 0;
3215 }
3216 
/* vi_mqd - Memory Queue Descriptor for VI (gfx v8) compute queues.
 *
 * This structure is written to a GTT buffer object and consumed directly
 * by the CP/MEC hardware and microcode; each field lives at a fixed dword
 * ordinal (noted per field), so fields must never be added, removed or
 * reordered.  The cp_hqd_* fields shadow the corresponding CP_HQD_*
 * registers programmed in gfx_v8_0_cp_compute_resume().  The trailing
 * reserved_t[] area is scratch space owned by the microcode.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
3477 
3478 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3479 {
3480 	int i, r;
3481 
3482 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3483 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3484 
3485 		if (ring->mqd_obj) {
3486 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
3487 			if (unlikely(r != 0))
3488 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3489 
3490 			amdgpu_bo_unpin(ring->mqd_obj);
3491 			amdgpu_bo_unreserve(ring->mqd_obj);
3492 
3493 			amdgpu_bo_unref(&ring->mqd_obj);
3494 			ring->mqd_obj = NULL;
3495 		}
3496 	}
3497 }
3498 
/* gfx_v8_0_cp_compute_resume - bring up all compute queues (HQDs)
 *
 * First programs the EOP buffers for every MEC pipe, then for each
 * compute ring: allocates/pins/maps an MQD buffer object, fills in the
 * MQD, mirrors the relevant fields into the CP_HQD_* registers under
 * srbm_mutex (vi_srbm_select picks the me/pipe/queue being programmed),
 * and activates the queue.  Finally enables doorbells globally, starts
 * the MECs and ring-tests each compute ring.
 *
 * Returns 0 on success or a negative error code if an MQD BO could not
 * be created, reserved, pinned or mapped (already-created state is torn
 * down via gfx_v8_0_cp_compute_fini() on those paths).
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes: program each MEC pipe's EOP buffer */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 belong to MEC1 (me=1), pipes 4-7 to MEC2 (me=2) */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* EOP base is programmed in units of 256 bytes */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues: one MQD per compute ring */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on every shader engine */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP base programmed in the pipe-init loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active, polling up to
		 * adev->usec_timeout microseconds for the dequeue to finish */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* APU-class parts program the MEC doorbell aperture here */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY) {
			/* NOTE(review): Stoney-only GENERIC2 interrupt enable on
			 * ME1 pipe3 — confirm intent against the interrupt handler */
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	/* globally enable doorbell processing for the compute queues */
	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* ring-test every compute ring; failures only mark the ring not
	 * ready, they do not fail the whole resume */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
3753 
3754 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3755 {
3756 	int r;
3757 
3758 	if (!(adev->flags & AMD_IS_APU))
3759 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3760 
3761 	if (!adev->pp_enabled) {
3762 		if (!adev->firmware.smu_load) {
3763 			/* legacy firmware loading */
3764 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
3765 			if (r)
3766 				return r;
3767 
3768 			r = gfx_v8_0_cp_compute_load_microcode(adev);
3769 			if (r)
3770 				return r;
3771 		} else {
3772 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3773 							AMDGPU_UCODE_ID_CP_CE);
3774 			if (r)
3775 				return -EINVAL;
3776 
3777 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3778 							AMDGPU_UCODE_ID_CP_PFP);
3779 			if (r)
3780 				return -EINVAL;
3781 
3782 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3783 							AMDGPU_UCODE_ID_CP_ME);
3784 			if (r)
3785 				return -EINVAL;
3786 
3787 			if (adev->asic_type == CHIP_TOPAZ) {
3788 				r = gfx_v8_0_cp_compute_load_microcode(adev);
3789 				if (r)
3790 					return r;
3791 			} else {
3792 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3793 										 AMDGPU_UCODE_ID_CP_MEC1);
3794 				if (r)
3795 					return -EINVAL;
3796 			}
3797 		}
3798 	}
3799 
3800 	r = gfx_v8_0_cp_gfx_resume(adev);
3801 	if (r)
3802 		return r;
3803 
3804 	r = gfx_v8_0_cp_compute_resume(adev);
3805 	if (r)
3806 		return r;
3807 
3808 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3809 
3810 	return 0;
3811 }
3812 
/* Start (enable=true) or halt (enable=false) both the gfx and compute
 * command processors in one call. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
3818 
/* gfx_v8_0_hw_init - IP-block hw_init hook
 *
 * Applies the golden register settings, initializes the gfx core, then
 * brings up the RLC and finally the CP rings.
 *
 * Returns 0 on success or the first failing step's error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
3838 
/* gfx_v8_0_hw_fini - IP-block hw_fini hook.
 *
 * Drops the privileged register/instruction fault interrupt references,
 * halts the gfx and compute command processors and the RLC, then frees
 * the compute MQD buffer objects.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	return 0;
}
3851 
/* gfx_v8_0_suspend - IP-block suspend hook; suspending is just hw_fini. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini((struct amdgpu_device *)handle);
}
3858 
/* gfx_v8_0_resume - IP-block resume hook; resuming is just hw_init. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init((struct amdgpu_device *)handle);
}
3865 
3866 static bool gfx_v8_0_is_idle(void *handle)
3867 {
3868 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3869 
3870 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3871 		return false;
3872 	else
3873 		return true;
3874 }
3875 
3876 static int gfx_v8_0_wait_for_idle(void *handle)
3877 {
3878 	unsigned i;
3879 	u32 tmp;
3880 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3881 
3882 	for (i = 0; i < adev->usec_timeout; i++) {
3883 		/* read MC_STATUS */
3884 		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3885 
3886 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3887 			return 0;
3888 		udelay(1);
3889 	}
3890 	return -ETIMEDOUT;
3891 }
3892 
/*
 * gfx_v8_0_print_status - dump GFX 8.x register state to the kernel log
 *
 * @handle: amdgpu_device pointer (ip-block handle)
 *
 * Debug helper (called before and after a soft reset) that dumps GRBM/CP
 * status, the tiling tables, the per-SE raster configuration, CP ring 0
 * and MEC setup, assorted RLC registers and the per-VM SH_MEM_* state.
 */
static void gfx_v8_0_print_status(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	dev_info(adev->dev, "GFX 8.x registers\n");
	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
		 RREG32(mmGRBM_STATUS));
	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
		 RREG32(mmGRBM_STATUS2));
	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE0));
	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE1));
	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE2));
	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE3));
	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT1));
	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT2));
	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT3));
	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(mmCP_CPF_BUSY_STAT));
	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPF_STALLED_STAT1));
	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPC_STALLED_STAT1));
	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));

	/* tiling tables: 32 tile modes, 16 macrotile modes */
	for (i = 0; i < 32; i++) {
		dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
	}
	for (i = 0; i < 16; i++) {
		dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
	}
	/* per-SE raster config; sh_num 0xffffffff broadcasts across all SHs */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		dev_info(adev->dev, "  se: %d\n", i);
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG));
		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG_1));
	}
	/* restore broadcast SE/SH selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmGB_ADDR_CONFIG));
	dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmHDP_ADDR_CONFIG));
	dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
		 RREG32(mmDMIF_ADDR_CALC));

	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
		 RREG32(mmCP_MEQ_THRESHOLDS));
	dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
		 RREG32(mmSX_DEBUG_1));
	dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
		 RREG32(mmTA_CNTL_AUX));
	dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL));
	dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
		 RREG32(mmSQ_CONFIG));
	dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
		 RREG32(mmDB_DEBUG));
	dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
		 RREG32(mmDB_DEBUG2));
	dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
		 RREG32(mmDB_DEBUG3));
	dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
		 RREG32(mmCB_HW_CONTROL));
	dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL_1));
	dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
		 RREG32(mmPA_SC_FIFO_SIZE));
	dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
		 RREG32(mmVGT_NUM_INSTANCES));
	dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
		 RREG32(mmCP_PERFMON_CNTL));
	dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
		 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
	dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
		 RREG32(mmVGT_CACHE_INVALIDATION));
	dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
		 RREG32(mmVGT_GS_VERTEX_REUSE));
	dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
		 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
	dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
		 RREG32(mmPA_CL_ENHANCE));
	dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
		 RREG32(mmPA_SC_ENHANCE));

	/* CP / gfx ring 0 setup */
	dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
		 RREG32(mmCP_ME_CNTL));
	dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
		 RREG32(mmCP_MAX_CONTEXT));
	dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
		 RREG32(mmCP_ENDIAN_SWAP));
	dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
		 RREG32(mmCP_DEVICE_ID));

	dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
		 RREG32(mmCP_SEM_WAIT_TIMER));

	dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
		 RREG32(mmCP_RB_WPTR_DELAY));
	dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
		 RREG32(mmCP_RB_VMID));
	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
		 RREG32(mmCP_RB0_WPTR));
	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR));
	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR_HI));
	/* NOTE(review): CP_RB0_CNTL is dumped a second time here; possibly
	 * another register was intended — verify against other gfx dumps. */
	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
		 RREG32(mmCP_RB0_BASE));
	dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
		 RREG32(mmCP_RB0_BASE_HI));
	dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
		 RREG32(mmCP_MEC_CNTL));
	dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
		 RREG32(mmCP_CPF_DEBUG));

	dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
		 RREG32(mmSCRATCH_ADDR));
	dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
		 RREG32(mmSCRATCH_UMSK));

	/* interrupt and RLC state */
	dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
		 RREG32(mmCP_INT_CNTL_RING0));
	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
		 RREG32(mmRLC_CNTL));
	dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
		 RREG32(mmRLC_CGCG_CGLS_CTRL));
	dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_INIT));
	dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_MAX));
	dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
		 RREG32(mmRLC_LB_INIT_CU_MASK));
	dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
		 RREG32(mmRLC_LB_PARAMS));
	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
		 RREG32(mmRLC_MC_CNTL));
	dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
		 RREG32(mmRLC_UCODE_CNTL));

	/* per-VM SH_MEM state is banked behind SRBM; serialize the bank
	 * selection with srbm_mutex and restore bank 0 when done */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		dev_info(adev->dev, "  VM %d:\n", i);
		dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
			 RREG32(mmSH_MEM_CONFIG));
		dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_BASE));
		dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_LIMIT));
		dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
			 RREG32(mmSH_MEM_BASES));
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
4071 
/*
 * gfx_v8_0_soft_reset - soft reset the GFX block
 *
 * @handle: amdgpu_device pointer (ip-block handle)
 *
 * Inspects GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS to decide which
 * soft-reset bits (CP, GFX, RLC, GRBM) are needed.  If any are, the RLC
 * and CP are stopped, the GMCON GFX stall/clear bits are asserted, the
 * reset bits are pulsed (set, delay, clear) and the stall is released.
 * Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS: any busy backend block needs a CP + GFX reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	/* a busy CP also requires resetting the GRBM through SRBM */
	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		/* dump register state before the reset for debugging */
		gfx_v8_0_print_status((void *)adev);
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);

		/* stall and clear the GFX pipe in GMCON before pulsing the
		 * reset bits (condition is always true here; it mirrors the
		 * matching un-stall block after the resets) */
		if (grbm_soft_reset || srbm_soft_reset) {
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 1);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 1);
			WREG32(mmGMCON_DEBUG, tmp);

			udelay(50);
		}

		/* pulse the GRBM reset bits: set, post-read, delay, clear */
		if (grbm_soft_reset) {
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		/* same pulse for the SRBM reset bits */
		if (srbm_soft_reset) {
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		/* release the GMCON stall */
		if (grbm_soft_reset || srbm_soft_reset) {
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 0);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 0);
			WREG32(mmGMCON_DEBUG, tmp);
		}

		/* Wait a little for things to settle down */
		udelay(50);
		gfx_v8_0_print_status((void *)adev);
	}
	return 0;
}
4176 
4177 /**
4178  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4179  *
4180  * @adev: amdgpu_device pointer
4181  *
4182  * Fetches a GPU clock counter snapshot.
4183  * Returns the 64 bit clock counter snapshot.
4184  */
4185 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4186 {
4187 	uint64_t clock;
4188 
4189 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4190 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4191 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4192 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4193 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4194 	return clock;
4195 }
4196 
4197 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4198 					  uint32_t vmid,
4199 					  uint32_t gds_base, uint32_t gds_size,
4200 					  uint32_t gws_base, uint32_t gws_size,
4201 					  uint32_t oa_base, uint32_t oa_size)
4202 {
4203 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4204 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4205 
4206 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4207 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4208 
4209 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
4210 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
4211 
4212 	/* GDS Base */
4213 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4214 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4215 				WRITE_DATA_DST_SEL(0)));
4216 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4217 	amdgpu_ring_write(ring, 0);
4218 	amdgpu_ring_write(ring, gds_base);
4219 
4220 	/* GDS Size */
4221 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4222 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4223 				WRITE_DATA_DST_SEL(0)));
4224 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4225 	amdgpu_ring_write(ring, 0);
4226 	amdgpu_ring_write(ring, gds_size);
4227 
4228 	/* GWS */
4229 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4230 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4231 				WRITE_DATA_DST_SEL(0)));
4232 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4233 	amdgpu_ring_write(ring, 0);
4234 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4235 
4236 	/* OA */
4237 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4238 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4239 				WRITE_DATA_DST_SEL(0)));
4240 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4241 	amdgpu_ring_write(ring, 0);
4242 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4243 }
4244 
4245 static int gfx_v8_0_early_init(void *handle)
4246 {
4247 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4248 
4249 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4250 	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
4251 	gfx_v8_0_set_ring_funcs(adev);
4252 	gfx_v8_0_set_irq_funcs(adev);
4253 	gfx_v8_0_set_gds_init(adev);
4254 	gfx_v8_0_set_rlc_funcs(adev);
4255 
4256 	return 0;
4257 }
4258 
4259 static int gfx_v8_0_late_init(void *handle)
4260 {
4261 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4262 	int r;
4263 
4264 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4265 	if (r)
4266 		return r;
4267 
4268 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4269 	if (r)
4270 		return r;
4271 
4272 	/* requires IBs so do in late init after IB pool is initialized */
4273 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
4274 	if (r)
4275 		return r;
4276 
4277 	return 0;
4278 }
4279 
/* Powergating is not implemented for gfx v8 here; accept any request. */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}
4285 
/*
 * gfx_v8_0_send_serdes_cmd - issue a BPM command over the RLC serdes bus
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: serdes register address (REG_ADDR field of RLC_SERDES_WR_CTRL)
 * @cmd: BPM command data (BPM_DATA field)
 *
 * Broadcasts to all SEs/SHs and all CU/non-CU serdes masters, clears
 * the command fields of RLC_SERDES_WR_CTRL, then writes the new
 * command/address with the BPM address set to broadcast (0xff).
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every SE/SH */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	/* NOTE(review): the Stoney branch does not clear the BPM_DATA and
	 * REG_ADDR fields before OR-ing in the new values below — confirm
	 * this difference from the generic branch is intentional. */
	if (adev->asic_type == CHIP_STONEY)
			data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* 0xff in BPM_ADDR broadcasts the command to all BPMs */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
4326 
4327 #define MSG_ENTER_RLC_SAFE_MODE     1
4328 #define MSG_EXIT_RLC_SAFE_MODE      0
4329 
4330 #define RLC_GPR_REG2__REQ_MASK           0x00000001
4331 #define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
4332 #define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
4333 
/*
 * cz_enter_rlc_safe_mode - request RLC safe mode via the RLC_GPR_REG2 mailbox
 *
 * No-op when the RLC F32 core is disabled, or when neither GFX
 * clockgating nor GFX powergating features are enabled.  Otherwise posts
 * an ENTER message, waits for GFX clock/power status, then waits for the
 * RLC to acknowledge (REQ bit cleared) and records in_safe_mode.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* NOTE(review): 'data' still holds the RLC_CNTL contents here,
		 * so those bits end up in the RLC_GPR_REG2 write along with
		 * REQ/MESSAGE — confirm this is intended. */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait for both GFX clock and power status bits to assert */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to consume the request (REQ cleared) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
4369 
/*
 * cz_exit_rlc_safe_mode - release RLC safe mode via the RLC_GPR_REG2 mailbox
 *
 * Counterpart of cz_enter_rlc_safe_mode(): posts an EXIT message when
 * the same cg/pg feature set is enabled, clears in_safe_mode, and then
 * waits (unconditionally) for the REQ bit to clear.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* NOTE(review): as in cz_enter_rlc_safe_mode, 'data' still
		 * carries the RLC_CNTL contents into this write — verify. */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to acknowledge the exit request */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
			break;
		udelay(1);
	}
}
4395 
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode via RLC_SAFE_MODE
 *
 * No-op when the RLC F32 core is disabled or no GFX clockgating feature
 * is enabled.  Otherwise writes CMD + MESSAGE=1 to RLC_SAFE_MODE, waits
 * for GFX clock/power status, then waits for the RLC to consume the
 * command (CMD bit cleared) and records in_safe_mode.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* NOTE(review): 'data' still holds the RLC_CNTL contents here,
		 * so those bits are written into RLC_SAFE_MODE too — verify. */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for both GFX clock and power status bits to assert */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to consume the command (CMD cleared) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
4429 
/*
 * iceland_exit_rlc_safe_mode - release RLC safe mode via RLC_SAFE_MODE
 *
 * Counterpart of iceland_enter_rlc_safe_mode(): only issues the exit
 * command if we previously entered safe mode, then waits
 * (unconditionally) for the RLC to consume it.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* MESSAGE field cleared to 0 requests exit */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to consume the command (CMD cleared) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
			break;
		udelay(1);
	}
}
4454 
/* No-op safe-mode entry: no hw handshake, just track the flag. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
4459 
/* No-op safe-mode exit: no hw handshake, just track the flag. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
4464 
/* safe-mode handshake via the RLC_GPR_REG2 mailbox (cz variant) */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};

/* safe-mode handshake via the RLC_SAFE_MODE register (iceland variant) */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};

/* no-op variant: only tracks the in_safe_mode flag */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
4479 
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG / MGLS / CGTS
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable medium grain clock gating, false to disable
 *
 * Runs entirely under RLC safe mode.  On enable, turns on RLC and CP
 * memory light sleep and CGTS as permitted by adev->cg_flags, clears the
 * MGCG override bits and sends the clear-override serdes command; on
 * disable, the same steps are applied in reverse.  The step ordering is
 * deliberate — do not reorder.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				/* 1 - RLC memory Light sleep */
				temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmRLC_MEM_SLP_CNTL, data);
			}

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				/* 2 - CP memory Light sleep */
				temp = data = RREG32(mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmCP_MEM_SLP_CNTL, data);
			}
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set; dGPUs clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* CGTS light sleep only with both MGLS and CGTS_LS */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
4592 
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG / CGLS
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable coarse grain clock gating, false to disable
 *
 * Runs under RLC safe mode.  On enable, clears the CGCG/CGLS override
 * bits, sends the matching serdes commands and sets the enable bits in
 * RLC_CGCG_CGLS_CTRL; on disable, the overrides are set and the enable
 * bits cleared.  The step ordering is deliberate — do not reorder.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
4681 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4682 					    bool enable)
4683 {
4684 	if (enable) {
4685 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4686 		 * ===  MGCG + MGLS + TS(CG/LS) ===
4687 		 */
4688 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
4689 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
4690 	} else {
4691 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4692 		 * ===  CGCG + CGLS ===
4693 		 */
4694 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
4695 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
4696 	}
4697 	return 0;
4698 }
4699 
4700 static int gfx_v8_0_set_clockgating_state(void *handle,
4701 					  enum amd_clockgating_state state)
4702 {
4703 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4704 
4705 	switch (adev->asic_type) {
4706 	case CHIP_FIJI:
4707 	case CHIP_CARRIZO:
4708 	case CHIP_STONEY:
4709 		gfx_v8_0_update_gfx_clock_gating(adev,
4710 						 state == AMD_CG_STATE_GATE ? true : false);
4711 		break;
4712 	default:
4713 		break;
4714 	}
4715 	return 0;
4716 }
4717 
4718 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4719 {
4720 	u32 rptr;
4721 
4722 	rptr = ring->adev->wb.wb[ring->rptr_offs];
4723 
4724 	return rptr;
4725 }
4726 
4727 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4728 {
4729 	struct amdgpu_device *adev = ring->adev;
4730 	u32 wptr;
4731 
4732 	if (ring->use_doorbell)
4733 		/* XXX check if swapping is necessary on BE */
4734 		wptr = ring->adev->wb.wb[ring->wptr_offs];
4735 	else
4736 		wptr = RREG32(mmCP_RB0_WPTR);
4737 
4738 	return wptr;
4739 }
4740 
4741 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4742 {
4743 	struct amdgpu_device *adev = ring->adev;
4744 
4745 	if (ring->use_doorbell) {
4746 		/* XXX check if swapping is necessary on BE */
4747 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
4748 		WDOORBELL32(ring->doorbell_index, ring->wptr);
4749 	} else {
4750 		WREG32(mmCP_RB0_WPTR, ring->wptr);
4751 		(void)RREG32(mmCP_RB0_WPTR);
4752 	}
4753 }
4754 
4755 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4756 {
4757 	u32 ref_and_mask, reg_mem_engine;
4758 
4759 	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
4760 		switch (ring->me) {
4761 		case 1:
4762 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
4763 			break;
4764 		case 2:
4765 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
4766 			break;
4767 		default:
4768 			return;
4769 		}
4770 		reg_mem_engine = 0;
4771 	} else {
4772 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
4773 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
4774 	}
4775 
4776 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4777 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
4778 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
4779 				 reg_mem_engine));
4780 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
4781 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
4782 	amdgpu_ring_write(ring, ref_and_mask);
4783 	amdgpu_ring_write(ring, ref_and_mask);
4784 	amdgpu_ring_write(ring, 0x20); /* poll interval */
4785 }
4786 
4787 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
4788 {
4789 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4790 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4791 				 WRITE_DATA_DST_SEL(0) |
4792 				 WR_CONFIRM));
4793 	amdgpu_ring_write(ring, mmHDP_DEBUG0);
4794 	amdgpu_ring_write(ring, 0);
4795 	amdgpu_ring_write(ring, 1);
4796 
4797 }
4798 
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the gfx ring
 *
 * @ring: amdgpu_ring pointer
 * @ib: indirect buffer to execute
 *
 * Publishes the predicted next read pointer to the ring's next_rptr
 * writeback slot, emits a SWITCH_BUFFER when the submission comes from a
 * different context, then emits the INDIRECT_BUFFER packet
 * (INDIRECT_BUFFER_CONST for CE IBs) pointing at the IB.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				  struct amdgpu_ib *ib)
{
	bool need_ctx_switch = ring->current_ctx != ib->ctx;
	u32 header, control = 0;
	/* 5 dwords for the WRITE_DATA packet emitted below */
	u32 next_rptr = ring->wptr + 5;

	/* drop the CE preamble IB for the same context */
	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
		return;

	if (need_ctx_switch)
		next_rptr += 2;	/* 2 dwords for SWITCH_BUFFER */

	next_rptr += 4;	/* 4 dwords for the INDIRECT_BUFFER packet */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (need_ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* CE IBs use the CONST variant of the packet */
	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length in dwords plus the VM id in the top byte */
	control |= ib->length_dw | (ib->vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4842 
/* Emit a compute IB on @ring.
 *
 * Like the gfx variant but without context switching or CE handling:
 * publish the predicted read pointer, then emit the IB packet.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
				  struct amdgpu_ib *ib)
{
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5; /* the 5-dword WRITE_DATA packet below */

	control |= INDIRECT_BUFFER_VALID;

	next_rptr += 4; /* account for the 4-dword IB packet */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length in dwords, VM id in bits 24+ */
	control |= ib->length_dw | (ib->vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  (2 << 0) |
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4871 
/* Emit a fence on the gfx ring: an EVENT_WRITE_EOP packet that flushes
 * TC/TCL1 caches, writes @seq to @addr and optionally raises an
 * interrupt (per @flags).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL: 2 = 64-bit seq write, 1 = 32-bit; INT_SEL 2 requests an irq */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
4891 
/* Wait on @ring until the last synced fence value appears in memory,
 * then (gfx rings only) emit SWITCH_BUFFERs to resync the CE with the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference value */
	amdgpu_ring_write(ring, 0xffffffff); /* compare mask */
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* sync CE with ME to prevent CE fetching CEIB before context switch done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
4916 
/* Flush the GPU TLB for @vm_id on @ring.
 *
 * Writes the page directory base for the VM context, requests an
 * invalidate via VM_INVALIDATE_REQUEST and waits for it to complete.
 * Gfx rings additionally resync the PFP with the ME afterwards.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	/* update the page table base for this VM context */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12); /* base is stored as a page number */

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
4967 
4968 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4969 {
4970 	return ring->adev->wb.wb[ring->rptr_offs];
4971 }
4972 
4973 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4974 {
4975 	return ring->adev->wb.wb[ring->wptr_offs];
4976 }
4977 
4978 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4979 {
4980 	struct amdgpu_device *adev = ring->adev;
4981 
4982 	/* XXX check if swapping is necessary on BE */
4983 	adev->wb.wb[ring->wptr_offs] = ring->wptr;
4984 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4985 }
4986 
/* Emit a fence on a compute ring: a RELEASE_MEM packet that flushes
 * caches, writes @seq to @addr and optionally raises an interrupt
 * (per @flags).
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL: 2 = 64-bit seq write, 1 = 32-bit; INT_SEL 2 requests an irq */
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
5007 
5008 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5009 						 enum amdgpu_interrupt_state state)
5010 {
5011 	u32 cp_int_cntl;
5012 
5013 	switch (state) {
5014 	case AMDGPU_IRQ_STATE_DISABLE:
5015 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5016 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5017 					    TIME_STAMP_INT_ENABLE, 0);
5018 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5019 		break;
5020 	case AMDGPU_IRQ_STATE_ENABLE:
5021 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5022 		cp_int_cntl =
5023 			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5024 				      TIME_STAMP_INT_ENABLE, 1);
5025 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5026 		break;
5027 	default:
5028 		break;
5029 	}
5030 }
5031 
5032 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5033 						     int me, int pipe,
5034 						     enum amdgpu_interrupt_state state)
5035 {
5036 	u32 mec_int_cntl, mec_int_cntl_reg;
5037 
5038 	/*
5039 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
5040 	 * handles the setting of interrupts for this specific pipe. All other
5041 	 * pipes' interrupts are set by amdkfd.
5042 	 */
5043 
5044 	if (me == 1) {
5045 		switch (pipe) {
5046 		case 0:
5047 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5048 			break;
5049 		default:
5050 			DRM_DEBUG("invalid pipe %d\n", pipe);
5051 			return;
5052 		}
5053 	} else {
5054 		DRM_DEBUG("invalid me %d\n", me);
5055 		return;
5056 	}
5057 
5058 	switch (state) {
5059 	case AMDGPU_IRQ_STATE_DISABLE:
5060 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5061 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5062 					     TIME_STAMP_INT_ENABLE, 0);
5063 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5064 		break;
5065 	case AMDGPU_IRQ_STATE_ENABLE:
5066 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5067 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5068 					     TIME_STAMP_INT_ENABLE, 1);
5069 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5070 		break;
5071 	default:
5072 		break;
5073 	}
5074 }
5075 
5076 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5077 					     struct amdgpu_irq_src *source,
5078 					     unsigned type,
5079 					     enum amdgpu_interrupt_state state)
5080 {
5081 	u32 cp_int_cntl;
5082 
5083 	switch (state) {
5084 	case AMDGPU_IRQ_STATE_DISABLE:
5085 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5086 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5087 					    PRIV_REG_INT_ENABLE, 0);
5088 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5089 		break;
5090 	case AMDGPU_IRQ_STATE_ENABLE:
5091 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5092 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5093 					    PRIV_REG_INT_ENABLE, 1);
5094 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5095 		break;
5096 	default:
5097 		break;
5098 	}
5099 
5100 	return 0;
5101 }
5102 
5103 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5104 					      struct amdgpu_irq_src *source,
5105 					      unsigned type,
5106 					      enum amdgpu_interrupt_state state)
5107 {
5108 	u32 cp_int_cntl;
5109 
5110 	switch (state) {
5111 	case AMDGPU_IRQ_STATE_DISABLE:
5112 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5113 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5114 					    PRIV_INSTR_INT_ENABLE, 0);
5115 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5116 		break;
5117 	case AMDGPU_IRQ_STATE_ENABLE:
5118 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5119 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5120 					    PRIV_INSTR_INT_ENABLE, 1);
5121 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5122 		break;
5123 	default:
5124 		break;
5125 	}
5126 
5127 	return 0;
5128 }
5129 
/* Route an EOP interrupt enable/disable request to the matching engine:
 * the gfx ring or one of the MEC1/MEC2 compute pipes.  Unknown @type
 * values are silently ignored.  Returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
5168 
5169 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
5170 			    struct amdgpu_irq_src *source,
5171 			    struct amdgpu_iv_entry *entry)
5172 {
5173 	int i;
5174 	u8 me_id, pipe_id, queue_id;
5175 	struct amdgpu_ring *ring;
5176 
5177 	DRM_DEBUG("IH: CP EOP\n");
5178 	me_id = (entry->ring_id & 0x0c) >> 2;
5179 	pipe_id = (entry->ring_id & 0x03) >> 0;
5180 	queue_id = (entry->ring_id & 0x70) >> 4;
5181 
5182 	switch (me_id) {
5183 	case 0:
5184 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5185 		break;
5186 	case 1:
5187 	case 2:
5188 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5189 			ring = &adev->gfx.compute_ring[i];
5190 			/* Per-queue interrupt is supported for MEC starting from VI.
5191 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5192 			  */
5193 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5194 				amdgpu_fence_process(ring);
5195 		}
5196 		break;
5197 	}
5198 	return 0;
5199 }
5200 
/* Handle a privileged register fault: log it and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
5209 
/* Handle a privileged instruction fault: log it and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
5218 
/* IP-block lifecycle callbacks for GFX v8 (init/fini, suspend/resume,
 * idle/reset and clock/power gating), used by the amdgpu core.
 */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.print_status = gfx_v8_0_print_status,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
5235 
/* Ring callbacks for the gfx ring (register/doorbell wptr, EOP fence). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
5253 
/* Ring callbacks for compute (MEC) rings (writeback wptr, RELEASE_MEM fence). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
5271 
5272 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5273 {
5274 	int i;
5275 
5276 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5277 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5278 
5279 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5280 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5281 }
5282 
/* EOP (end-of-pipe) interrupt source: fence completion handling. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

/* Privileged register access fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

/* Privileged instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
5297 
/* Hook up the GFX interrupt sources (EOP fences and privilege faults). */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
5309 
5310 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
5311 {
5312 	switch (adev->asic_type) {
5313 	case CHIP_TOPAZ:
5314 	case CHIP_STONEY:
5315 		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
5316 		break;
5317 	case CHIP_CARRIZO:
5318 		adev->gfx.rlc.funcs = &cz_rlc_funcs;
5319 		break;
5320 	default:
5321 		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
5322 		break;
5323 	}
5324 }
5325 
/* Initialize ASIC GDS (global data share) sizing: read the total GDS
 * memory size from hardware and pick partition sizes for the gfx and
 * CS (compute) clients based on it.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
5353 
/* Return the active-CU bitmask for the currently selected SE/SH:
 * combine the fuse and user INACTIVE_CUS fields, invert, and limit to
 * max_cu_per_sh bits.  Caller must have selected the SE/SH via
 * gfx_v8_0_select_se_sh() under grbm_idx_mutex.
 */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}
5368 
/* Gather per-SE/SH compute unit information.
 *
 * For each shader engine / shader array, records the active-CU bitmap,
 * counts active CUs, and marks up to two CUs per SH as "always on"
 * (ao_cu_mask).  Returns 0 on success, -EINVAL on NULL arguments.
 */
int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
			 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;

	if (!adev || !cu_info)
		return -EINVAL;

	memset(cu_info, 0, sizeof(*cu_info));

	/* SE/SH selection is a shared GRBM index; serialize it */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first two become "always on" */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* NOTE(review): if i * 16 + j * 8 can reach 32 or more
			 * (e.g. many SEs/SHs), this shift exceeds the 32-bit
			 * width of ao_bitmap — confirm SE/SH limits on VI parts
			 * keep it in range.
			 */
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast SE/SH selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;

	return 0;
}
5410