xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision c0c914eca7f251c70facc37dfebeaf176601918d)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
31 
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
34 
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37 
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
40 
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
45 
46 #include "dce/dce_10_0_d.h"
47 #include "dce/dce_10_0_sh_mask.h"
48 
/* Ring topology for GFX v8: one graphics ring and eight compute rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Per-ASIC golden values for GB_ADDR_CONFIG (memory addressing config). */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field builders for GB_TILE_MODE* / GB_MACROTILE_MODE* register values. */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* RLC_CGTT_MGCG_OVERRIDE bit masks, defined locally.
 * NOTE(review): if the gca register headers ever grow these masks, this
 * local copy risks a redefinition clash — confirm against the headers.
 */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set (1) or clear (0) a BPM register via the serdes path. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX            /* Number of BPM registers (sentinel) */
};
86 
87 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
88 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
90 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
91 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
95 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
97 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
98 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
99 
100 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
101 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
103 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
104 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
106 
107 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
108 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
110 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
111 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
114 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
116 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
117 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
119 
/* Per-VMID GDS register offsets: {BASE, SIZE, GWS, OA} for VMIDs 0-15. */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
139 
/* Tonga A11 golden register settings; rows are u32 triplets consumed by
 * amdgpu_program_register_sequence() — presumably {reg, and_mask, or_mask}.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
158 
/* Tonga common golden settings (raster config, addressing, SPI reserves);
 * u32 triplets applied by amdgpu_program_register_sequence().
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
170 
/* Tonga MGCG/CGCG clockgating init sequence; u32 triplets applied by
 * amdgpu_program_register_sequence(). The middle section programs per-CU
 * (CU0-CU7) CGTS control registers.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
249 
/* Fiji common golden settings; u32 triplets applied by
 * amdgpu_program_register_sequence().
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
263 
/* Fiji A10 golden register settings; u32 triplets applied by
 * amdgpu_program_register_sequence().
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
278 
/* Fiji MGCG/CGCG clockgating init sequence; u32 triplets applied by
 * amdgpu_program_register_sequence(). Unlike Tonga, no per-CU CGTS rows.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
317 
/* Iceland (Topaz) A11 golden register settings; u32 triplets applied by
 * amdgpu_program_register_sequence().
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
336 
/* Iceland (Topaz) common golden settings; u32 triplets applied by
 * amdgpu_program_register_sequence().
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
348 
/* Iceland (Topaz) MGCG/CGCG clockgating init sequence; u32 triplets applied
 * by amdgpu_program_register_sequence(). Per-CU rows cover CU0-CU5 only.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
416 
/* Carrizo A11 golden register settings; u32 triplets applied by
 * amdgpu_program_register_sequence().
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
430 
/* Carrizo common golden settings; u32 triplets applied by
 * amdgpu_program_register_sequence().
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
442 
/* Carrizo MGCG/CGCG clockgating init sequence; u32 triplets applied by
 * amdgpu_program_register_sequence(). The middle section programs per-CU
 * (CU0-CU7) CGTS control registers.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
521 
522 static const u32 stoney_golden_settings_a11[] =
523 {
524 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
525 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
526 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
527 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
528 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
529 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
530   	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
531 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
532 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
533 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
534 };
535 
/* Stoney common golden settings; u32 triplets applied by
 * amdgpu_program_register_sequence().
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
547 
/* Stoney MGCG/CGCG clockgating init sequence; u32 triplets applied by
 * amdgpu_program_register_sequence().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
557 
/* Forward declarations; the definitions live later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
561 
562 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
563 {
564 	switch (adev->asic_type) {
565 	case CHIP_TOPAZ:
566 		amdgpu_program_register_sequence(adev,
567 						 iceland_mgcg_cgcg_init,
568 						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
569 		amdgpu_program_register_sequence(adev,
570 						 golden_settings_iceland_a11,
571 						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
572 		amdgpu_program_register_sequence(adev,
573 						 iceland_golden_common_all,
574 						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
575 		break;
576 	case CHIP_FIJI:
577 		amdgpu_program_register_sequence(adev,
578 						 fiji_mgcg_cgcg_init,
579 						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
580 		amdgpu_program_register_sequence(adev,
581 						 golden_settings_fiji_a10,
582 						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
583 		amdgpu_program_register_sequence(adev,
584 						 fiji_golden_common_all,
585 						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
586 		break;
587 
588 	case CHIP_TONGA:
589 		amdgpu_program_register_sequence(adev,
590 						 tonga_mgcg_cgcg_init,
591 						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
592 		amdgpu_program_register_sequence(adev,
593 						 golden_settings_tonga_a11,
594 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
595 		amdgpu_program_register_sequence(adev,
596 						 tonga_golden_common_all,
597 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
598 		break;
599 	case CHIP_CARRIZO:
600 		amdgpu_program_register_sequence(adev,
601 						 cz_mgcg_cgcg_init,
602 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
603 		amdgpu_program_register_sequence(adev,
604 						 cz_golden_settings_a11,
605 						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
606 		amdgpu_program_register_sequence(adev,
607 						 cz_golden_common_all,
608 						 (const u32)ARRAY_SIZE(cz_golden_common_all));
609 		break;
610 	case CHIP_STONEY:
611 		amdgpu_program_register_sequence(adev,
612 						 stoney_mgcg_cgcg_init,
613 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
614 		amdgpu_program_register_sequence(adev,
615 						 stoney_golden_settings_a11,
616 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
617 		amdgpu_program_register_sequence(adev,
618 						 stoney_golden_common_all,
619 						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
620 		break;
621 	default:
622 		break;
623 	}
624 }
625 
626 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
627 {
628 	int i;
629 
630 	adev->gfx.scratch.num_reg = 7;
631 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
632 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
633 		adev->gfx.scratch.free[i] = true;
634 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
635 	}
636 }
637 
638 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
639 {
640 	struct amdgpu_device *adev = ring->adev;
641 	uint32_t scratch;
642 	uint32_t tmp = 0;
643 	unsigned i;
644 	int r;
645 
646 	r = amdgpu_gfx_scratch_get(adev, &scratch);
647 	if (r) {
648 		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
649 		return r;
650 	}
651 	WREG32(scratch, 0xCAFEDEAD);
652 	r = amdgpu_ring_alloc(ring, 3);
653 	if (r) {
654 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
655 			  ring->idx, r);
656 		amdgpu_gfx_scratch_free(adev, scratch);
657 		return r;
658 	}
659 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
660 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
661 	amdgpu_ring_write(ring, 0xDEADBEEF);
662 	amdgpu_ring_commit(ring);
663 
664 	for (i = 0; i < adev->usec_timeout; i++) {
665 		tmp = RREG32(scratch);
666 		if (tmp == 0xDEADBEEF)
667 			break;
668 		DRM_UDELAY(1);
669 	}
670 	if (i < adev->usec_timeout) {
671 		DRM_INFO("ring test on %d succeeded in %d usecs\n",
672 			 ring->idx, i);
673 	} else {
674 		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
675 			  ring->idx, scratch, tmp);
676 		r = -EINVAL;
677 	}
678 	amdgpu_gfx_scratch_free(adev, scratch);
679 	return r;
680 }
681 
682 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
683 {
684 	struct amdgpu_device *adev = ring->adev;
685 	struct amdgpu_ib ib;
686 	struct fence *f = NULL;
687 	uint32_t scratch;
688 	uint32_t tmp = 0;
689 	unsigned i;
690 	int r;
691 
692 	r = amdgpu_gfx_scratch_get(adev, &scratch);
693 	if (r) {
694 		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
695 		return r;
696 	}
697 	WREG32(scratch, 0xCAFEDEAD);
698 	memset(&ib, 0, sizeof(ib));
699 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
700 	if (r) {
701 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
702 		goto err1;
703 	}
704 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
705 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
706 	ib.ptr[2] = 0xDEADBEEF;
707 	ib.length_dw = 3;
708 
709 	r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
710 			       NULL, &f);
711 	if (r)
712 		goto err2;
713 
714 	r = fence_wait(f, false);
715 	if (r) {
716 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
717 		goto err2;
718 	}
719 	for (i = 0; i < adev->usec_timeout; i++) {
720 		tmp = RREG32(scratch);
721 		if (tmp == 0xDEADBEEF)
722 			break;
723 		DRM_UDELAY(1);
724 	}
725 	if (i < adev->usec_timeout) {
726 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
727 			 ring->idx, i);
728 		goto err2;
729 	} else {
730 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
731 			  scratch, tmp);
732 		r = -EINVAL;
733 	}
734 err2:
735 	fence_put(f);
736 	amdgpu_ib_free(adev, &ib);
737 err1:
738 	amdgpu_gfx_scratch_free(adev, scratch);
739 	return r;
740 }
741 
742 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
743 {
744 	const char *chip_name;
745 	char fw_name[30];
746 	int err;
747 	struct amdgpu_firmware_info *info = NULL;
748 	const struct common_firmware_header *header = NULL;
749 	const struct gfx_firmware_header_v1_0 *cp_hdr;
750 
751 	DRM_DEBUG("\n");
752 
753 	switch (adev->asic_type) {
754 	case CHIP_TOPAZ:
755 		chip_name = "topaz";
756 		break;
757 	case CHIP_TONGA:
758 		chip_name = "tonga";
759 		break;
760 	case CHIP_CARRIZO:
761 		chip_name = "carrizo";
762 		break;
763 	case CHIP_FIJI:
764 		chip_name = "fiji";
765 		break;
766 	case CHIP_STONEY:
767 		chip_name = "stoney";
768 		break;
769 	default:
770 		BUG();
771 	}
772 
773 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
774 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
775 	if (err)
776 		goto out;
777 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
778 	if (err)
779 		goto out;
780 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
781 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
782 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
783 
784 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
785 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
786 	if (err)
787 		goto out;
788 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
789 	if (err)
790 		goto out;
791 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
792 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
793 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
794 
795 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
796 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
797 	if (err)
798 		goto out;
799 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
800 	if (err)
801 		goto out;
802 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
803 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
804 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
805 
806 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
807 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
808 	if (err)
809 		goto out;
810 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
811 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
812 	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
813 	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
814 
815 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
816 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
817 	if (err)
818 		goto out;
819 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
820 	if (err)
821 		goto out;
822 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
823 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
824 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
825 
826 	if ((adev->asic_type != CHIP_STONEY) &&
827 	    (adev->asic_type != CHIP_TOPAZ)) {
828 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
829 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
830 		if (!err) {
831 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
832 			if (err)
833 				goto out;
834 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
835 				adev->gfx.mec2_fw->data;
836 			adev->gfx.mec2_fw_version =
837 				le32_to_cpu(cp_hdr->header.ucode_version);
838 			adev->gfx.mec2_feature_version =
839 				le32_to_cpu(cp_hdr->ucode_feature_version);
840 		} else {
841 			err = 0;
842 			adev->gfx.mec2_fw = NULL;
843 		}
844 	}
845 
846 	if (adev->firmware.smu_load) {
847 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
848 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
849 		info->fw = adev->gfx.pfp_fw;
850 		header = (const struct common_firmware_header *)info->fw->data;
851 		adev->firmware.fw_size +=
852 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
853 
854 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
855 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
856 		info->fw = adev->gfx.me_fw;
857 		header = (const struct common_firmware_header *)info->fw->data;
858 		adev->firmware.fw_size +=
859 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
860 
861 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
862 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
863 		info->fw = adev->gfx.ce_fw;
864 		header = (const struct common_firmware_header *)info->fw->data;
865 		adev->firmware.fw_size +=
866 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
867 
868 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
869 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
870 		info->fw = adev->gfx.rlc_fw;
871 		header = (const struct common_firmware_header *)info->fw->data;
872 		adev->firmware.fw_size +=
873 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
874 
875 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
876 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
877 		info->fw = adev->gfx.mec_fw;
878 		header = (const struct common_firmware_header *)info->fw->data;
879 		adev->firmware.fw_size +=
880 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
881 
882 		if (adev->gfx.mec2_fw) {
883 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
884 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
885 			info->fw = adev->gfx.mec2_fw;
886 			header = (const struct common_firmware_header *)info->fw->data;
887 			adev->firmware.fw_size +=
888 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
889 		}
890 
891 	}
892 
893 out:
894 	if (err) {
895 		dev_err(adev->dev,
896 			"gfx8: Failed to load firmware \"%s\"\n",
897 			fw_name);
898 		release_firmware(adev->gfx.pfp_fw);
899 		adev->gfx.pfp_fw = NULL;
900 		release_firmware(adev->gfx.me_fw);
901 		adev->gfx.me_fw = NULL;
902 		release_firmware(adev->gfx.ce_fw);
903 		adev->gfx.ce_fw = NULL;
904 		release_firmware(adev->gfx.rlc_fw);
905 		adev->gfx.rlc_fw = NULL;
906 		release_firmware(adev->gfx.mec_fw);
907 		adev->gfx.mec_fw = NULL;
908 		release_firmware(adev->gfx.mec2_fw);
909 		adev->gfx.mec2_fw = NULL;
910 	}
911 	return err;
912 }
913 
914 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
915 {
916 	int r;
917 
918 	if (adev->gfx.mec.hpd_eop_obj) {
919 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
920 		if (unlikely(r != 0))
921 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
922 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
923 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
924 
925 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
926 		adev->gfx.mec.hpd_eop_obj = NULL;
927 	}
928 }
929 
930 #define MEC_HPD_SIZE 2048
931 
932 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
933 {
934 	int r;
935 	u32 *hpd;
936 
937 	/*
938 	 * we assign only 1 pipe because all other pipes will
939 	 * be handled by KFD
940 	 */
941 	adev->gfx.mec.num_mec = 1;
942 	adev->gfx.mec.num_pipe = 1;
943 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
944 
945 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
946 		r = amdgpu_bo_create(adev,
947 				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
948 				     PAGE_SIZE, true,
949 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
950 				     &adev->gfx.mec.hpd_eop_obj);
951 		if (r) {
952 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
953 			return r;
954 		}
955 	}
956 
957 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
958 	if (unlikely(r != 0)) {
959 		gfx_v8_0_mec_fini(adev);
960 		return r;
961 	}
962 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
963 			  &adev->gfx.mec.hpd_eop_gpu_addr);
964 	if (r) {
965 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
966 		gfx_v8_0_mec_fini(adev);
967 		return r;
968 	}
969 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
970 	if (r) {
971 		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
972 		gfx_v8_0_mec_fini(adev);
973 		return r;
974 	}
975 
976 	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
977 
978 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
979 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
980 
981 	return 0;
982 }
983 
/* Raw GCN machine-code words for the VGPR init compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds(); presumably each word writes one
 * VGPR so the EDC logic sees initialized registers — semantics of the
 * encoded instructions not verifiable from this file.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1020 
/* Raw GCN machine-code words for the SGPR init compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() (dispatched twice, with different
 * mmCOMPUTE_STATIC_THREAD_MGMT_SE0 masks — see sgpr1/sgpr2_init_regs).
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1045 
/* Register/value pairs written via PACKET3_SET_SH_REG before the VGPR
 * init dispatch in gfx_v8_0_do_edc_gpr_workarounds(): even index =
 * register offset, odd index = value.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1065 
/* Register/value pairs for the first SGPR init dispatch (even index =
 * register offset, odd index = value).  Differs from sgpr2_init_regs
 * only in the static thread management mask (0x0f vs 0xf0).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1085 
/* Register/value pairs for the second SGPR init dispatch (even index =
 * register offset, odd index = value); complementary thread management
 * mask to sgpr1_init_regs (0xf0 vs 0x0f).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1105 
/* EDC SEC/DED counter registers read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds(); per the comment there, the
 * read itself clears the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1134 
/*
 * gfx_v8_0_do_edc_gpr_workarounds - prime GPR state for EDC (Carrizo only)
 *
 * Builds a single IB containing three compute dispatches — one running
 * the VGPR init shader and two running the SGPR init shader with
 * complementary CU masks (sgpr1/sgpr2_init_regs) — submits it on the
 * first compute ring and waits for completion.  EDC is disabled
 * (GB_EDC_MODE = 0) while the shaders run, then the saved mode is
 * restored with DED_MODE/PROP_FED set and the SEC/DED counters are read
 * back to clear them.
 *
 * Returns 0 on success (or when skipped), negative error code otherwise.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save the EDC mode and disable EDC while the init shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per reg write pair, plus 4 (PGM_LO/HI),
	 * 5 (dispatch) and 2 (CS partial flush) dwords, in bytes */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
			       NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* restore the saved EDC mode with DED/propagate-FED enabled */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	/* NOTE(review): the trailing "| 1" looks redundant with clearing
	 * DIS_EDC but is kept as-is — confirm against register spec */
	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	fence_put(f);
	amdgpu_ib_free(adev, &ib);

	return r;
}
1298 
/*
 * gfx_v8_0_gpu_early_init - populate the per-ASIC gfx configuration
 *
 * Fills adev->gfx.config (shader engine / tile pipe / CU / RB counts
 * and FIFO sizes) from per-ASIC tables, selects the golden
 * GB_ADDR_CONFIG value, derives the memory row size from the memory
 * controller (fused DIMM address mapping on APUs, MC_ARB_RAMCFG column
 * count otherwise) and patches the ROW_SIZE field into gb_addr_config.
 */
static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count depends on the Carrizo SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count depends on the Stoney SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never used in this
	 * function - confirm whether the read is still needed */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;
}
1524 
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block
 *
 * @handle: amdgpu_device pointer (IP-block handle)
 *
 * Registers the EOP / privileged-reg / privileged-inst interrupt
 * sources, loads the GFX microcode, allocates the MEC HPD bo, creates
 * the gfx and compute rings, reserves the GDS/GWS/OA partitions and
 * finally derives the per-ASIC gfx configuration.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
			NULL, &adev->gds.gds_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
		PAGE_SIZE, true,
		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
		NULL, &adev->gds.gws_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
			NULL, &adev->gds.oa_gfx_bo);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	gfx_v8_0_gpu_early_init(adev);

	return 0;
}
1636 
1637 static int gfx_v8_0_sw_fini(void *handle)
1638 {
1639 	int i;
1640 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1641 
1642 	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1643 	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1644 	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1645 
1646 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1647 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1648 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1649 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1650 
1651 	gfx_v8_0_mec_fini(adev);
1652 
1653 	return 0;
1654 }
1655 
1656 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1657 {
1658 	uint32_t *modearray, *mod2array;
1659 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1660 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1661 	u32 reg_offset;
1662 
1663 	modearray = adev->gfx.config.tile_mode_array;
1664 	mod2array = adev->gfx.config.macrotile_mode_array;
1665 
1666 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1667 		modearray[reg_offset] = 0;
1668 
1669 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1670 		mod2array[reg_offset] = 0;
1671 
1672 	switch (adev->asic_type) {
1673 	case CHIP_TOPAZ:
1674 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1675 				PIPE_CONFIG(ADDR_SURF_P2) |
1676 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1677 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1678 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1679 				PIPE_CONFIG(ADDR_SURF_P2) |
1680 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1681 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1682 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1683 				PIPE_CONFIG(ADDR_SURF_P2) |
1684 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1685 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1686 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1687 				PIPE_CONFIG(ADDR_SURF_P2) |
1688 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1689 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1690 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1691 				PIPE_CONFIG(ADDR_SURF_P2) |
1692 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1693 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1694 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1695 				PIPE_CONFIG(ADDR_SURF_P2) |
1696 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1697 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1698 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1699 				PIPE_CONFIG(ADDR_SURF_P2) |
1700 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1701 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1702 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1703 				PIPE_CONFIG(ADDR_SURF_P2));
1704 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1705 				PIPE_CONFIG(ADDR_SURF_P2) |
1706 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1707 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1708 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1709 				 PIPE_CONFIG(ADDR_SURF_P2) |
1710 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1711 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1712 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1713 				 PIPE_CONFIG(ADDR_SURF_P2) |
1714 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1715 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1716 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1717 				 PIPE_CONFIG(ADDR_SURF_P2) |
1718 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1719 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1720 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1721 				 PIPE_CONFIG(ADDR_SURF_P2) |
1722 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1723 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1724 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1725 				 PIPE_CONFIG(ADDR_SURF_P2) |
1726 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1727 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1728 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1729 				 PIPE_CONFIG(ADDR_SURF_P2) |
1730 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1731 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1732 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1733 				 PIPE_CONFIG(ADDR_SURF_P2) |
1734 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1735 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1736 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1737 				 PIPE_CONFIG(ADDR_SURF_P2) |
1738 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1739 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1740 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1741 				 PIPE_CONFIG(ADDR_SURF_P2) |
1742 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1743 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1744 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1745 				 PIPE_CONFIG(ADDR_SURF_P2) |
1746 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1747 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1748 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1749 				 PIPE_CONFIG(ADDR_SURF_P2) |
1750 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1751 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1752 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1753 				 PIPE_CONFIG(ADDR_SURF_P2) |
1754 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1755 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1756 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1757 				 PIPE_CONFIG(ADDR_SURF_P2) |
1758 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1759 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1760 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1761 				 PIPE_CONFIG(ADDR_SURF_P2) |
1762 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1763 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1764 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1765 				 PIPE_CONFIG(ADDR_SURF_P2) |
1766 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1767 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1768 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1769 				 PIPE_CONFIG(ADDR_SURF_P2) |
1770 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1771 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1772 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1773 				 PIPE_CONFIG(ADDR_SURF_P2) |
1774 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1775 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1776 
1777 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1778 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1779 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1780 				NUM_BANKS(ADDR_SURF_8_BANK));
1781 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1782 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1783 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1784 				NUM_BANKS(ADDR_SURF_8_BANK));
1785 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1786 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1787 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1788 				NUM_BANKS(ADDR_SURF_8_BANK));
1789 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1790 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1791 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1792 				NUM_BANKS(ADDR_SURF_8_BANK));
1793 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1794 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1795 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1796 				NUM_BANKS(ADDR_SURF_8_BANK));
1797 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1798 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1799 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1800 				NUM_BANKS(ADDR_SURF_8_BANK));
1801 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1802 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1803 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1804 				NUM_BANKS(ADDR_SURF_8_BANK));
1805 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1806 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1807 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1808 				NUM_BANKS(ADDR_SURF_16_BANK));
1809 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1810 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1811 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1812 				NUM_BANKS(ADDR_SURF_16_BANK));
1813 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1814 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1815 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1816 				 NUM_BANKS(ADDR_SURF_16_BANK));
1817 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1818 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1819 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1820 				 NUM_BANKS(ADDR_SURF_16_BANK));
1821 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1822 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1823 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1824 				 NUM_BANKS(ADDR_SURF_16_BANK));
1825 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1826 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1827 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1828 				 NUM_BANKS(ADDR_SURF_16_BANK));
1829 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1830 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1831 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1832 				 NUM_BANKS(ADDR_SURF_8_BANK));
1833 
1834 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1835 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1836 			    reg_offset != 23)
1837 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1838 
1839 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1840 			if (reg_offset != 7)
1841 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1842 
1843 		break;
1844 	case CHIP_FIJI:
1845 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1846 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1847 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1848 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1849 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1850 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1851 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1852 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1853 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1854 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1855 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1856 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1857 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1858 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1859 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1860 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1861 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1862 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1863 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1864 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1865 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1866 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1867 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1868 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1869 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1870 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1871 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1872 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1873 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1874 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1875 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1876 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1877 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1878 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1879 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1880 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1881 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1882 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1883 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1884 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1885 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1886 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1887 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1888 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1889 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1890 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1891 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1892 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1893 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1894 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1895 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1896 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1897 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1898 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1899 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1900 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1901 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1902 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1903 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1904 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1905 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1906 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1907 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1908 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1909 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1910 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1911 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1912 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1913 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1914 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1915 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1916 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1917 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1918 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1919 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1920 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1921 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1922 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1923 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1924 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1925 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1926 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1927 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1928 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1929 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1930 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1931 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1932 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1933 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1934 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1935 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1936 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1937 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1938 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1939 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1940 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1941 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1942 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1943 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1944 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1945 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1946 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1947 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1948 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1949 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1950 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1951 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1952 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1953 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1954 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1955 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1956 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1957 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1958 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1959 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1960 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1961 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1962 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1963 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1964 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1965 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1966 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1967 
1968 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1969 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1970 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1971 				NUM_BANKS(ADDR_SURF_8_BANK));
1972 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1973 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1974 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1975 				NUM_BANKS(ADDR_SURF_8_BANK));
1976 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1977 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1978 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1979 				NUM_BANKS(ADDR_SURF_8_BANK));
1980 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1981 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1982 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1983 				NUM_BANKS(ADDR_SURF_8_BANK));
1984 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1985 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1986 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1987 				NUM_BANKS(ADDR_SURF_8_BANK));
1988 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1989 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1990 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1991 				NUM_BANKS(ADDR_SURF_8_BANK));
1992 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1993 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1994 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1995 				NUM_BANKS(ADDR_SURF_8_BANK));
1996 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1997 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1998 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1999 				NUM_BANKS(ADDR_SURF_8_BANK));
2000 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2001 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2002 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2003 				NUM_BANKS(ADDR_SURF_8_BANK));
2004 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2005 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2006 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2007 				 NUM_BANKS(ADDR_SURF_8_BANK));
2008 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2009 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2010 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2011 				 NUM_BANKS(ADDR_SURF_8_BANK));
2012 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2013 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2014 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2015 				 NUM_BANKS(ADDR_SURF_8_BANK));
2016 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2017 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2018 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2019 				 NUM_BANKS(ADDR_SURF_8_BANK));
2020 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2021 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2022 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2023 				 NUM_BANKS(ADDR_SURF_4_BANK));
2024 
2025 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2026 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2027 
2028 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2029 			if (reg_offset != 7)
2030 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2031 
2032 		break;
2033 	case CHIP_TONGA:
2034 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2035 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2036 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2037 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2038 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2039 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2040 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2041 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2042 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2043 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2044 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2045 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2046 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2047 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2048 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2049 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2050 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2051 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2052 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2053 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2054 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2055 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2056 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2057 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2058 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2059 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2060 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2061 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2062 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2063 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2064 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2065 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2066 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2067 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2068 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2069 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2070 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2071 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2072 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2073 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2074 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2075 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2076 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2077 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2078 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2079 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2080 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2081 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2082 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2083 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2084 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2085 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2086 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2087 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2088 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2089 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2090 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2091 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2093 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2094 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2095 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2096 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2097 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2098 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2099 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2100 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2101 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2102 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2103 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2104 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2105 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2106 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2107 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2108 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2109 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2110 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2111 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2112 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2113 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2114 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2115 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2116 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2117 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2118 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2119 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2120 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2121 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2122 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2123 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2124 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2125 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2126 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2127 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2128 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2129 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2130 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2131 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2132 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2133 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2134 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2135 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2136 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2137 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2138 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2139 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2140 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2141 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2142 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2143 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2144 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2146 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2147 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2148 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2149 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2150 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2151 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2152 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2153 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2154 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2155 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2156 
2157 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2158 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2159 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2160 				NUM_BANKS(ADDR_SURF_16_BANK));
2161 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2163 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2164 				NUM_BANKS(ADDR_SURF_16_BANK));
2165 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2166 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2167 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2168 				NUM_BANKS(ADDR_SURF_16_BANK));
2169 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2170 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2171 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2172 				NUM_BANKS(ADDR_SURF_16_BANK));
2173 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176 				NUM_BANKS(ADDR_SURF_16_BANK));
2177 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2178 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2179 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2180 				NUM_BANKS(ADDR_SURF_16_BANK));
2181 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2182 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2183 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2184 				NUM_BANKS(ADDR_SURF_16_BANK));
2185 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2187 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2188 				NUM_BANKS(ADDR_SURF_16_BANK));
2189 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2190 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2191 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2192 				NUM_BANKS(ADDR_SURF_16_BANK));
2193 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2194 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2195 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2196 				 NUM_BANKS(ADDR_SURF_16_BANK));
2197 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2200 				 NUM_BANKS(ADDR_SURF_16_BANK));
2201 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2202 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2203 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2204 				 NUM_BANKS(ADDR_SURF_8_BANK));
2205 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2206 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2207 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2208 				 NUM_BANKS(ADDR_SURF_4_BANK));
2209 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2210 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2211 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2212 				 NUM_BANKS(ADDR_SURF_4_BANK));
2213 
2214 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2215 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2216 
2217 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2218 			if (reg_offset != 7)
2219 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2220 
2221 		break;
2222 	case CHIP_STONEY:
2223 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224 				PIPE_CONFIG(ADDR_SURF_P2) |
2225 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2226 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2227 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228 				PIPE_CONFIG(ADDR_SURF_P2) |
2229 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2230 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2232 				PIPE_CONFIG(ADDR_SURF_P2) |
2233 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2234 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2236 				PIPE_CONFIG(ADDR_SURF_P2) |
2237 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2238 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2239 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2240 				PIPE_CONFIG(ADDR_SURF_P2) |
2241 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2242 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2243 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2244 				PIPE_CONFIG(ADDR_SURF_P2) |
2245 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2246 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2247 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2248 				PIPE_CONFIG(ADDR_SURF_P2) |
2249 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2250 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2251 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2252 				PIPE_CONFIG(ADDR_SURF_P2));
2253 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2254 				PIPE_CONFIG(ADDR_SURF_P2) |
2255 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2256 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2257 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258 				 PIPE_CONFIG(ADDR_SURF_P2) |
2259 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2261 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2262 				 PIPE_CONFIG(ADDR_SURF_P2) |
2263 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2264 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2265 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2266 				 PIPE_CONFIG(ADDR_SURF_P2) |
2267 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2268 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2269 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2270 				 PIPE_CONFIG(ADDR_SURF_P2) |
2271 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2272 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2273 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2274 				 PIPE_CONFIG(ADDR_SURF_P2) |
2275 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2276 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2277 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278 				 PIPE_CONFIG(ADDR_SURF_P2) |
2279 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2280 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2281 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2282 				 PIPE_CONFIG(ADDR_SURF_P2) |
2283 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2284 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2285 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2286 				 PIPE_CONFIG(ADDR_SURF_P2) |
2287 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2288 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2290 				 PIPE_CONFIG(ADDR_SURF_P2) |
2291 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2294 				 PIPE_CONFIG(ADDR_SURF_P2) |
2295 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2298 				 PIPE_CONFIG(ADDR_SURF_P2) |
2299 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2300 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2301 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2302 				 PIPE_CONFIG(ADDR_SURF_P2) |
2303 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2304 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2305 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2306 				 PIPE_CONFIG(ADDR_SURF_P2) |
2307 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2308 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2309 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2310 				 PIPE_CONFIG(ADDR_SURF_P2) |
2311 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2312 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2313 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2314 				 PIPE_CONFIG(ADDR_SURF_P2) |
2315 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2316 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2317 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2318 				 PIPE_CONFIG(ADDR_SURF_P2) |
2319 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2320 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2322 				 PIPE_CONFIG(ADDR_SURF_P2) |
2323 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2325 
2326 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2327 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2328 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2329 				NUM_BANKS(ADDR_SURF_8_BANK));
2330 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2331 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2332 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2333 				NUM_BANKS(ADDR_SURF_8_BANK));
2334 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2336 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337 				NUM_BANKS(ADDR_SURF_8_BANK));
2338 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2340 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2341 				NUM_BANKS(ADDR_SURF_8_BANK));
2342 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2343 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2344 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2345 				NUM_BANKS(ADDR_SURF_8_BANK));
2346 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2347 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2348 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2349 				NUM_BANKS(ADDR_SURF_8_BANK));
2350 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2351 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2352 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2353 				NUM_BANKS(ADDR_SURF_8_BANK));
2354 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2355 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2356 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2357 				NUM_BANKS(ADDR_SURF_16_BANK));
2358 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2359 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2360 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2361 				NUM_BANKS(ADDR_SURF_16_BANK));
2362 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2363 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2364 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2365 				 NUM_BANKS(ADDR_SURF_16_BANK));
2366 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2367 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2368 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2369 				 NUM_BANKS(ADDR_SURF_16_BANK));
2370 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2371 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2372 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2373 				 NUM_BANKS(ADDR_SURF_16_BANK));
2374 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2376 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2377 				 NUM_BANKS(ADDR_SURF_16_BANK));
2378 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2380 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381 				 NUM_BANKS(ADDR_SURF_8_BANK));
2382 
2383 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2384 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2385 			    reg_offset != 23)
2386 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2387 
2388 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2389 			if (reg_offset != 7)
2390 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2391 
2392 		break;
2393 	default:
2394 		dev_warn(adev->dev,
2395 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2396 			 adev->asic_type);
2397 
2398 	case CHIP_CARRIZO:
2399 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 				PIPE_CONFIG(ADDR_SURF_P2) |
2401 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2402 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2403 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404 				PIPE_CONFIG(ADDR_SURF_P2) |
2405 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2406 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408 				PIPE_CONFIG(ADDR_SURF_P2) |
2409 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2410 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2411 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412 				PIPE_CONFIG(ADDR_SURF_P2) |
2413 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2414 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2415 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416 				PIPE_CONFIG(ADDR_SURF_P2) |
2417 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2418 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2419 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2420 				PIPE_CONFIG(ADDR_SURF_P2) |
2421 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2422 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2423 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424 				PIPE_CONFIG(ADDR_SURF_P2) |
2425 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2426 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2427 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2428 				PIPE_CONFIG(ADDR_SURF_P2));
2429 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430 				PIPE_CONFIG(ADDR_SURF_P2) |
2431 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434 				 PIPE_CONFIG(ADDR_SURF_P2) |
2435 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2436 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438 				 PIPE_CONFIG(ADDR_SURF_P2) |
2439 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2440 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2442 				 PIPE_CONFIG(ADDR_SURF_P2) |
2443 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2444 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2445 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2446 				 PIPE_CONFIG(ADDR_SURF_P2) |
2447 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2448 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2449 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2450 				 PIPE_CONFIG(ADDR_SURF_P2) |
2451 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2452 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2453 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2454 				 PIPE_CONFIG(ADDR_SURF_P2) |
2455 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2456 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2457 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2458 				 PIPE_CONFIG(ADDR_SURF_P2) |
2459 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2460 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2461 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2462 				 PIPE_CONFIG(ADDR_SURF_P2) |
2463 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2464 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2465 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2466 				 PIPE_CONFIG(ADDR_SURF_P2) |
2467 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2468 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2469 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2470 				 PIPE_CONFIG(ADDR_SURF_P2) |
2471 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2472 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2473 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2474 				 PIPE_CONFIG(ADDR_SURF_P2) |
2475 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2476 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2477 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2478 				 PIPE_CONFIG(ADDR_SURF_P2) |
2479 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2480 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2481 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2482 				 PIPE_CONFIG(ADDR_SURF_P2) |
2483 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2484 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2485 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2486 				 PIPE_CONFIG(ADDR_SURF_P2) |
2487 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2488 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2489 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2490 				 PIPE_CONFIG(ADDR_SURF_P2) |
2491 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2492 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2493 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2494 				 PIPE_CONFIG(ADDR_SURF_P2) |
2495 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2496 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2497 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2498 				 PIPE_CONFIG(ADDR_SURF_P2) |
2499 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2500 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2501 
2502 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2504 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505 				NUM_BANKS(ADDR_SURF_8_BANK));
2506 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2508 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2509 				NUM_BANKS(ADDR_SURF_8_BANK));
2510 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2513 				NUM_BANKS(ADDR_SURF_8_BANK));
2514 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2516 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2517 				NUM_BANKS(ADDR_SURF_8_BANK));
2518 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2520 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2521 				NUM_BANKS(ADDR_SURF_8_BANK));
2522 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2525 				NUM_BANKS(ADDR_SURF_8_BANK));
2526 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2528 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529 				NUM_BANKS(ADDR_SURF_8_BANK));
2530 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2531 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2532 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2533 				NUM_BANKS(ADDR_SURF_16_BANK));
2534 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2535 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2537 				NUM_BANKS(ADDR_SURF_16_BANK));
2538 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2539 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2541 				 NUM_BANKS(ADDR_SURF_16_BANK));
2542 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2543 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2544 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2545 				 NUM_BANKS(ADDR_SURF_16_BANK));
2546 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2549 				 NUM_BANKS(ADDR_SURF_16_BANK));
2550 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2553 				 NUM_BANKS(ADDR_SURF_16_BANK));
2554 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2556 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557 				 NUM_BANKS(ADDR_SURF_8_BANK));
2558 
2559 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2560 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2561 			    reg_offset != 23)
2562 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2563 
2564 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2565 			if (reg_offset != 7)
2566 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2567 
2568 		break;
2569 	}
2570 }
2571 
2572 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2573 {
2574 	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2575 
2576 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2577 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2578 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2579 	} else if (se_num == 0xffffffff) {
2580 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2581 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2582 	} else if (sh_num == 0xffffffff) {
2583 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2584 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2585 	} else {
2586 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2587 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2588 	}
2589 	WREG32(mmGRBM_GFX_INDEX, data);
2590 }
2591 
2592 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2593 {
2594 	return (u32)((1ULL << bit_width) - 1);
2595 }
2596 
/*
 * gfx_v8_0_get_rb_active_bitmap - read the active render backend (RB)
 * bitmap for the currently selected SE/SH.
 *
 * The caller must have steered GRBM to the SE/SH of interest with
 * gfx_v8_0_select_se_sh() beforehand.  Returns one bit per active RB.
 */
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	/* combine the fuse (CC) and driver/user (GC_USER) disable bits */
	data = RREG32(mmCC_RB_BACKEND_DISABLE);
	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	/* NOTE(review): the CC mask is paired with the GC_USER shift —
	 * presumably both registers share the same field layout; verify
	 * against the gmc_8_2 register headers */
	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	/* width of the per-SH RB field */
	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	/* the registers hold "disabled" bits; invert to get the active set */
	return (~data) & mask;
}
2612 
/*
 * gfx_v8_0_setup_rb - cache the global active render backend bitmap.
 *
 * Walks every shader engine / shader array, reads the per-SH active RB
 * bitmap and packs the results into adev->gfx.config.backend_enable_mask;
 * num_rbs is set to its popcount.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	/* number of bitmap bits contributed by each SE/SH pair */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* shift each SH's bits into its slot in the bitmap */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* restore broadcast mode so later register writes hit all SEs/SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}
2636 
2637 /**
2638  * gfx_v8_0_init_compute_vmid - gart enable
2639  *
2640  * @rdev: amdgpu_device pointer
2641  *
2642  * Initialize compute vmid sh_mem registers
2643  *
2644  */
#define DEFAULT_SH_MEM_BASES	(0x6000)
/* compute VMIDs occupy the upper half of the 16 VMIDs */
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same base value is used in both halves of SH_MEM_BASES */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, cache-coherent
	 * default mtype, private ATC translation enabled */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base (1) > limit (0): presumably leaves the APE1
		 * aperture disabled — confirm against hw docs */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* restore VMID 0 selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
2681 
/*
 * gfx_v8_0_gpu_init - one-time GFX block bring-up.
 *
 * Programs the GRBM read timeout, mirrors gb_addr_config into the HDP
 * and DMIF copies, initializes the tiling tables and active-RB mask,
 * sets per-VMID SH_MEM_* apertures and the PA_SC fifo sizes.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	tmp = RREG32(mmGRBM_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
	WREG32(mmGRBM_CNTL, tmp);

	/* all three blocks must agree on the address/tiling configuration */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);

	gfx_v8_0_setup_rb(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		/* VMID 0 uses UC mtype; VMIDs 1-15 use NC — only the mtype
		 * fields differ between the two branches */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		/* APE1 base > limit: presumably disables the APE1 aperture */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
2747 
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes masters to idle.
 *
 * Polls the per-CU master busy register for every SE/SH combination,
 * then the non-CU (SE/GC/TC) master busy bits, busy-waiting 1us per
 * iteration up to adev->usec_timeout for each poll loop.  Times out
 * silently — callers get no error indication.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			/* poll each SE/SH individually */
			gfx_v8_0_select_se_sh(adev, i, j);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
2777 
2778 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2779 					       bool enable)
2780 {
2781 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2782 
2783 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2784 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2785 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2786 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2787 
2788 	WREG32(mmCP_INT_CNTL_RING0, tmp);
2789 }
2790 
/*
 * gfx_v8_0_rlc_stop - halt the RLC.
 *
 * Clears RLC_ENABLE_F32, masks the GUI idle interrupts, then waits for
 * the RLC serdes masters to drain before returning.
 */
void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32(mmRLC_CNTL, tmp);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	gfx_v8_0_wait_for_rlc_serdes(adev);
}
2802 
/*
 * gfx_v8_0_rlc_reset - pulse the RLC soft reset.
 *
 * Asserts SOFT_RESET_RLC in GRBM_SOFT_RESET, holds it for 50us,
 * deasserts it and waits another 50us for the block to settle.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmGRBM_SOFT_RESET);

	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
}
2814 
/*
 * gfx_v8_0_rlc_start - enable the RLC F32 core.
 *
 * Sets RLC_ENABLE_F32 and, on dGPUs only, re-enables the GUI idle
 * interrupts; on APUs that happens later, after CP init.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
	WREG32(mmRLC_CNTL, tmp);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	/* let the RLC spin up before the caller proceeds */
	udelay(50);
}
2828 
/*
 * gfx_v8_0_rlc_load_microcode - upload the RLC_G microcode via MMIO.
 *
 * Streams the firmware image dword-by-dword into the RLC GPM ucode RAM,
 * then writes the firmware version to the ucode address register.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware has been loaded.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* ucode payload starts at the offset recorded in the header */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
2852 
/*
 * gfx_v8_0_rlc_resume - (re)start the RLC.
 *
 * Stops the RLC, disables clock- and power-gating, soft-resets the
 * block, loads the microcode (directly, or checks that the SMU already
 * loaded it) and starts the RLC again.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	/* with powerplay enabled, firmware loading is handled elsewhere */
	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU-managed loading: just verify it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
2885 
2886 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2887 {
2888 	int i;
2889 	u32 tmp = RREG32(mmCP_ME_CNTL);
2890 
2891 	if (enable) {
2892 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2893 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2894 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2895 	} else {
2896 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2897 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2898 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2899 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2900 			adev->gfx.gfx_ring[i].ready = false;
2901 	}
2902 	WREG32(mmCP_ME_CNTL, tmp);
2903 	udelay(50);
2904 }
2905 
2906 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2907 {
2908 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2909 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2910 	const struct gfx_firmware_header_v1_0 *me_hdr;
2911 	const __le32 *fw_data;
2912 	unsigned i, fw_size;
2913 
2914 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2915 		return -EINVAL;
2916 
2917 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2918 		adev->gfx.pfp_fw->data;
2919 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2920 		adev->gfx.ce_fw->data;
2921 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2922 		adev->gfx.me_fw->data;
2923 
2924 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2925 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2926 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2927 
2928 	gfx_v8_0_cp_gfx_enable(adev, false);
2929 
2930 	/* PFP */
2931 	fw_data = (const __le32 *)
2932 		(adev->gfx.pfp_fw->data +
2933 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2934 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2935 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
2936 	for (i = 0; i < fw_size; i++)
2937 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2938 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2939 
2940 	/* CE */
2941 	fw_data = (const __le32 *)
2942 		(adev->gfx.ce_fw->data +
2943 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2944 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2945 	WREG32(mmCP_CE_UCODE_ADDR, 0);
2946 	for (i = 0; i < fw_size; i++)
2947 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2948 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2949 
2950 	/* ME */
2951 	fw_data = (const __le32 *)
2952 		(adev->gfx.me_fw->data +
2953 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2954 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2955 	WREG32(mmCP_ME_RAM_WADDR, 0);
2956 	for (i = 0; i < fw_size; i++)
2957 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2958 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2959 
2960 	return 0;
2961 }
2962 
2963 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
2964 {
2965 	u32 count = 0;
2966 	const struct cs_section_def *sect = NULL;
2967 	const struct cs_extent_def *ext = NULL;
2968 
2969 	/* begin clear state */
2970 	count += 2;
2971 	/* context control state */
2972 	count += 3;
2973 
2974 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
2975 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2976 			if (sect->id == SECT_CONTEXT)
2977 				count += 2 + ext->reg_count;
2978 			else
2979 				return 0;
2980 		}
2981 	}
2982 	/* pa_sc_raster_config/pa_sc_raster_config1 */
2983 	count += 4;
2984 	/* end clear state */
2985 	count += 2;
2986 	/* clear state */
2987 	count += 2;
2988 
2989 	return count;
2990 }
2991 
/*
 * gfx_v8_0_cp_gfx_start - initialize the gfx CP and emit the preamble.
 *
 * Programs the basic CP config registers, un-halts the gfx CP and
 * submits the clear-state / context-control stream (including the
 * per-ASIC PA_SC_RASTER_CONFIG values and the CE partition bases) on
 * gfx ring 0.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* + 4 dwords for the SET_BASE packet emitted at the end */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent of the golden clear state;
	 * must stay in sync with gfx_v8_0_get_csb_size() */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
3074 
/**
 * gfx_v8_0_cp_gfx_resume - bring up the gfx ring (CP ring buffer 0)
 *
 * Programs CP_RB0: sizes and points the ring buffer, sets up the
 * read-pointer writeback address, optionally enables the gfx doorbell,
 * then starts the ring via gfx_v8_0_cp_gfx_start() and runs a ring test.
 *
 * The register sequence is order-sensitive: RB_RPTR_WR_ENA is held while
 * the pointers are reset and is dropped again (second CP_RB0_CNTL write)
 * after a short delay.  Do not reorder.
 *
 * Returns 0 on success, or the ring-test error code on failure (in which
 * case the ring is marked not ready).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size; RB_BUFSZ is log2 of the size in qwords */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers:
	 * temporarily allow host writes to RPTR so both can be zeroed */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* let the pointer reset settle, then drop RB_RPTR_WR_ENA again */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* restrict the gfx doorbell to its own range (Tonga only) */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
3155 
3156 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3157 {
3158 	int i;
3159 
3160 	if (enable) {
3161 		WREG32(mmCP_MEC_CNTL, 0);
3162 	} else {
3163 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3164 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3165 			adev->gfx.compute_ring[i].ready = false;
3166 	}
3167 	udelay(50);
3168 }
3169 
3170 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3171 {
3172 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3173 	const __le32 *fw_data;
3174 	unsigned i, fw_size;
3175 
3176 	if (!adev->gfx.mec_fw)
3177 		return -EINVAL;
3178 
3179 	gfx_v8_0_cp_compute_enable(adev, false);
3180 
3181 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3182 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3183 
3184 	fw_data = (const __le32 *)
3185 		(adev->gfx.mec_fw->data +
3186 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3187 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3188 
3189 	/* MEC1 */
3190 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3191 	for (i = 0; i < fw_size; i++)
3192 		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3193 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3194 
3195 	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3196 	if (adev->gfx.mec2_fw) {
3197 		const struct gfx_firmware_header_v1_0 *mec2_hdr;
3198 
3199 		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3200 		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3201 
3202 		fw_data = (const __le32 *)
3203 			(adev->gfx.mec2_fw->data +
3204 			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3205 		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3206 
3207 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3208 		for (i = 0; i < fw_size; i++)
3209 			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3210 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3211 	}
3212 
3213 	return 0;
3214 }
3215 
/*
 * struct vi_mqd - Memory Queue Descriptor for VI (gfx v8) compute queues
 *
 * Hardware/microcode-defined layout: one 32-bit word per ordinal, read
 * and written by the CP, so the member order, count, and sizes must not
 * change.  gfx_v8_0_cp_compute_resume() fills one of these per compute
 * ring and points CP_MQD_BASE_ADDR at it; the cp_hqd_* members mirror
 * the corresponding CP_HQD_* registers.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	/* user SGPR initial values */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	/* HQD register mirrors start here (ordinal128 onward) */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	/* staging area for an IQ-timer packet (32 dwords) */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	/* staging area for a SET_RESOURCES packet (8 dwords) */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
3476 
3477 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3478 {
3479 	int i, r;
3480 
3481 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3482 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3483 
3484 		if (ring->mqd_obj) {
3485 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
3486 			if (unlikely(r != 0))
3487 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3488 
3489 			amdgpu_bo_unpin(ring->mqd_obj);
3490 			amdgpu_bo_unreserve(ring->mqd_obj);
3491 
3492 			amdgpu_bo_unref(&ring->mqd_obj);
3493 			ring->mqd_obj = NULL;
3494 		}
3495 	}
3496 }
3497 
/**
 * gfx_v8_0_cp_compute_resume - bring up all compute queues (HQDs)
 *
 * Programs the EOP buffers for every MEC pipe, then for each compute
 * ring allocates (if needed), pins and maps an MQD buffer object, fills
 * it in, and mirrors the relevant fields into the CP_HQD_* registers
 * under srbm_mutex with the queue selected via vi_srbm_select().
 * Finally enables the doorbell aperture, releases the MEC engines and
 * ring-tests every compute ring.
 *
 * The per-queue register sequence (disable wptr polling, drain an
 * active queue, program MQD base, PQ base/control, writeback and
 * doorbell, then set CP_HQD_ACTIVE last) is order-sensitive.
 *
 * Returns 0 on success or a negative error code on BO setup failure
 * (in which case all MQD BOs are torn down again).
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes: pipes 0-3 belong to MEC1 (me=1), 4-7 to MEC2 (me=2) */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* per-pipe slice of the shared EOP buffer, in 256-byte units */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* lazily allocate the MQD BO; it survives across suspend/resume */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		/* 0xC0310800: MQD header magic expected by the CP ucode —
		 * NOTE(review): value taken as-is from HW docs, not derived here */
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on every shader engine for this queue */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* select this queue's me/pipe/queue for the HQD register writes */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP base programmed in the pipe-init loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active: request a dequeue and
		 * poll until the HQD goes inactive before reprogramming it */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* APUs and Fiji also need the MEC doorbell range set */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		/* 0x53: PRELOAD_SIZE — NOTE(review): magic value from HW init
		 * tables; meaning not derivable from this file */
		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue — must be the last HQD register written */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	/* open the global doorbell aperture for the CP */
	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* ring-test each queue; failures mark the ring unusable but don't
	 * abort the whole bring-up */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
3752 
3753 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3754 {
3755 	int r;
3756 
3757 	if (!(adev->flags & AMD_IS_APU))
3758 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3759 
3760 	if (!adev->pp_enabled) {
3761 		if (!adev->firmware.smu_load) {
3762 			/* legacy firmware loading */
3763 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
3764 			if (r)
3765 				return r;
3766 
3767 			r = gfx_v8_0_cp_compute_load_microcode(adev);
3768 			if (r)
3769 				return r;
3770 		} else {
3771 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3772 							AMDGPU_UCODE_ID_CP_CE);
3773 			if (r)
3774 				return -EINVAL;
3775 
3776 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3777 							AMDGPU_UCODE_ID_CP_PFP);
3778 			if (r)
3779 				return -EINVAL;
3780 
3781 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3782 							AMDGPU_UCODE_ID_CP_ME);
3783 			if (r)
3784 				return -EINVAL;
3785 
3786 			if (adev->asic_type == CHIP_TOPAZ) {
3787 				r = gfx_v8_0_cp_compute_load_microcode(adev);
3788 				if (r)
3789 					return r;
3790 			} else {
3791 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3792 										 AMDGPU_UCODE_ID_CP_MEC1);
3793 				if (r)
3794 					return -EINVAL;
3795 			}
3796 		}
3797 	}
3798 
3799 	r = gfx_v8_0_cp_gfx_resume(adev);
3800 	if (r)
3801 		return r;
3802 
3803 	r = gfx_v8_0_cp_compute_resume(adev);
3804 	if (r)
3805 		return r;
3806 
3807 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3808 
3809 	return 0;
3810 }
3811 
/* Gate both command-processor front ends (gfx and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
3817 
/**
 * gfx_v8_0_hw_init - IP-block hw_init callback for GFX v8
 *
 * Applies golden register settings, initializes the GPU gfx config,
 * resumes the RLC and finally the CP.  Returns 0 on success or the
 * first failing step's error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the RLC must be running before the CP can be started */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
3837 
/**
 * gfx_v8_0_hw_fini - IP-block hw_fini callback for GFX v8
 *
 * Tears down in the reverse of hw_init: disable the privileged
 * register/instruction fault interrupts, halt the CP (gfx and compute),
 * stop the RLC, then release the compute MQD buffer objects.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	return 0;
}
3850 
/* Suspend is a full hw teardown; state is reprogrammed on resume. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
}
3857 
/* Resume re-runs the full hw bring-up sequence. */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
}
3864 
3865 static bool gfx_v8_0_is_idle(void *handle)
3866 {
3867 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3868 
3869 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3870 		return false;
3871 	else
3872 		return true;
3873 }
3874 
3875 static int gfx_v8_0_wait_for_idle(void *handle)
3876 {
3877 	unsigned i;
3878 	u32 tmp;
3879 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3880 
3881 	for (i = 0; i < adev->usec_timeout; i++) {
3882 		/* read MC_STATUS */
3883 		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3884 
3885 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3886 			return 0;
3887 		udelay(1);
3888 	}
3889 	return -ETIMEDOUT;
3890 }
3891 
3892 static void gfx_v8_0_print_status(void *handle)
3893 {
3894 	int i;
3895 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3896 
3897 	dev_info(adev->dev, "GFX 8.x registers\n");
3898 	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
3899 		 RREG32(mmGRBM_STATUS));
3900 	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
3901 		 RREG32(mmGRBM_STATUS2));
3902 	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3903 		 RREG32(mmGRBM_STATUS_SE0));
3904 	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3905 		 RREG32(mmGRBM_STATUS_SE1));
3906 	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3907 		 RREG32(mmGRBM_STATUS_SE2));
3908 	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3909 		 RREG32(mmGRBM_STATUS_SE3));
3910 	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3911 	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3912 		 RREG32(mmCP_STALLED_STAT1));
3913 	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3914 		 RREG32(mmCP_STALLED_STAT2));
3915 	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3916 		 RREG32(mmCP_STALLED_STAT3));
3917 	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3918 		 RREG32(mmCP_CPF_BUSY_STAT));
3919 	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3920 		 RREG32(mmCP_CPF_STALLED_STAT1));
3921 	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3922 	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3923 	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3924 		 RREG32(mmCP_CPC_STALLED_STAT1));
3925 	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3926 
3927 	for (i = 0; i < 32; i++) {
3928 		dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
3929 			 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3930 	}
3931 	for (i = 0; i < 16; i++) {
3932 		dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
3933 			 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
3934 	}
3935 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3936 		dev_info(adev->dev, "  se: %d\n", i);
3937 		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3938 		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
3939 			 RREG32(mmPA_SC_RASTER_CONFIG));
3940 		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
3941 			 RREG32(mmPA_SC_RASTER_CONFIG_1));
3942 	}
3943 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3944 
3945 	dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
3946 		 RREG32(mmGB_ADDR_CONFIG));
3947 	dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
3948 		 RREG32(mmHDP_ADDR_CONFIG));
3949 	dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
3950 		 RREG32(mmDMIF_ADDR_CALC));
3951 
3952 	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
3953 		 RREG32(mmCP_MEQ_THRESHOLDS));
3954 	dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
3955 		 RREG32(mmSX_DEBUG_1));
3956 	dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
3957 		 RREG32(mmTA_CNTL_AUX));
3958 	dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
3959 		 RREG32(mmSPI_CONFIG_CNTL));
3960 	dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
3961 		 RREG32(mmSQ_CONFIG));
3962 	dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
3963 		 RREG32(mmDB_DEBUG));
3964 	dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
3965 		 RREG32(mmDB_DEBUG2));
3966 	dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
3967 		 RREG32(mmDB_DEBUG3));
3968 	dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
3969 		 RREG32(mmCB_HW_CONTROL));
3970 	dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
3971 		 RREG32(mmSPI_CONFIG_CNTL_1));
3972 	dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
3973 		 RREG32(mmPA_SC_FIFO_SIZE));
3974 	dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
3975 		 RREG32(mmVGT_NUM_INSTANCES));
3976 	dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
3977 		 RREG32(mmCP_PERFMON_CNTL));
3978 	dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
3979 		 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
3980 	dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
3981 		 RREG32(mmVGT_CACHE_INVALIDATION));
3982 	dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
3983 		 RREG32(mmVGT_GS_VERTEX_REUSE));
3984 	dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
3985 		 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
3986 	dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
3987 		 RREG32(mmPA_CL_ENHANCE));
3988 	dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
3989 		 RREG32(mmPA_SC_ENHANCE));
3990 
3991 	dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
3992 		 RREG32(mmCP_ME_CNTL));
3993 	dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
3994 		 RREG32(mmCP_MAX_CONTEXT));
3995 	dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
3996 		 RREG32(mmCP_ENDIAN_SWAP));
3997 	dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
3998 		 RREG32(mmCP_DEVICE_ID));
3999 
4000 	dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
4001 		 RREG32(mmCP_SEM_WAIT_TIMER));
4002 
4003 	dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
4004 		 RREG32(mmCP_RB_WPTR_DELAY));
4005 	dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
4006 		 RREG32(mmCP_RB_VMID));
4007 	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4008 		 RREG32(mmCP_RB0_CNTL));
4009 	dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
4010 		 RREG32(mmCP_RB0_WPTR));
4011 	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
4012 		 RREG32(mmCP_RB0_RPTR_ADDR));
4013 	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4014 		 RREG32(mmCP_RB0_RPTR_ADDR_HI));
4015 	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4016 		 RREG32(mmCP_RB0_CNTL));
4017 	dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
4018 		 RREG32(mmCP_RB0_BASE));
4019 	dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
4020 		 RREG32(mmCP_RB0_BASE_HI));
4021 	dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
4022 		 RREG32(mmCP_MEC_CNTL));
4023 	dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
4024 		 RREG32(mmCP_CPF_DEBUG));
4025 
4026 	dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
4027 		 RREG32(mmSCRATCH_ADDR));
4028 	dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
4029 		 RREG32(mmSCRATCH_UMSK));
4030 
4031 	dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
4032 		 RREG32(mmCP_INT_CNTL_RING0));
4033 	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4034 		 RREG32(mmRLC_LB_CNTL));
4035 	dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
4036 		 RREG32(mmRLC_CNTL));
4037 	dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
4038 		 RREG32(mmRLC_CGCG_CGLS_CTRL));
4039 	dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
4040 		 RREG32(mmRLC_LB_CNTR_INIT));
4041 	dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
4042 		 RREG32(mmRLC_LB_CNTR_MAX));
4043 	dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
4044 		 RREG32(mmRLC_LB_INIT_CU_MASK));
4045 	dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
4046 		 RREG32(mmRLC_LB_PARAMS));
4047 	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4048 		 RREG32(mmRLC_LB_CNTL));
4049 	dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
4050 		 RREG32(mmRLC_MC_CNTL));
4051 	dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
4052 		 RREG32(mmRLC_UCODE_CNTL));
4053 
4054 	mutex_lock(&adev->srbm_mutex);
4055 	for (i = 0; i < 16; i++) {
4056 		vi_srbm_select(adev, 0, 0, 0, i);
4057 		dev_info(adev->dev, "  VM %d:\n", i);
4058 		dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
4059 			 RREG32(mmSH_MEM_CONFIG));
4060 		dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
4061 			 RREG32(mmSH_MEM_APE1_BASE));
4062 		dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
4063 			 RREG32(mmSH_MEM_APE1_LIMIT));
4064 		dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
4065 			 RREG32(mmSH_MEM_BASES));
4066 	}
4067 	vi_srbm_select(adev, 0, 0, 0, 0);
4068 	mutex_unlock(&adev->srbm_mutex);
4069 }
4070 
/* Soft-reset the GFX block.  Builds GRBM/SRBM soft-reset masks from the
 * busy bits in GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS, then (if any
 * engine is stuck) stops the RLC and CP, stalls the GMCON interface,
 * pulses the reset bits, and releases the stall.  Register state is
 * dumped before and after for debugging.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS: any busy graphics pipeline unit => reset CP + GFX */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	/* a busy CP also requires resetting GRBM through SRBM */
	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		gfx_v8_0_print_status((void *)adev);
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);

		/* stall the memory-controller interface while resetting */
		if (grbm_soft_reset || srbm_soft_reset) {
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 1);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 1);
			WREG32(mmGMCON_DEBUG, tmp);

			udelay(50);
		}

		/* pulse the GRBM reset bits: set, wait, clear; the extra
		 * reads post the writes before the delay */
		if (grbm_soft_reset) {
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		/* same pulse sequence for the SRBM reset bits */
		if (srbm_soft_reset) {
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		/* release the GMCON stall */
		if (grbm_soft_reset || srbm_soft_reset) {
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 0);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 0);
			WREG32(mmGMCON_DEBUG, tmp);
		}

		/* Wait a little for things to settle down */
		udelay(50);
		gfx_v8_0_print_status((void *)adev);
	}
	return 0;
}
4175 
4176 /**
4177  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4178  *
4179  * @adev: amdgpu_device pointer
4180  *
4181  * Fetches a GPU clock counter snapshot.
4182  * Returns the 64 bit clock counter snapshot.
4183  */
4184 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4185 {
4186 	uint64_t clock;
4187 
4188 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4189 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4190 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4191 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4192 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4193 	return clock;
4194 }
4195 
/* Emit WRITE_DATA packets that program the per-VMID GDS, GWS and OA
 * allocation registers for @vmid.  Byte-based inputs are converted to
 * the hardware's granularity via the AMDGPU_*_SHIFT constants; the OA
 * value is encoded as a contiguous bitmask of oa_size bits starting at
 * oa_base.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
4243 
4244 static int gfx_v8_0_early_init(void *handle)
4245 {
4246 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4247 
4248 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4249 	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
4250 	gfx_v8_0_set_ring_funcs(adev);
4251 	gfx_v8_0_set_irq_funcs(adev);
4252 	gfx_v8_0_set_gds_init(adev);
4253 
4254 	return 0;
4255 }
4256 
4257 static int gfx_v8_0_late_init(void *handle)
4258 {
4259 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4260 	int r;
4261 
4262 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4263 	if (r)
4264 		return r;
4265 
4266 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4267 	if (r)
4268 		return r;
4269 
4270 	/* requires IBs so do in late init after IB pool is initialized */
4271 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
4272 	if (r)
4273 		return r;
4274 
4275 	return 0;
4276 }
4277 
/* GFX powergating is not implemented for gfx v8; accept any state. */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}
4283 
/* Send a BPM command (@cmd) to register @reg_addr over the RLC serdes
 * write path, broadcast to all CUs and non-CU masters on every SE/SH.
 * The SE/SH select is left in broadcast mode on return.
 */
static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
		uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to all shader engines / shader arrays */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	/* clear all command/select fields, then encode the new command */
	data = RREG32(mmRLC_SERDES_WR_CTRL);
	data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
			(cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
			(reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
			(0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	/* this write kicks off the serdes transaction */
	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
4313 
/* Enable or disable medium-grain clock gating (MGCG) and memory light
 * sleep (MGLS) on Fiji.  The numbered steps below follow a fixed
 * hardware programming sequence; do not reorder them.  All register
 * writes are skipped when the value is already correct to avoid
 * unnecessary bus traffic.
 */
static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
		bool enable)
{
	uint32_t temp, data;

	/* It is disabled by HW by default */
	if (enable) {
		/* 1 - RLC memory Light sleep */
		temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
		data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
		if (temp != data)
			WREG32(mmRLC_MEM_SLP_CNTL, data);

		/* 2 - CP memory Light sleep */
		temp = data = RREG32(mmCP_MEM_SLP_CNTL);
		data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
		if (temp != data)
			WREG32(mmCP_MEM_SLP_CNTL, data);

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
		data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
		data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
		data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
		data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
		data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
		data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}
}
4407 
/* Enable or disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) on Fiji.  The sequence of override writes, serdes
 * commands and idle waits is a fixed hardware programming order; do
 * not reorder.  Writes are skipped when the register already holds the
 * target value.
 */
static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
		bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	if (enable) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		/* enable cgls*/
		data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}
}
4488 static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
4489 		bool enable)
4490 {
4491 	if (enable) {
4492 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4493 		 * ===  MGCG + MGLS + TS(CG/LS) ===
4494 		 */
4495 		fiji_update_medium_grain_clock_gating(adev, enable);
4496 		fiji_update_coarse_grain_clock_gating(adev, enable);
4497 	} else {
4498 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4499 		 * ===  CGCG + CGLS ===
4500 		 */
4501 		fiji_update_coarse_grain_clock_gating(adev, enable);
4502 		fiji_update_medium_grain_clock_gating(adev, enable);
4503 	}
4504 	return 0;
4505 }
4506 
4507 static int gfx_v8_0_set_clockgating_state(void *handle,
4508 					  enum amd_clockgating_state state)
4509 {
4510 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4511 
4512 	switch (adev->asic_type) {
4513 	case CHIP_FIJI:
4514 		fiji_update_gfx_clock_gating(adev,
4515 				state == AMD_CG_STATE_GATE ? true : false);
4516 		break;
4517 	default:
4518 		break;
4519 	}
4520 	return 0;
4521 }
4522 
4523 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4524 {
4525 	u32 rptr;
4526 
4527 	rptr = ring->adev->wb.wb[ring->rptr_offs];
4528 
4529 	return rptr;
4530 }
4531 
4532 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4533 {
4534 	struct amdgpu_device *adev = ring->adev;
4535 	u32 wptr;
4536 
4537 	if (ring->use_doorbell)
4538 		/* XXX check if swapping is necessary on BE */
4539 		wptr = ring->adev->wb.wb[ring->wptr_offs];
4540 	else
4541 		wptr = RREG32(mmCP_RB0_WPTR);
4542 
4543 	return wptr;
4544 }
4545 
4546 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4547 {
4548 	struct amdgpu_device *adev = ring->adev;
4549 
4550 	if (ring->use_doorbell) {
4551 		/* XXX check if swapping is necessary on BE */
4552 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
4553 		WDOORBELL32(ring->doorbell_index, ring->wptr);
4554 	} else {
4555 		WREG32(mmCP_RB0_WPTR, ring->wptr);
4556 		(void)RREG32(mmCP_RB0_WPTR);
4557 	}
4558 }
4559 
/* Emit a WAIT_REG_MEM packet that requests an HDP flush and waits for
 * its completion bit in GPU_HDP_FLUSH_DONE.  The ref/mask bit depends
 * on which CP engine owns the ring (gfx PFP, or one of the MEC pipes);
 * pipes on MEC engines other than 1 and 2 are silently ignored.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		/* CP2..CP5 belong to MEC1 pipes, CP6..CP9 to MEC2 pipes */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
4591 
/* Emit an indirect buffer on the gfx ring: a WRITE_DATA packet that
 * publishes the predicted next rptr, an optional SWITCH_BUFFER on
 * context change, and the INDIRECT_BUFFER packet itself (CONST variant
 * for CE IBs).  CE preamble IBs are dropped entirely when the context
 * has not changed.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				  struct amdgpu_ib *ib)
{
	bool need_ctx_switch = ring->current_ctx != ib->ctx;
	u32 header, control = 0;
	/* 5 dwords for the next_rptr WRITE_DATA packet below */
	u32 next_rptr = ring->wptr + 5;

	/* drop the CE preamble IB for the same context */
	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
		return;

	/* account for the 2-dword SWITCH_BUFFER, if emitted */
	if (need_ctx_switch)
		next_rptr += 2;

	/* account for the 4-dword INDIRECT_BUFFER packet */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (need_ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* length in dwords plus the VM id in bits 24+ */
	control |= ib->length_dw | (ib->vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4635 
/* Emit an indirect buffer on a compute ring: publish the predicted
 * next rptr via WRITE_DATA, then emit the INDIRECT_BUFFER packet.
 * Unlike the gfx variant there is no context switch or CE handling.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
				  struct amdgpu_ib *ib)
{
	u32 header, control = 0;
	/* 5 dwords for the next_rptr WRITE_DATA packet below */
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	/* account for the 4-dword INDIRECT_BUFFER packet */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* length in dwords plus the VM id in bits 24+ */
	control |= ib->length_dw | (ib->vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  (2 << 0) |
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4664 
/* Emit a fence on the gfx ring via EVENT_WRITE_EOP: flushes TC/TCL1
 * caches, writes @seq (32 or 64 bit per AMDGPU_FENCE_FLAG_64BIT) to
 * @addr, and optionally raises an interrupt (AMDGPU_FENCE_FLAG_INT).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
4684 
/* Emit a VM TLB flush for @vm_id on @ring:
 *  1. wait for the ring's last fence so prior work has retired,
 *  2. (gfx only) double SWITCH_BUFFER to keep CE from racing ahead,
 *  3. write the new page directory base for the context,
 *  4. request the TLB invalidation and poll it to completion,
 *  5. (gfx only) PFP_SYNC_ME + SWITCH_BUFFERs to resync the PFP.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait on the fence memory until the last emitted seq lands */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* contexts 0-7 and 8-15 live in two separate register banks */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
4755 
4756 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4757 {
4758 	return ring->adev->wb.wb[ring->rptr_offs];
4759 }
4760 
4761 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4762 {
4763 	return ring->adev->wb.wb[ring->wptr_offs];
4764 }
4765 
4766 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4767 {
4768 	struct amdgpu_device *adev = ring->adev;
4769 
4770 	/* XXX check if swapping is necessary on BE */
4771 	adev->wb.wb[ring->wptr_offs] = ring->wptr;
4772 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4773 }
4774 
/* Emit a fence on a compute ring via RELEASE_MEM: flushes TC/TCL1 and
 * writes back TC, writes @seq (32 or 64 bit per AMDGPU_FENCE_FLAG_64BIT)
 * to @addr, and optionally raises an interrupt (AMDGPU_FENCE_FLAG_INT).
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
4795 
4796 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4797 						 enum amdgpu_interrupt_state state)
4798 {
4799 	u32 cp_int_cntl;
4800 
4801 	switch (state) {
4802 	case AMDGPU_IRQ_STATE_DISABLE:
4803 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4804 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4805 					    TIME_STAMP_INT_ENABLE, 0);
4806 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4807 		break;
4808 	case AMDGPU_IRQ_STATE_ENABLE:
4809 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4810 		cp_int_cntl =
4811 			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4812 				      TIME_STAMP_INT_ENABLE, 1);
4813 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4814 		break;
4815 	default:
4816 		break;
4817 	}
4818 }
4819 
4820 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4821 						     int me, int pipe,
4822 						     enum amdgpu_interrupt_state state)
4823 {
4824 	u32 mec_int_cntl, mec_int_cntl_reg;
4825 
4826 	/*
4827 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
4828 	 * handles the setting of interrupts for this specific pipe. All other
4829 	 * pipes' interrupts are set by amdkfd.
4830 	 */
4831 
4832 	if (me == 1) {
4833 		switch (pipe) {
4834 		case 0:
4835 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4836 			break;
4837 		default:
4838 			DRM_DEBUG("invalid pipe %d\n", pipe);
4839 			return;
4840 		}
4841 	} else {
4842 		DRM_DEBUG("invalid me %d\n", me);
4843 		return;
4844 	}
4845 
4846 	switch (state) {
4847 	case AMDGPU_IRQ_STATE_DISABLE:
4848 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4849 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4850 					     TIME_STAMP_INT_ENABLE, 0);
4851 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4852 		break;
4853 	case AMDGPU_IRQ_STATE_ENABLE:
4854 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4855 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4856 					     TIME_STAMP_INT_ENABLE, 1);
4857 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4858 		break;
4859 	default:
4860 		break;
4861 	}
4862 }
4863 
4864 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4865 					     struct amdgpu_irq_src *source,
4866 					     unsigned type,
4867 					     enum amdgpu_interrupt_state state)
4868 {
4869 	u32 cp_int_cntl;
4870 
4871 	switch (state) {
4872 	case AMDGPU_IRQ_STATE_DISABLE:
4873 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4874 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4875 					    PRIV_REG_INT_ENABLE, 0);
4876 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4877 		break;
4878 	case AMDGPU_IRQ_STATE_ENABLE:
4879 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4880 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4881 					    PRIV_REG_INT_ENABLE, 1);
4882 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4883 		break;
4884 	default:
4885 		break;
4886 	}
4887 
4888 	return 0;
4889 }
4890 
4891 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4892 					      struct amdgpu_irq_src *source,
4893 					      unsigned type,
4894 					      enum amdgpu_interrupt_state state)
4895 {
4896 	u32 cp_int_cntl;
4897 
4898 	switch (state) {
4899 	case AMDGPU_IRQ_STATE_DISABLE:
4900 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4901 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4902 					    PRIV_INSTR_INT_ENABLE, 0);
4903 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4904 		break;
4905 	case AMDGPU_IRQ_STATE_ENABLE:
4906 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4907 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4908 					    PRIV_INSTR_INT_ENABLE, 1);
4909 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4910 		break;
4911 	default:
4912 		break;
4913 	}
4914 
4915 	return 0;
4916 }
4917 
/* Dispatch an EOP interrupt enable/disable request to the ring it targets:
 * the gfx ring, or one of the eight MEC pipes (me 1-2, pipe 0-3).
 * Unknown types are silently ignored. Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
4956 
4957 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
4958 			    struct amdgpu_irq_src *source,
4959 			    struct amdgpu_iv_entry *entry)
4960 {
4961 	int i;
4962 	u8 me_id, pipe_id, queue_id;
4963 	struct amdgpu_ring *ring;
4964 
4965 	DRM_DEBUG("IH: CP EOP\n");
4966 	me_id = (entry->ring_id & 0x0c) >> 2;
4967 	pipe_id = (entry->ring_id & 0x03) >> 0;
4968 	queue_id = (entry->ring_id & 0x70) >> 4;
4969 
4970 	switch (me_id) {
4971 	case 0:
4972 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4973 		break;
4974 	case 1:
4975 	case 2:
4976 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4977 			ring = &adev->gfx.compute_ring[i];
4978 			/* Per-queue interrupt is supported for MEC starting from VI.
4979 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
4980 			  */
4981 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
4982 				amdgpu_fence_process(ring);
4983 		}
4984 		break;
4985 	}
4986 	return 0;
4987 }
4988 
/* Handle a privileged-register-access fault from the CP: log it and
 * schedule a GPU reset (the faulting command stream is not recoverable).
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
4997 
/* Handle a privileged-instruction fault from the CP: log it and
 * schedule a GPU reset, mirroring gfx_v8_0_priv_reg_irq().
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
5006 
/* IP-block callbacks for the GFX 8.0 block, registered with the amdgpu
 * core so it can drive init/fini, suspend/resume, reset and power/clock
 * gating for this engine.
 */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.print_status = gfx_v8_0_print_status,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
5023 
/* Ring callbacks for the GFX (graphics) ring. parse_cs is NULL because
 * VI gfx rings submit IBs directly without CS parsing.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
5039 
/* Ring callbacks for the MEC compute rings; differs from the gfx table
 * in the rptr/wptr accessors (writeback + doorbell), IB emission and
 * fence packet (RELEASE_MEM instead of EOP on gfx).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
5055 
5056 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5057 {
5058 	int i;
5059 
5060 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5061 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5062 
5063 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5064 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5065 }
5066 
/* Interrupt source tables: .set programs the enable state, .process
 * handles a delivered interrupt vector entry.
 */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
5081 
5082 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5083 {
5084 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5085 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5086 
5087 	adev->gfx.priv_reg_irq.num_types = 1;
5088 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5089 
5090 	adev->gfx.priv_inst_irq.num_types = 1;
5091 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
5092 }
5093 
5094 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5095 {
5096 	/* init asci gds info */
5097 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5098 	adev->gds.gws.total_size = 64;
5099 	adev->gds.oa.total_size = 16;
5100 
5101 	if (adev->gds.mem.total_size == 64 * 1024) {
5102 		adev->gds.mem.gfx_partition_size = 4096;
5103 		adev->gds.mem.cs_partition_size = 4096;
5104 
5105 		adev->gds.gws.gfx_partition_size = 4;
5106 		adev->gds.gws.cs_partition_size = 4;
5107 
5108 		adev->gds.oa.gfx_partition_size = 4;
5109 		adev->gds.oa.cs_partition_size = 1;
5110 	} else {
5111 		adev->gds.mem.gfx_partition_size = 1024;
5112 		adev->gds.mem.cs_partition_size = 1024;
5113 
5114 		adev->gds.gws.gfx_partition_size = 16;
5115 		adev->gds.gws.cs_partition_size = 16;
5116 
5117 		adev->gds.oa.gfx_partition_size = 4;
5118 		adev->gds.oa.cs_partition_size = 4;
5119 	}
5120 }
5121 
5122 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5123 {
5124 	u32 data, mask;
5125 
5126 	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5127 	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5128 
5129 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5130 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5131 
5132 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
5133 
5134 	return (~data) & mask;
5135 }
5136 
5137 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5138 			 struct amdgpu_cu_info *cu_info)
5139 {
5140 	int i, j, k, counter, active_cu_number = 0;
5141 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5142 
5143 	if (!adev || !cu_info)
5144 		return -EINVAL;
5145 
5146 	memset(cu_info, 0, sizeof(*cu_info));
5147 
5148 	mutex_lock(&adev->grbm_idx_mutex);
5149 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5150 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5151 			mask = 1;
5152 			ao_bitmap = 0;
5153 			counter = 0;
5154 			gfx_v8_0_select_se_sh(adev, i, j);
5155 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
5156 			cu_info->bitmap[i][j] = bitmap;
5157 
5158 			for (k = 0; k < 16; k ++) {
5159 				if (bitmap & mask) {
5160 					if (counter < 2)
5161 						ao_bitmap |= mask;
5162 					counter ++;
5163 				}
5164 				mask <<= 1;
5165 			}
5166 			active_cu_number += counter;
5167 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5168 		}
5169 	}
5170 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5171 	mutex_unlock(&adev->grbm_idx_mutex);
5172 
5173 	cu_info->number = active_cu_number;
5174 	cu_info->ao_cu_mask = ao_cu_mask;
5175 
5176 	return 0;
5177 }
5178