xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision cd4d09ec6f6c12a2cc3db5b7d8876a325a53545b)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
31 
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
34 
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37 
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
40 
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
45 
46 #include "uvd/uvd_5_0_d.h"
47 #include "uvd/uvd_5_0_sh_mask.h"
48 
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51 
52 #define GFX8_NUM_GFX_RINGS     1
53 #define GFX8_NUM_COMPUTE_RINGS 8
54 
55 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
56 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
58 
59 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
60 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
61 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
62 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
63 #define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
64 #define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
65 #define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
66 #define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
67 #define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
68 
69 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
70 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
71 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
72 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
73 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
74 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
75 
76 /* BPM SERDES CMD */
77 #define SET_BPM_SERDES_CMD    1
78 #define CLE_BPM_SERDES_CMD    0
79 
80 /* BPM Register Address*/
/*
 * Virtual BPM register indices used with the RLC SERDES command
 * interface (SET/CLE_BPM_SERDES_CMD above) to toggle per-block
 * clockgating overrides.
 */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};
89 
90 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
91 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
93 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
94 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
96 
97 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
98 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
100 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
101 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
102 
103 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
104 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
106 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
107 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
111 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
112 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
113 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
114 MODULE_FIRMWARE("amdgpu/topaz_mec2.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
116 
117 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
120 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
123 
/*
 * Per-VMID GDS register offsets, indexed by VMID (0-15).
 * Each entry holds the {base, size, GWS, OA} register offsets used to
 * program that VMID's GDS aperture.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
143 
/*
 * Tonga A11 golden register settings: {register, mask, value} triplets
 * consumed by amdgpu_program_register_sequence() from
 * gfx_v8_0_init_golden_registers().
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
162 
/*
 * Tonga golden settings common to all revisions: {register, mask, value}
 * triplets (raster config, GB_ADDR_CONFIG, SPI CU reservations).
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
174 
/*
 * Tonga MGCG/CGCG (medium-grain / coarse-grain clockgating) init
 * sequence: {register, mask, value} triplets programming the CGTT/CGTS
 * clock-control registers for each block and per-CU domain.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU (CU0-CU7) CGTS clock-control programming */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
253 
/*
 * Fiji golden settings common to all revisions: {register, mask, value}
 * triplets (raster config, GB_ADDR_CONFIG, SPI CU reservations).
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
267 
/*
 * Fiji A10 golden register settings: {register, mask, value} triplets
 * consumed by amdgpu_program_register_sequence().
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
282 
/*
 * Fiji MGCG/CGCG clockgating init sequence: {register, mask, value}
 * triplets for the CGTT/CGTS clock-control registers.  Unlike Tonga,
 * no per-CU CGTS programming is done here.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
321 
/*
 * Iceland (Topaz) A11 golden register settings: {register, mask, value}
 * triplets consumed by amdgpu_program_register_sequence().
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
340 
/*
 * Iceland (Topaz) golden settings common to all revisions:
 * {register, mask, value} triplets.
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
352 
/*
 * Iceland (Topaz) MGCG/CGCG clockgating init sequence:
 * {register, mask, value} triplets for the CGTT/CGTS clock-control
 * registers and the per-CU (CU0-CU5) CGTS domains.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU (CU0-CU5) CGTS clock-control programming */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
420 
/*
 * Carrizo A11 golden register settings: {register, mask, value} triplets
 * consumed by amdgpu_program_register_sequence().
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
434 
/*
 * Carrizo golden settings common to all revisions:
 * {register, mask, value} triplets.
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
446 
/*
 * Carrizo MGCG/CGCG clockgating init sequence: {register, mask, value}
 * triplets for the CGTT/CGTS clock-control registers and the per-CU
 * (CU0-CU7) CGTS domains.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU (CU0-CU7) CGTS clock-control programming */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
525 
526 static const u32 stoney_golden_settings_a11[] =
527 {
528 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
529 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
530 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
531 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
532 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
533 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
534   	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
535 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
536 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
537 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
538 };
539 
/*
 * Stoney golden settings common to all revisions:
 * {register, mask, value} triplets.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
551 
/*
 * Stoney MGCG/CGCG clockgating init sequence: {register, mask, value}
 * triplets (much shorter than the dGPU sequences above).
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
561 
562 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
563 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
564 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
565 
566 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
567 {
568 	switch (adev->asic_type) {
569 	case CHIP_TOPAZ:
570 		amdgpu_program_register_sequence(adev,
571 						 iceland_mgcg_cgcg_init,
572 						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
573 		amdgpu_program_register_sequence(adev,
574 						 golden_settings_iceland_a11,
575 						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
576 		amdgpu_program_register_sequence(adev,
577 						 iceland_golden_common_all,
578 						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
579 		break;
580 	case CHIP_FIJI:
581 		amdgpu_program_register_sequence(adev,
582 						 fiji_mgcg_cgcg_init,
583 						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
584 		amdgpu_program_register_sequence(adev,
585 						 golden_settings_fiji_a10,
586 						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
587 		amdgpu_program_register_sequence(adev,
588 						 fiji_golden_common_all,
589 						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
590 		break;
591 
592 	case CHIP_TONGA:
593 		amdgpu_program_register_sequence(adev,
594 						 tonga_mgcg_cgcg_init,
595 						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
596 		amdgpu_program_register_sequence(adev,
597 						 golden_settings_tonga_a11,
598 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
599 		amdgpu_program_register_sequence(adev,
600 						 tonga_golden_common_all,
601 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
602 		break;
603 	case CHIP_CARRIZO:
604 		amdgpu_program_register_sequence(adev,
605 						 cz_mgcg_cgcg_init,
606 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
607 		amdgpu_program_register_sequence(adev,
608 						 cz_golden_settings_a11,
609 						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
610 		amdgpu_program_register_sequence(adev,
611 						 cz_golden_common_all,
612 						 (const u32)ARRAY_SIZE(cz_golden_common_all));
613 		break;
614 	case CHIP_STONEY:
615 		amdgpu_program_register_sequence(adev,
616 						 stoney_mgcg_cgcg_init,
617 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
618 		amdgpu_program_register_sequence(adev,
619 						 stoney_golden_settings_a11,
620 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
621 		amdgpu_program_register_sequence(adev,
622 						 stoney_golden_common_all,
623 						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
624 		break;
625 	default:
626 		break;
627 	}
628 }
629 
630 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
631 {
632 	int i;
633 
634 	adev->gfx.scratch.num_reg = 7;
635 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
636 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
637 		adev->gfx.scratch.free[i] = true;
638 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
639 	}
640 }
641 
642 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
643 {
644 	struct amdgpu_device *adev = ring->adev;
645 	uint32_t scratch;
646 	uint32_t tmp = 0;
647 	unsigned i;
648 	int r;
649 
650 	r = amdgpu_gfx_scratch_get(adev, &scratch);
651 	if (r) {
652 		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
653 		return r;
654 	}
655 	WREG32(scratch, 0xCAFEDEAD);
656 	r = amdgpu_ring_lock(ring, 3);
657 	if (r) {
658 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
659 			  ring->idx, r);
660 		amdgpu_gfx_scratch_free(adev, scratch);
661 		return r;
662 	}
663 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
664 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
665 	amdgpu_ring_write(ring, 0xDEADBEEF);
666 	amdgpu_ring_unlock_commit(ring);
667 
668 	for (i = 0; i < adev->usec_timeout; i++) {
669 		tmp = RREG32(scratch);
670 		if (tmp == 0xDEADBEEF)
671 			break;
672 		DRM_UDELAY(1);
673 	}
674 	if (i < adev->usec_timeout) {
675 		DRM_INFO("ring test on %d succeeded in %d usecs\n",
676 			 ring->idx, i);
677 	} else {
678 		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
679 			  ring->idx, scratch, tmp);
680 		r = -EINVAL;
681 	}
682 	amdgpu_gfx_scratch_free(adev, scratch);
683 	return r;
684 }
685 
686 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
687 {
688 	struct amdgpu_device *adev = ring->adev;
689 	struct amdgpu_ib ib;
690 	struct fence *f = NULL;
691 	uint32_t scratch;
692 	uint32_t tmp = 0;
693 	unsigned i;
694 	int r;
695 
696 	r = amdgpu_gfx_scratch_get(adev, &scratch);
697 	if (r) {
698 		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
699 		return r;
700 	}
701 	WREG32(scratch, 0xCAFEDEAD);
702 	memset(&ib, 0, sizeof(ib));
703 	r = amdgpu_ib_get(ring, NULL, 256, &ib);
704 	if (r) {
705 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
706 		goto err1;
707 	}
708 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
709 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
710 	ib.ptr[2] = 0xDEADBEEF;
711 	ib.length_dw = 3;
712 
713 	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
714 						 AMDGPU_FENCE_OWNER_UNDEFINED,
715 						 &f);
716 	if (r)
717 		goto err2;
718 
719 	r = fence_wait(f, false);
720 	if (r) {
721 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
722 		goto err2;
723 	}
724 	for (i = 0; i < adev->usec_timeout; i++) {
725 		tmp = RREG32(scratch);
726 		if (tmp == 0xDEADBEEF)
727 			break;
728 		DRM_UDELAY(1);
729 	}
730 	if (i < adev->usec_timeout) {
731 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
732 			 ring->idx, i);
733 		goto err2;
734 	} else {
735 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
736 			  scratch, tmp);
737 		r = -EINVAL;
738 	}
739 err2:
740 	fence_put(f);
741 	amdgpu_ib_free(adev, &ib);
742 err1:
743 	amdgpu_gfx_scratch_free(adev, scratch);
744 	return r;
745 }
746 
747 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
748 {
749 	const char *chip_name;
750 	char fw_name[30];
751 	int err;
752 	struct amdgpu_firmware_info *info = NULL;
753 	const struct common_firmware_header *header = NULL;
754 	const struct gfx_firmware_header_v1_0 *cp_hdr;
755 
756 	DRM_DEBUG("\n");
757 
758 	switch (adev->asic_type) {
759 	case CHIP_TOPAZ:
760 		chip_name = "topaz";
761 		break;
762 	case CHIP_TONGA:
763 		chip_name = "tonga";
764 		break;
765 	case CHIP_CARRIZO:
766 		chip_name = "carrizo";
767 		break;
768 	case CHIP_FIJI:
769 		chip_name = "fiji";
770 		break;
771 	case CHIP_STONEY:
772 		chip_name = "stoney";
773 		break;
774 	default:
775 		BUG();
776 	}
777 
778 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
779 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
780 	if (err)
781 		goto out;
782 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
783 	if (err)
784 		goto out;
785 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
786 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
787 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
788 
789 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
790 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
791 	if (err)
792 		goto out;
793 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
794 	if (err)
795 		goto out;
796 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
797 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
798 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
799 
800 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
801 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
802 	if (err)
803 		goto out;
804 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
805 	if (err)
806 		goto out;
807 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
808 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
809 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
810 
811 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
812 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
813 	if (err)
814 		goto out;
815 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
816 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
817 	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
818 	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
819 
820 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
821 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
822 	if (err)
823 		goto out;
824 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
825 	if (err)
826 		goto out;
827 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
828 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
829 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
830 
831 	if (adev->asic_type != CHIP_STONEY) {
832 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
833 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
834 		if (!err) {
835 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
836 			if (err)
837 				goto out;
838 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
839 				adev->gfx.mec2_fw->data;
840 			adev->gfx.mec2_fw_version =
841 				le32_to_cpu(cp_hdr->header.ucode_version);
842 			adev->gfx.mec2_feature_version =
843 				le32_to_cpu(cp_hdr->ucode_feature_version);
844 		} else {
845 			err = 0;
846 			adev->gfx.mec2_fw = NULL;
847 		}
848 	}
849 
850 	if (adev->firmware.smu_load) {
851 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
852 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
853 		info->fw = adev->gfx.pfp_fw;
854 		header = (const struct common_firmware_header *)info->fw->data;
855 		adev->firmware.fw_size +=
856 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
857 
858 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
859 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
860 		info->fw = adev->gfx.me_fw;
861 		header = (const struct common_firmware_header *)info->fw->data;
862 		adev->firmware.fw_size +=
863 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
864 
865 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
866 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
867 		info->fw = adev->gfx.ce_fw;
868 		header = (const struct common_firmware_header *)info->fw->data;
869 		adev->firmware.fw_size +=
870 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
871 
872 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
873 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
874 		info->fw = adev->gfx.rlc_fw;
875 		header = (const struct common_firmware_header *)info->fw->data;
876 		adev->firmware.fw_size +=
877 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
878 
879 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
880 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
881 		info->fw = adev->gfx.mec_fw;
882 		header = (const struct common_firmware_header *)info->fw->data;
883 		adev->firmware.fw_size +=
884 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
885 
886 		if (adev->gfx.mec2_fw) {
887 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
888 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
889 			info->fw = adev->gfx.mec2_fw;
890 			header = (const struct common_firmware_header *)info->fw->data;
891 			adev->firmware.fw_size +=
892 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
893 		}
894 
895 	}
896 
897 out:
898 	if (err) {
899 		dev_err(adev->dev,
900 			"gfx8: Failed to load firmware \"%s\"\n",
901 			fw_name);
902 		release_firmware(adev->gfx.pfp_fw);
903 		adev->gfx.pfp_fw = NULL;
904 		release_firmware(adev->gfx.me_fw);
905 		adev->gfx.me_fw = NULL;
906 		release_firmware(adev->gfx.ce_fw);
907 		adev->gfx.ce_fw = NULL;
908 		release_firmware(adev->gfx.rlc_fw);
909 		adev->gfx.rlc_fw = NULL;
910 		release_firmware(adev->gfx.mec_fw);
911 		adev->gfx.mec_fw = NULL;
912 		release_firmware(adev->gfx.mec2_fw);
913 		adev->gfx.mec2_fw = NULL;
914 	}
915 	return err;
916 }
917 
918 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
919 {
920 	int r;
921 
922 	if (adev->gfx.mec.hpd_eop_obj) {
923 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
924 		if (unlikely(r != 0))
925 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
926 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
927 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
928 
929 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
930 		adev->gfx.mec.hpd_eop_obj = NULL;
931 	}
932 }
933 
934 #define MEC_HPD_SIZE 2048
935 
936 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
937 {
938 	int r;
939 	u32 *hpd;
940 
941 	/*
942 	 * we assign only 1 pipe because all other pipes will
943 	 * be handled by KFD
944 	 */
945 	adev->gfx.mec.num_mec = 1;
946 	adev->gfx.mec.num_pipe = 1;
947 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
948 
949 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
950 		r = amdgpu_bo_create(adev,
951 				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
952 				     PAGE_SIZE, true,
953 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
954 				     &adev->gfx.mec.hpd_eop_obj);
955 		if (r) {
956 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
957 			return r;
958 		}
959 	}
960 
961 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
962 	if (unlikely(r != 0)) {
963 		gfx_v8_0_mec_fini(adev);
964 		return r;
965 	}
966 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
967 			  &adev->gfx.mec.hpd_eop_gpu_addr);
968 	if (r) {
969 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
970 		gfx_v8_0_mec_fini(adev);
971 		return r;
972 	}
973 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
974 	if (r) {
975 		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
976 		gfx_v8_0_mec_fini(adev);
977 		return r;
978 	}
979 
980 	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
981 
982 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
983 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
984 
985 	return 0;
986 }
987 
/*
 * Raw GCN machine code for the compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize the VGPRs.
 * The trailing pair 0xbf8a0000/0xbf810000 appears to be
 * s_barrier/s_endpgm -- TODO confirm against the GCN3 ISA manual.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1024 
/*
 * Raw GCN machine code for the compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize the SGPRs.
 * Dispatched twice, once for each half of the SEs (see the
 * sgpr1_init_regs/sgpr2_init_regs tables below).
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1049 
/*
 * Register-offset/value pairs programmed through PACKET3_SET_SH_REG
 * before dispatching vgpr_init_compute_shader; consumed two entries
 * at a time by gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1069 
/*
 * SET_SH_REG pairs for the first SGPR-init dispatch.  Differs from
 * sgpr2_init_regs only in the STATIC_THREAD_MGMT_SE0 mask (0x0f here
 * vs 0xf0 there), selecting a different set of CUs.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1089 
/*
 * SET_SH_REG pairs for the second SGPR-init dispatch; identical to
 * sgpr1_init_regs except for the STATIC_THREAD_MGMT_SE0 mask (0xf0).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1109 
/*
 * EDC (SEC/DED error counter) registers that are read back at the end
 * of gfx_v8_0_do_edc_gpr_workarounds() to clear their counts.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1138 
/**
 * gfx_v8_0_do_edc_gpr_workarounds - run the Carrizo EDC GPR init sequence
 * @adev: amdgpu device pointer
 *
 * Builds a single indirect buffer containing three compute dispatches
 * (one VGPR-init and two SGPR-init shaders, each followed by a CS
 * partial-flush event), submits it on the first compute ring, waits for
 * completion, programs the EDC mode registers and reads back the EDC
 * counter registers to clear them.  Carrizo only; a no-op elsewhere or
 * when the compute ring is not ready.
 *
 * Returns 0 on success or if skipped, negative error code on failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save the current EDC mode and disable EDC while the IB runs */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/*
	 * Size the IB: each dispatch needs 3 dwords per SET_SH_REG pair,
	 * plus 4 dwords for the shader address write, 5 for the dispatch
	 * packet and 2 for the CS partial flush; the shaders are appended
	 * after the packets at 256-byte-aligned offsets.
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(ring, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 - same shader as SGPR1, different STATIC_THREAD_MGMT_SE0 mask */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* restore the saved EDC mode with DED_MODE/PROP_FED enabled */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	/* NOTE(review): clears DIS_EDC, then ORs in bit 0 -- intent unclear,
	 * verify against the CC_GC_EDC_CONFIG register spec */
	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	fence_put(f);
	amdgpu_ib_free(adev, &ib);

	return r;
}
1303 
/**
 * gfx_v8_0_gpu_early_init - fill in per-ASIC GFX configuration
 * @adev: amdgpu device pointer
 *
 * Populates adev->gfx.config (shader engine / pipe / CU counts, FIFO
 * sizes, golden GB_ADDR_CONFIG value, memory row size) based on the
 * ASIC type and, for Carrizo/Stoney, the PCI revision ID which encodes
 * the harvested CU count.  The memory row size is derived from the DIMM
 * address-map fuses on APUs and from MC_ARB_RAMCFG on dGPUs, then
 * folded back into the ROW_SIZE field of gb_addr_config.
 */
static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count varies with the SKU, encoded in the PCI revision */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count varies with the SKU, encoded in the PCI revision */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read here but never used below */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: row size = 4 * 2^(8 + NOOFCOLS) bytes, capped at 4KB */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;
}
1529 
/**
 * gfx_v8_0_sw_init - software-side initialization of the GFX IP block
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Registers the EOP / privileged-register / privileged-instruction
 * interrupt sources, loads the GFX microcode, creates the MEC HPD EOP
 * buffer, initializes the gfx and compute rings, reserves the GDS/GWS/OA
 * buffer objects and fills in the early GPU configuration.
 *
 * Returns 0 on success, negative error code on the first failure.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;	/* 8 queues per pipe */
		ring->queue = i % 8;
		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
			NULL, &adev->gds.gds_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
		PAGE_SIZE, true,
		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
		NULL, &adev->gds.gws_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
			NULL, &adev->gds.oa_gfx_bo);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	gfx_v8_0_gpu_early_init(adev);

	return 0;
}
1641 
1642 static int gfx_v8_0_sw_fini(void *handle)
1643 {
1644 	int i;
1645 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1646 
1647 	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1648 	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1649 	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1650 
1651 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1652 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1653 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1654 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1655 
1656 	gfx_v8_0_mec_fini(adev);
1657 
1658 	return 0;
1659 }
1660 
1661 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1662 {
1663 	uint32_t *modearray, *mod2array;
1664 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1665 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1666 	u32 reg_offset;
1667 
1668 	modearray = adev->gfx.config.tile_mode_array;
1669 	mod2array = adev->gfx.config.macrotile_mode_array;
1670 
1671 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1672 		modearray[reg_offset] = 0;
1673 
1674 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1675 		mod2array[reg_offset] = 0;
1676 
1677 	switch (adev->asic_type) {
1678 	case CHIP_TOPAZ:
1679 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1680 				PIPE_CONFIG(ADDR_SURF_P2) |
1681 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1682 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1683 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1684 				PIPE_CONFIG(ADDR_SURF_P2) |
1685 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1686 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1687 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1688 				PIPE_CONFIG(ADDR_SURF_P2) |
1689 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1690 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1691 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1692 				PIPE_CONFIG(ADDR_SURF_P2) |
1693 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1694 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1695 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1696 				PIPE_CONFIG(ADDR_SURF_P2) |
1697 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1698 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1699 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1700 				PIPE_CONFIG(ADDR_SURF_P2) |
1701 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1702 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1703 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1704 				PIPE_CONFIG(ADDR_SURF_P2) |
1705 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1706 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1707 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1708 				PIPE_CONFIG(ADDR_SURF_P2));
1709 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1710 				PIPE_CONFIG(ADDR_SURF_P2) |
1711 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1712 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1713 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1714 				 PIPE_CONFIG(ADDR_SURF_P2) |
1715 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1716 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1717 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1718 				 PIPE_CONFIG(ADDR_SURF_P2) |
1719 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1720 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1721 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1722 				 PIPE_CONFIG(ADDR_SURF_P2) |
1723 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1724 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1725 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1726 				 PIPE_CONFIG(ADDR_SURF_P2) |
1727 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1728 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1729 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1730 				 PIPE_CONFIG(ADDR_SURF_P2) |
1731 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1732 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1733 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1734 				 PIPE_CONFIG(ADDR_SURF_P2) |
1735 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1736 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1737 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1738 				 PIPE_CONFIG(ADDR_SURF_P2) |
1739 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1740 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1741 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1742 				 PIPE_CONFIG(ADDR_SURF_P2) |
1743 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1744 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1745 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1746 				 PIPE_CONFIG(ADDR_SURF_P2) |
1747 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1748 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1749 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1750 				 PIPE_CONFIG(ADDR_SURF_P2) |
1751 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1752 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1753 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1754 				 PIPE_CONFIG(ADDR_SURF_P2) |
1755 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1756 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1757 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1758 				 PIPE_CONFIG(ADDR_SURF_P2) |
1759 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1760 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1761 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1762 				 PIPE_CONFIG(ADDR_SURF_P2) |
1763 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1764 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1765 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1766 				 PIPE_CONFIG(ADDR_SURF_P2) |
1767 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1768 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1769 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1770 				 PIPE_CONFIG(ADDR_SURF_P2) |
1771 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1772 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1773 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1774 				 PIPE_CONFIG(ADDR_SURF_P2) |
1775 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1776 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1777 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1778 				 PIPE_CONFIG(ADDR_SURF_P2) |
1779 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1780 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1781 
1782 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1783 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1784 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1785 				NUM_BANKS(ADDR_SURF_8_BANK));
1786 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1787 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1788 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1789 				NUM_BANKS(ADDR_SURF_8_BANK));
1790 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1791 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1792 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1793 				NUM_BANKS(ADDR_SURF_8_BANK));
1794 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1796 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1797 				NUM_BANKS(ADDR_SURF_8_BANK));
1798 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1799 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1800 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1801 				NUM_BANKS(ADDR_SURF_8_BANK));
1802 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1803 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1804 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1805 				NUM_BANKS(ADDR_SURF_8_BANK));
1806 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1807 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1808 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1809 				NUM_BANKS(ADDR_SURF_8_BANK));
1810 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1811 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1812 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1813 				NUM_BANKS(ADDR_SURF_16_BANK));
1814 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1815 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1816 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1817 				NUM_BANKS(ADDR_SURF_16_BANK));
1818 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1819 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1820 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1821 				 NUM_BANKS(ADDR_SURF_16_BANK));
1822 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1823 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1824 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1825 				 NUM_BANKS(ADDR_SURF_16_BANK));
1826 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1827 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1828 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1829 				 NUM_BANKS(ADDR_SURF_16_BANK));
1830 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1831 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1832 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1833 				 NUM_BANKS(ADDR_SURF_16_BANK));
1834 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1837 				 NUM_BANKS(ADDR_SURF_8_BANK));
1838 
1839 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1840 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1841 			    reg_offset != 23)
1842 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1843 
1844 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1845 			if (reg_offset != 7)
1846 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1847 
1848 		break;
1849 	case CHIP_FIJI:
1850 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1851 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1852 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1853 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1854 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1855 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1856 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1857 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1858 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1860 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1861 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1862 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1863 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1864 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1865 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1866 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1867 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1868 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1869 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1870 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1871 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1872 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1873 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1874 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1875 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1876 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1877 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1878 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1879 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1880 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1881 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1882 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1883 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1884 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1885 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1886 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1887 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1888 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1889 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1890 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1892 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1893 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1894 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1895 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1896 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1897 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1898 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1899 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1900 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1901 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1902 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1903 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1904 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1905 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1906 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1907 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1908 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1909 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1910 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1911 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1912 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1913 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1914 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1915 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1916 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1917 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1918 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1919 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1920 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1921 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1922 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1923 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1924 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1925 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1926 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1927 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1928 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1929 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1930 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1931 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1932 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1933 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1934 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1935 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1936 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1937 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1938 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1939 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1940 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1941 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1942 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1943 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1944 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1945 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1946 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1947 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1948 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1949 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1950 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1951 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1952 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1953 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1954 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1955 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1956 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1957 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1958 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1959 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1960 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1961 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1962 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1963 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1964 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1965 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1966 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1967 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1968 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1969 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1970 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1971 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1972 
1973 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1974 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1975 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1976 				NUM_BANKS(ADDR_SURF_8_BANK));
1977 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1978 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1979 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1980 				NUM_BANKS(ADDR_SURF_8_BANK));
1981 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1982 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1983 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1984 				NUM_BANKS(ADDR_SURF_8_BANK));
1985 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1986 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1987 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1988 				NUM_BANKS(ADDR_SURF_8_BANK));
1989 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1990 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1991 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1992 				NUM_BANKS(ADDR_SURF_8_BANK));
1993 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1994 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1995 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1996 				NUM_BANKS(ADDR_SURF_8_BANK));
1997 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1998 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1999 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2000 				NUM_BANKS(ADDR_SURF_8_BANK));
2001 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2002 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2003 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2004 				NUM_BANKS(ADDR_SURF_8_BANK));
2005 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2006 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2007 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2008 				NUM_BANKS(ADDR_SURF_8_BANK));
2009 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2010 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2011 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2012 				 NUM_BANKS(ADDR_SURF_8_BANK));
2013 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2014 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2015 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2016 				 NUM_BANKS(ADDR_SURF_8_BANK));
2017 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2018 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2019 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2020 				 NUM_BANKS(ADDR_SURF_8_BANK));
2021 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2022 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2023 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2024 				 NUM_BANKS(ADDR_SURF_8_BANK));
2025 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2026 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2027 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2028 				 NUM_BANKS(ADDR_SURF_4_BANK));
2029 
2030 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2031 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2032 
2033 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2034 			if (reg_offset != 7)
2035 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2036 
2037 		break;
2038 	case CHIP_TONGA:
2039 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2040 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2041 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2042 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2043 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2044 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2045 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2046 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2047 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2049 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2050 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2051 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2052 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2053 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2054 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2055 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2057 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2058 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2059 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2060 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2061 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2062 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2063 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2064 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2065 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2066 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2067 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2068 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2069 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2070 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2071 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2072 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2073 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2074 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2075 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2076 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2078 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2079 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2080 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2081 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2082 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2083 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2084 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2085 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2086 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2087 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2088 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2089 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2090 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2091 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2092 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2094 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2095 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2096 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2097 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2098 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2099 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2100 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2101 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2102 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2103 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2104 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2105 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2106 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2107 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2108 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2109 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2110 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2111 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2112 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2113 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2114 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2115 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2116 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2117 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2118 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2119 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2120 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2121 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2122 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2123 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2124 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2125 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2126 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2127 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2128 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2129 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2130 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2131 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2132 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2133 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2134 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2135 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2136 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2137 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2138 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2139 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2140 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2141 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2142 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2143 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2144 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2145 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2146 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2147 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2148 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2150 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2151 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2152 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2153 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2154 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2155 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2156 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2157 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2158 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2159 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2160 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2161 
2162 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2163 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2164 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2165 				NUM_BANKS(ADDR_SURF_16_BANK));
2166 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2167 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2168 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2169 				NUM_BANKS(ADDR_SURF_16_BANK));
2170 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2172 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2173 				NUM_BANKS(ADDR_SURF_16_BANK));
2174 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2175 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2176 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2177 				NUM_BANKS(ADDR_SURF_16_BANK));
2178 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2179 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2180 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2181 				NUM_BANKS(ADDR_SURF_16_BANK));
2182 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2184 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2185 				NUM_BANKS(ADDR_SURF_16_BANK));
2186 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2187 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2188 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2189 				NUM_BANKS(ADDR_SURF_16_BANK));
2190 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2191 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2192 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2193 				NUM_BANKS(ADDR_SURF_16_BANK));
2194 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2195 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2196 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2197 				NUM_BANKS(ADDR_SURF_16_BANK));
2198 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2199 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2200 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2201 				 NUM_BANKS(ADDR_SURF_16_BANK));
2202 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2203 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2204 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2205 				 NUM_BANKS(ADDR_SURF_16_BANK));
2206 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2208 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2209 				 NUM_BANKS(ADDR_SURF_8_BANK));
2210 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2211 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2212 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2213 				 NUM_BANKS(ADDR_SURF_4_BANK));
2214 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2215 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2216 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2217 				 NUM_BANKS(ADDR_SURF_4_BANK));
2218 
2219 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2220 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2221 
2222 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2223 			if (reg_offset != 7)
2224 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2225 
2226 		break;
2227 	case CHIP_STONEY:
2228 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229 				PIPE_CONFIG(ADDR_SURF_P2) |
2230 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2231 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233 				PIPE_CONFIG(ADDR_SURF_P2) |
2234 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2235 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 				PIPE_CONFIG(ADDR_SURF_P2) |
2238 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2239 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2240 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241 				PIPE_CONFIG(ADDR_SURF_P2) |
2242 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2243 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2244 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245 				PIPE_CONFIG(ADDR_SURF_P2) |
2246 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2247 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249 				PIPE_CONFIG(ADDR_SURF_P2) |
2250 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2251 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2253 				PIPE_CONFIG(ADDR_SURF_P2) |
2254 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2255 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2257 				PIPE_CONFIG(ADDR_SURF_P2));
2258 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2259 				PIPE_CONFIG(ADDR_SURF_P2) |
2260 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2261 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2263 				 PIPE_CONFIG(ADDR_SURF_P2) |
2264 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2265 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2266 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267 				 PIPE_CONFIG(ADDR_SURF_P2) |
2268 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2269 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2270 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2271 				 PIPE_CONFIG(ADDR_SURF_P2) |
2272 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2275 				 PIPE_CONFIG(ADDR_SURF_P2) |
2276 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2279 				 PIPE_CONFIG(ADDR_SURF_P2) |
2280 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2283 				 PIPE_CONFIG(ADDR_SURF_P2) |
2284 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2286 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2287 				 PIPE_CONFIG(ADDR_SURF_P2) |
2288 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2289 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2291 				 PIPE_CONFIG(ADDR_SURF_P2) |
2292 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2295 				 PIPE_CONFIG(ADDR_SURF_P2) |
2296 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2299 				 PIPE_CONFIG(ADDR_SURF_P2) |
2300 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2303 				 PIPE_CONFIG(ADDR_SURF_P2) |
2304 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2307 				 PIPE_CONFIG(ADDR_SURF_P2) |
2308 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2309 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2311 				 PIPE_CONFIG(ADDR_SURF_P2) |
2312 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2313 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2315 				 PIPE_CONFIG(ADDR_SURF_P2) |
2316 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2319 				 PIPE_CONFIG(ADDR_SURF_P2) |
2320 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323 				 PIPE_CONFIG(ADDR_SURF_P2) |
2324 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2327 				 PIPE_CONFIG(ADDR_SURF_P2) |
2328 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2329 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2330 
2331 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2333 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2334 				NUM_BANKS(ADDR_SURF_8_BANK));
2335 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2337 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2338 				NUM_BANKS(ADDR_SURF_8_BANK));
2339 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2341 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342 				NUM_BANKS(ADDR_SURF_8_BANK));
2343 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346 				NUM_BANKS(ADDR_SURF_8_BANK));
2347 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350 				NUM_BANKS(ADDR_SURF_8_BANK));
2351 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354 				NUM_BANKS(ADDR_SURF_8_BANK));
2355 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 				NUM_BANKS(ADDR_SURF_8_BANK));
2359 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2360 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2361 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2362 				NUM_BANKS(ADDR_SURF_16_BANK));
2363 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2364 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2366 				NUM_BANKS(ADDR_SURF_16_BANK));
2367 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2368 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2369 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370 				 NUM_BANKS(ADDR_SURF_16_BANK));
2371 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2372 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2374 				 NUM_BANKS(ADDR_SURF_16_BANK));
2375 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2377 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378 				 NUM_BANKS(ADDR_SURF_16_BANK));
2379 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2382 				 NUM_BANKS(ADDR_SURF_16_BANK));
2383 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2385 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386 				 NUM_BANKS(ADDR_SURF_8_BANK));
2387 
2388 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2389 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2390 			    reg_offset != 23)
2391 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2392 
2393 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2394 			if (reg_offset != 7)
2395 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2396 
2397 		break;
2398 	default:
2399 		dev_warn(adev->dev,
2400 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2401 			 adev->asic_type);
2402 
2403 	case CHIP_CARRIZO:
2404 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405 				PIPE_CONFIG(ADDR_SURF_P2) |
2406 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2407 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409 				PIPE_CONFIG(ADDR_SURF_P2) |
2410 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2411 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2412 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413 				PIPE_CONFIG(ADDR_SURF_P2) |
2414 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2415 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 				PIPE_CONFIG(ADDR_SURF_P2) |
2418 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2419 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421 				PIPE_CONFIG(ADDR_SURF_P2) |
2422 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2423 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425 				PIPE_CONFIG(ADDR_SURF_P2) |
2426 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2427 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2429 				PIPE_CONFIG(ADDR_SURF_P2) |
2430 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2431 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2433 				PIPE_CONFIG(ADDR_SURF_P2));
2434 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435 				PIPE_CONFIG(ADDR_SURF_P2) |
2436 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2437 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 				 PIPE_CONFIG(ADDR_SURF_P2) |
2440 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2441 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443 				 PIPE_CONFIG(ADDR_SURF_P2) |
2444 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2445 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2446 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447 				 PIPE_CONFIG(ADDR_SURF_P2) |
2448 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2449 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2450 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 				 PIPE_CONFIG(ADDR_SURF_P2) |
2452 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2453 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2455 				 PIPE_CONFIG(ADDR_SURF_P2) |
2456 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2457 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459 				 PIPE_CONFIG(ADDR_SURF_P2) |
2460 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2462 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2463 				 PIPE_CONFIG(ADDR_SURF_P2) |
2464 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2465 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2466 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2467 				 PIPE_CONFIG(ADDR_SURF_P2) |
2468 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2469 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2470 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2471 				 PIPE_CONFIG(ADDR_SURF_P2) |
2472 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2473 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2474 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2475 				 PIPE_CONFIG(ADDR_SURF_P2) |
2476 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2477 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2478 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2479 				 PIPE_CONFIG(ADDR_SURF_P2) |
2480 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2481 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2482 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2483 				 PIPE_CONFIG(ADDR_SURF_P2) |
2484 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2486 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2487 				 PIPE_CONFIG(ADDR_SURF_P2) |
2488 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2489 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2491 				 PIPE_CONFIG(ADDR_SURF_P2) |
2492 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2495 				 PIPE_CONFIG(ADDR_SURF_P2) |
2496 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2497 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2498 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499 				 PIPE_CONFIG(ADDR_SURF_P2) |
2500 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2501 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2503 				 PIPE_CONFIG(ADDR_SURF_P2) |
2504 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2505 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2506 
2507 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510 				NUM_BANKS(ADDR_SURF_8_BANK));
2511 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2513 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514 				NUM_BANKS(ADDR_SURF_8_BANK));
2515 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518 				NUM_BANKS(ADDR_SURF_8_BANK));
2519 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522 				NUM_BANKS(ADDR_SURF_8_BANK));
2523 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2526 				NUM_BANKS(ADDR_SURF_8_BANK));
2527 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2530 				NUM_BANKS(ADDR_SURF_8_BANK));
2531 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2534 				NUM_BANKS(ADDR_SURF_8_BANK));
2535 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2536 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2537 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538 				NUM_BANKS(ADDR_SURF_16_BANK));
2539 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2540 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2542 				NUM_BANKS(ADDR_SURF_16_BANK));
2543 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2544 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2546 				 NUM_BANKS(ADDR_SURF_16_BANK));
2547 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2548 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2550 				 NUM_BANKS(ADDR_SURF_16_BANK));
2551 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2554 				 NUM_BANKS(ADDR_SURF_16_BANK));
2555 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2558 				 NUM_BANKS(ADDR_SURF_16_BANK));
2559 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2562 				 NUM_BANKS(ADDR_SURF_8_BANK));
2563 
2564 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2565 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2566 			    reg_offset != 23)
2567 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2568 
2569 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2570 			if (reg_offset != 7)
2571 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2572 
2573 		break;
2574 	}
2575 }
2576 
2577 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2578 {
2579 	return (u32)((1ULL << bit_width) - 1);
2580 }
2581 
2582 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2583 {
2584 	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2585 
2586 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2587 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2588 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2589 	} else if (se_num == 0xffffffff) {
2590 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2591 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2592 	} else if (sh_num == 0xffffffff) {
2593 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2594 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2595 	} else {
2596 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2597 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2598 	}
2599 	WREG32(mmGRBM_GFX_INDEX, data);
2600 }
2601 
/* Return the bitmap of render backends that are disabled on the
 * currently selected SE/SH (see gfx_v8_0_select_se_sh()), combining
 * the hardware (CC) and user (GC_USER) backend-disable registers and
 * masking it down to the RBs that belong to a single SH.
 */
static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
				    u32 max_rb_num_per_se,
				    u32 sh_per_se)
{
	u32 data, mask;

	/* RBs disabled by harvesting fuses */
	data = RREG32(mmCC_RB_BACKEND_DISABLE);
	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;

	/* RBs additionally disabled by the user/driver */
	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	/* keep only the bits for this SH's share of the SE's RBs */
	mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
2619 
/* Gather the per-SE/SH disabled-RB bitmaps, derive the global mask of
 * enabled render backends, and program PA_SC_RASTER_CONFIG on each SE
 * so the rasterizer only maps work to enabled RBs.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
			      u32 se_num, u32 sh_per_se,
			      u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* collect the disabled-RB bits of every SE/SH pair into one word,
	 * RB_BITMAP_WIDTH_PER_SH bits per SH
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			data = gfx_v8_0_get_rb_disabled(adev,
					      max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) *
						 RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast mode before releasing the index mutex */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* invert: a set bit in enabled_rbs means the RB is usable */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	adev->gfx.config.backend_enable_mask = enabled_rbs;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		data = RREG32(mmPA_SC_RASTER_CONFIG);
		/* consume enabled_rbs two bits (one RB pair) per SH and pick
		 * the matching RB_MAP encoding for the raster config
		 */
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				if (j == 0)
					data |= (RASTER_CONFIG_RB_MAP_3 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				else
					data |= (RASTER_CONFIG_RB_MAP_0 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 <<
					 (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(mmPA_SC_RASTER_CONFIG, data);
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
2686 
2687 /**
2688  * gfx_v8_0_init_compute_vmid - gart enable
2689  *
2690  * @rdev: amdgpu_device pointer
2691  *
2692  * Initialize compute vmid sh_mem registers
2693  *
2694  */
2695 #define DEFAULT_SH_MEM_BASES	(0x6000)
2696 #define FIRST_COMPUTE_VMID	(8)
2697 #define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* shared and private base in one register (low/high 16 bits) */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, CC default
	 * mtype, private ATC enabled
	 */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* program the same config into every compute vmid (8..15) */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* back to vmid 0 */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
2731 
/* One-time GFX hardware init: GRBM timeout, address config mirrors,
 * tiling tables, render backend setup, per-vmid SH_MEM registers and
 * the PA_SC FIFO sizes.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	tmp = RREG32(mmGRBM_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
	WREG32(mmGRBM_CNTL, tmp);

	/* mirror the GB address config into the other IP blocks that
	 * need to agree on the memory layout (HDP, DMIF, SDMA, UVD)
	 */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
	WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);

	gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
				 adev->gfx.config.max_sh_per_se,
				 adev->gfx.config.max_backends_per_se);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* vmid 0 (kernel) uses uncached default mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* user vmids use non-coherent default mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* compute vmids get their own aperture setup on top */
	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
2806 
/* Poll (up to adev->usec_timeout microseconds per unit) until the RLC
 * serdes CU masters of every SE/SH and then the non-CU masters report
 * idle.  Timeouts are silently ignored.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* wait for the per-CU masters on each SE/SH */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* then wait for the global (non-CU) masters */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
2836 
2837 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2838 					       bool enable)
2839 {
2840 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2841 
2842 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2843 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2844 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2845 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2846 
2847 	WREG32(mmCP_INT_CNTL_RING0, tmp);
2848 }
2849 
/* Halt the RLC F32 core, mask the GUI idle interrupts it drives and
 * wait for its serdes masters to drain.
 */
void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32(mmRLC_CNTL, tmp);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	gfx_v8_0_wait_for_rlc_serdes(adev);
}
2861 
/* Pulse the RLC soft reset bit in GRBM_SOFT_RESET (assert, 50us,
 * deassert, 50us).
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmGRBM_SOFT_RESET);

	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
}
2873 
/* Re-enable the RLC F32 core and, on discrete parts, the GUI idle
 * interrupts; APUs enable them later, after the CP is initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
	WREG32(mmRLC_CNTL, tmp);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
2887 
/* Load the RLC GPM microcode into the RLC via its indirect ucode port.
 *
 * Returns 0 on success, -EINVAL if the firmware was never fetched.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* the ucode image follows the header at the offset it declares */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	/* reset the write address, stream the dwords, then latch the
	 * firmware version into the address register
	 */
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
2911 
/* Bring the RLC back up: stop it, disable clock/power gating, reset
 * it, (re)load its microcode unless the SMU handles firmware loading,
 * then start it again.
 *
 * Returns 0 on success or a negative error code on firmware failure.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the firmware; just wait for it */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
2944 
2945 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2946 {
2947 	int i;
2948 	u32 tmp = RREG32(mmCP_ME_CNTL);
2949 
2950 	if (enable) {
2951 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2952 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2953 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2954 	} else {
2955 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2956 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2957 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2958 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2959 			adev->gfx.gfx_ring[i].ready = false;
2960 	}
2961 	WREG32(mmCP_ME_CNTL, tmp);
2962 	udelay(50);
2963 }
2964 
/* Stream one CP gfx ucode image through its indirect load port: reset
 * the address register, write every dword, then latch the firmware
 * version into the address register.
 */
static void gfx_v8_0_cp_load_ucode(struct amdgpu_device *adev,
				   const struct firmware *fw,
				   const struct gfx_firmware_header_v1_0 *hdr,
				   u32 addr_reg, u32 data_reg, u32 fw_version)
{
	const __le32 *fw_data = (const __le32 *)
		(fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	unsigned i, fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(addr_reg, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(data_reg, le32_to_cpup(fw_data++));
	WREG32(addr_reg, fw_version);
}

/* Halt the CP and load the PFP, CE and ME gfx microcode images.
 *
 * Returns 0 on success, -EINVAL if any of the three firmwares was
 * never fetched.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	gfx_v8_0_cp_load_ucode(adev, adev->gfx.pfp_fw, pfp_hdr,
			       mmCP_PFP_UCODE_ADDR, mmCP_PFP_UCODE_DATA,
			       adev->gfx.pfp_fw_version);

	/* CE */
	gfx_v8_0_cp_load_ucode(adev, adev->gfx.ce_fw, ce_hdr,
			       mmCP_CE_UCODE_ADDR, mmCP_CE_UCODE_DATA,
			       adev->gfx.ce_fw_version);

	/* ME (loaded through the CP ME RAM window) */
	gfx_v8_0_cp_load_ucode(adev, adev->gfx.me_fw, me_hdr,
			       mmCP_ME_RAM_WADDR, mmCP_ME_RAM_DATA,
			       adev->gfx.me_fw_version);

	return 0;
}
3021 
3022 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3023 {
3024 	u32 count = 0;
3025 	const struct cs_section_def *sect = NULL;
3026 	const struct cs_extent_def *ext = NULL;
3027 
3028 	/* begin clear state */
3029 	count += 2;
3030 	/* context control state */
3031 	count += 3;
3032 
3033 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3034 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3035 			if (sect->id == SECT_CONTEXT)
3036 				count += 2 + ext->reg_count;
3037 			else
3038 				return 0;
3039 		}
3040 	}
3041 	/* pa_sc_raster_config/pa_sc_raster_config1 */
3042 	count += 4;
3043 	/* end clear state */
3044 	count += 2;
3045 	/* clear state */
3046 	count += 2;
3047 
3048 	return count;
3049 }
3050 
/* Start the CP gfx engine and emit the clear-state sequence on ring 0:
 * preamble begin, context control, the vi_cs_data context registers,
 * per-ASIC raster config, preamble end, CLEAR_STATE and the CE
 * partition bases.  The packet count must match
 * gfx_v8_0_get_csb_size().
 *
 * Returns 0 on success or the ring-lock error code.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context-register extent from the static cs data */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1 per ASIC */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_unlock_commit(ring);

	return 0;
}
3133 
/*
 * gfx_v8_0_cp_gfx_resume - program and start gfx ring 0
 *
 * Sets up the CP_RB0 ring buffer (size, block size, rptr writeback
 * address, base address), configures the gfx doorbell on ASICs that
 * have one (not Topaz/Iceland), then runs gfx_v8_0_cp_gfx_start() and
 * a ring test.
 *
 * Returns 0 on success or the ring-test error code.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA allows resetting the read pointer below.
	 */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* let the pointer reset settle before re-arming CNTL */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* Tonga additionally needs the gfx doorbell range set */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
3214 
3215 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3216 {
3217 	int i;
3218 
3219 	if (enable) {
3220 		WREG32(mmCP_MEC_CNTL, 0);
3221 	} else {
3222 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3223 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3224 			adev->gfx.compute_ring[i].ready = false;
3225 	}
3226 	udelay(50);
3227 }
3228 
/* Un-halt the compute micro engines; always succeeds (returns 0). */
static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
{
	gfx_v8_0_cp_compute_enable(adev, true);

	return 0;
}
3235 
/*
 * gfx_v8_0_cp_compute_load_microcode - upload MEC firmware word by word
 *
 * Halts the compute engines, then writes the MEC1 ucode image through the
 * CP_MEC_ME1_UCODE_ADDR/DATA register pair.  MEC2 firmware is uploaded
 * only when a separate image was loaded (adev->gfx.mec2_fw non-NULL).
 *
 * Returns 0 on success, -EINVAL if no MEC firmware was loaded.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* engines must be halted while ucode is replaced */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	/* final ADDR write records the loaded fw version */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
3281 
/*
 * Memory Queue Descriptor (MQD) layout for VI compute queues.
 *
 * One 32-bit word per hardware "ordinal"; the per-field comments give the
 * ordinal number so each entry can be matched against the hardware MQD
 * documentation.  The driver fills this structure in
 * gfx_v8_0_cp_compute_resume() and hands it to the CP through
 * CP_MQD_BASE_ADDR/_HI.  Field order and size must not be changed.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
3542 
3543 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3544 {
3545 	int i, r;
3546 
3547 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3548 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3549 
3550 		if (ring->mqd_obj) {
3551 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
3552 			if (unlikely(r != 0))
3553 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3554 
3555 			amdgpu_bo_unpin(ring->mqd_obj);
3556 			amdgpu_bo_unreserve(ring->mqd_obj);
3557 
3558 			amdgpu_bo_unref(&ring->mqd_obj);
3559 			ring->mqd_obj = NULL;
3560 		}
3561 	}
3562 }
3563 
/*
 * gfx_v8_0_cp_compute_resume - bring up all compute queues
 *
 * Two phases, both under srbm_mutex (SRBM register access selects a
 * me/pipe/queue, so it must be serialized):
 *  1. per-pipe: program the EOP buffer address/size for every MEC pipe;
 *  2. per-ring: allocate/pin/map an MQD BO, fill the vi_mqd structure
 *     while mirroring each value into the corresponding CP_HQD_*
 *     register, then activate the queue.
 * Finally enables the doorbell aperture, starts the MECs and ring-tests
 * each compute ring.
 *
 * Returns 0 on success or a negative error code; on BO setup failure
 * everything allocated so far is torn down via gfx_v8_0_cp_compute_fini().
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 belong to MEC1 (me=1), 4-7 to MEC2 (me=2) */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* EOP base is programmed in units of 256 bytes */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on all shader engines */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* mirror the EOP address programmed in phase 1 above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded by usec_timeout) for the HQD to drain */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	/* globally enable the doorbell aperture */
	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	r = gfx_v8_0_cp_compute_start(adev);
	if (r)
		return r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
3820 
/*
 * gfx_v8_0_cp_resume - load CP firmware and resume gfx + compute rings
 *
 * On dGPUs the GUI-idle interrupt is masked during bring-up.  Firmware
 * is uploaded directly only when neither the powerplay stack nor the
 * SMU firmware loader is in use; otherwise the SMU loads it and we just
 * verify CE/PFP/ME/MEC1 finished loading.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU-driven loading: just confirm each fw finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_MEC1);
			if (r)
				return -EINVAL;
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
3873 
/* Enable/disable both halves of the CP: gfx ME/PFP/CE and compute MECs. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
3879 
/*
 * gfx_v8_0_hw_init - IP-block hw_init hook
 *
 * Applies golden register settings, initializes the GPU config, then
 * resumes the RLC and the CP.  Returns 0 on success or the first
 * failing step's error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);

	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
3899 
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini hook
 *
 * Halts the CP (gfx + compute), stops the RLC, then frees the compute
 * MQD buffer objects.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	return 0;
}
3910 
/* Suspend is identical to hw teardown for this IP block. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
}
3917 
/* Resume re-runs the full hw init sequence. */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
}
3924 
3925 static bool gfx_v8_0_is_idle(void *handle)
3926 {
3927 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3928 
3929 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3930 		return false;
3931 	else
3932 		return true;
3933 }
3934 
3935 static int gfx_v8_0_wait_for_idle(void *handle)
3936 {
3937 	unsigned i;
3938 	u32 tmp;
3939 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3940 
3941 	for (i = 0; i < adev->usec_timeout; i++) {
3942 		/* read MC_STATUS */
3943 		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3944 
3945 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3946 			return 0;
3947 		udelay(1);
3948 	}
3949 	return -ETIMEDOUT;
3950 }
3951 
/*
 * Dump the GFX 8.x register state to the kernel log for debugging,
 * typically around a soft reset.  Purely informational: reads status,
 * tiling, CP, RLC and per-VM SH_MEM registers and prints them.
 */
static void gfx_v8_0_print_status(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* global busy/status registers */
	dev_info(adev->dev, "GFX 8.x registers\n");
	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
		 RREG32(mmGRBM_STATUS));
	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
		 RREG32(mmGRBM_STATUS2));
	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE0));
	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE1));
	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE2));
	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE3));
	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT1));
	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT2));
	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT3));
	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(mmCP_CPF_BUSY_STAT));
	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPF_STALLED_STAT1));
	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPC_STALLED_STAT1));
	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));

	/* tiling configuration tables */
	for (i = 0; i < 32; i++) {
		dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
	}
	for (i = 0; i < 16; i++) {
		dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
	}
	/* per-shader-engine raster config; select each SE, then restore
	 * broadcast (0xffffffff) when done */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		dev_info(adev->dev, "  se: %d\n", i);
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG));
		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG_1));
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmGB_ADDR_CONFIG));
	dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmHDP_ADDR_CONFIG));
	dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
		 RREG32(mmDMIF_ADDR_CALC));
	dev_info(adev->dev, "  SDMA0_TILING_CONFIG=0x%08X\n",
		 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
	dev_info(adev->dev, "  SDMA1_TILING_CONFIG=0x%08X\n",
		 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
	dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_ADDR_CONFIG));
	dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
	dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));

	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
		 RREG32(mmCP_MEQ_THRESHOLDS));
	dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
		 RREG32(mmSX_DEBUG_1));
	dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
		 RREG32(mmTA_CNTL_AUX));
	dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL));
	dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
		 RREG32(mmSQ_CONFIG));
	dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
		 RREG32(mmDB_DEBUG));
	dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
		 RREG32(mmDB_DEBUG2));
	dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
		 RREG32(mmDB_DEBUG3));
	dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
		 RREG32(mmCB_HW_CONTROL));
	dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL_1));
	dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
		 RREG32(mmPA_SC_FIFO_SIZE));
	dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
		 RREG32(mmVGT_NUM_INSTANCES));
	dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
		 RREG32(mmCP_PERFMON_CNTL));
	dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
		 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
	dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
		 RREG32(mmVGT_CACHE_INVALIDATION));
	dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
		 RREG32(mmVGT_GS_VERTEX_REUSE));
	dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
		 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
	dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
		 RREG32(mmPA_CL_ENHANCE));
	dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
		 RREG32(mmPA_SC_ENHANCE));

	/* gfx CP state */
	dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
		 RREG32(mmCP_ME_CNTL));
	dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
		 RREG32(mmCP_MAX_CONTEXT));
	dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
		 RREG32(mmCP_ENDIAN_SWAP));
	dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
		 RREG32(mmCP_DEVICE_ID));

	dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
		 RREG32(mmCP_SEM_WAIT_TIMER));

	dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
		 RREG32(mmCP_RB_WPTR_DELAY));
	dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
		 RREG32(mmCP_RB_VMID));
	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
		 RREG32(mmCP_RB0_WPTR));
	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR));
	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR_HI));
	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
		 RREG32(mmCP_RB0_BASE));
	dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
		 RREG32(mmCP_RB0_BASE_HI));
	dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
		 RREG32(mmCP_MEC_CNTL));
	dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
		 RREG32(mmCP_CPF_DEBUG));

	dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
		 RREG32(mmSCRATCH_ADDR));
	dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
		 RREG32(mmSCRATCH_UMSK));

	/* interrupt and RLC state */
	dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
		 RREG32(mmCP_INT_CNTL_RING0));
	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
		 RREG32(mmRLC_CNTL));
	dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
		 RREG32(mmRLC_CGCG_CGLS_CTRL));
	dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_INIT));
	dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_MAX));
	dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
		 RREG32(mmRLC_LB_INIT_CU_MASK));
	dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
		 RREG32(mmRLC_LB_PARAMS));
	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
		 RREG32(mmRLC_MC_CNTL));
	dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
		 RREG32(mmRLC_UCODE_CNTL));

	/* per-VMID SH_MEM state, serialized against other SRBM users;
	 * VMID 0 is re-selected afterwards to restore the default */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		dev_info(adev->dev, "  VM %d:\n", i);
		dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
			 RREG32(mmSH_MEM_CONFIG));
		dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_BASE));
		dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_LIMIT));
		dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
			 RREG32(mmSH_MEM_BASES));
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
4140 
/*
 * Soft-reset the GFX block.  Inspects GRBM/SRBM status registers to
 * decide which reset bits are needed, then pulses them in
 * GRBM_SOFT_RESET / SRBM_SOFT_RESET with settle delays.  Register state
 * is dumped before and after for debugging.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		/* any gfx pipeline unit busy -> reset CP and GFX */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		/* CP itself busy -> reset CP and the GRBM via SRBM */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		gfx_v8_0_print_status((void *)adev);
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		/* XXX todo */

		if (grbm_soft_reset) {
			/* assert, wait, then deassert; the reads after each
			 * write flush the posted register write */
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			/* same assert/deassert dance for the SRBM resets */
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}
		/* Wait a little for things to settle down */
		udelay(50);
		gfx_v8_0_print_status((void *)adev);
	}
	return 0;
}
4224 
4225 /**
4226  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4227  *
4228  * @adev: amdgpu_device pointer
4229  *
4230  * Fetches a GPU clock counter snapshot.
4231  * Returns the 64 bit clock counter snapshot.
4232  */
4233 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4234 {
4235 	uint64_t clock;
4236 
4237 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4238 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4239 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4240 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4241 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4242 	return clock;
4243 }
4244 
4245 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4246 					  uint32_t vmid,
4247 					  uint32_t gds_base, uint32_t gds_size,
4248 					  uint32_t gws_base, uint32_t gws_size,
4249 					  uint32_t oa_base, uint32_t oa_size)
4250 {
4251 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4252 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4253 
4254 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4255 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4256 
4257 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
4258 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
4259 
4260 	/* GDS Base */
4261 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4262 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4263 				WRITE_DATA_DST_SEL(0)));
4264 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4265 	amdgpu_ring_write(ring, 0);
4266 	amdgpu_ring_write(ring, gds_base);
4267 
4268 	/* GDS Size */
4269 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4270 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4271 				WRITE_DATA_DST_SEL(0)));
4272 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4273 	amdgpu_ring_write(ring, 0);
4274 	amdgpu_ring_write(ring, gds_size);
4275 
4276 	/* GWS */
4277 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4278 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4279 				WRITE_DATA_DST_SEL(0)));
4280 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4281 	amdgpu_ring_write(ring, 0);
4282 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4283 
4284 	/* OA */
4285 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4286 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4287 				WRITE_DATA_DST_SEL(0)));
4288 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4289 	amdgpu_ring_write(ring, 0);
4290 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4291 }
4292 
/*
 * Early init: record the ring counts and install the ring, interrupt
 * and GDS function pointers before any hardware is touched.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);

	return 0;
}
4305 
/*
 * Late init: run the EDC GPR workarounds.  This must happen here
 * rather than in sw/hw init because it submits IBs, which requires
 * the IB pool to already be initialized.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* requires IBs so do in late init after IB pool is initialized */
	return gfx_v8_0_do_edc_gpr_workarounds(adev);
}
4318 
/* Powergating is not implemented for this IP block; report success. */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}
4324 
/*
 * Issue a BPM serdes command (@cmd) targeting BPM register @reg_addr
 * on all CUs of all SEs/SHs: broadcast is selected first, the CU and
 * non-CU master masks are opened, then the command is written into
 * RLC_SERDES_WR_CTRL.
 */
static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
		uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every shader engine / shader array */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	/* clear all command/select fields before composing the new command */
	data = RREG32(mmRLC_SERDES_WR_CTRL);
	data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
			(cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
			(reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
			(0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
4354 
/*
 * Enable or disable medium-grain clock gating (MGCG) and memory light
 * sleep (MGLS) on Fiji.  The numbered steps must run in order; each
 * register is only rewritten when its value actually changes to avoid
 * redundant MMIO traffic.
 */
static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
		bool enable)
{
	uint32_t temp, data;

	/* It is disabled by HW by default */
	if (enable) {
		/* 1 - RLC memory Light sleep */
		temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
		data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
		if (temp != data)
			WREG32(mmRLC_MEM_SLP_CNTL, data);

		/* 2 - CP memory Light sleep */
		temp = data = RREG32(mmCP_MEM_SLP_CNTL);
		data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
		if (temp != data)
			WREG32(mmCP_MEM_SLP_CNTL, data);

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
		data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
		data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
		data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
		data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
		data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
		data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}
}
4448 
/*
 * Enable or disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS) on Fiji.  The step ordering and the serdes idle
 * waits between register writes are required by the programming
 * sequence and must not be reordered.
 */
static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
		bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	if (enable) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		/* enable cgls*/
		data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}
}
/*
 * Toggle all Fiji GFX clock-gating features.  The ordering of the two
 * helpers is deliberate and differs between the enable and disable
 * paths (coarse gating must be off before medium gating is turned off,
 * and on only after medium gating is on).  Always returns 0.
 */
static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
		bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * ===  MGCG + MGLS + TS(CG/LS) ===
		 */
		fiji_update_medium_grain_clock_gating(adev, enable);
		fiji_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * ===  CGCG + CGLS ===
		 */
		fiji_update_coarse_grain_clock_gating(adev, enable);
		fiji_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
4547 
4548 static int gfx_v8_0_set_clockgating_state(void *handle,
4549 					  enum amd_clockgating_state state)
4550 {
4551 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4552 
4553 	switch (adev->asic_type) {
4554 	case CHIP_FIJI:
4555 		fiji_update_gfx_clock_gating(adev,
4556 				state == AMD_CG_STATE_GATE ? true : false);
4557 		break;
4558 	default:
4559 		break;
4560 	}
4561 	return 0;
4562 }
4563 
4564 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4565 {
4566 	u32 rptr;
4567 
4568 	rptr = ring->adev->wb.wb[ring->rptr_offs];
4569 
4570 	return rptr;
4571 }
4572 
4573 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4574 {
4575 	struct amdgpu_device *adev = ring->adev;
4576 	u32 wptr;
4577 
4578 	if (ring->use_doorbell)
4579 		/* XXX check if swapping is necessary on BE */
4580 		wptr = ring->adev->wb.wb[ring->wptr_offs];
4581 	else
4582 		wptr = RREG32(mmCP_RB0_WPTR);
4583 
4584 	return wptr;
4585 }
4586 
/*
 * Commit the gfx ring's write pointer to hardware, via doorbell (with
 * the writeback slot updated first) or via direct MMIO.
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr;
		WDOORBELL32(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(mmCP_RB0_WPTR, ring->wptr);
		/* read back -- presumably flushes the posted write; confirm */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
4600 
/*
 * Emit a WAIT_REG_MEM packet that requests an HDP flush and waits for
 * its completion bit.  The ref/mask bit depends on which CP client the
 * ring belongs to: gfx uses CP0, compute uses CP2/CP6 shifted by pipe.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* unexpected MEC index: emit nothing */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
4632 
/*
 * Emit an indirect buffer on the gfx ring: write the predicted
 * next_rptr to the ring's tracking slot, optionally emit a
 * SWITCH_BUFFER on context change, then the INDIRECT_BUFFER packet
 * itself (CONST variant for CE IBs).
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				  struct amdgpu_ib *ib)
{
	bool need_ctx_switch = ring->current_ctx != ib->ctx;
	u32 header, control = 0;
	/* 5 dwords for the WRITE_DATA packet emitted below */
	u32 next_rptr = ring->wptr + 5;

	/* drop the CE preamble IB for the same context */
	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
		return;

	/* account for the 2-dword SWITCH_BUFFER if a context switch happens */
	if (need_ctx_switch)
		next_rptr += 2;

	/* account for the 4-dword INDIRECT_BUFFER packet */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (need_ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length plus the VM id (if any) in bits 24+ */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4677 
/*
 * Emit an indirect buffer on a compute ring.  Same next_rptr tracking
 * as the gfx path but without the CE/preamble/context-switch handling.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
				  struct amdgpu_ib *ib)
{
	u32 header, control = 0;
	/* 5 dwords for the WRITE_DATA packet emitted below */
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	/* plus the 4-dword INDIRECT_BUFFER packet */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length plus the VM id (if any) in bits 24+ */
	control |= ib->length_dw |
			   (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  (2 << 0) |
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4707 
/*
 * Emit a fence on the gfx ring: an EVENT_WRITE_EOP packet that flushes
 * the TC/TCL1 caches, writes @seq (32 or 64 bit per @flags) to @addr
 * and optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
4727 
4728 /**
4729  * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
4730  *
4731  * @ring: amdgpu ring buffer object
4732  * @semaphore: amdgpu semaphore object
4733  * @emit_wait: Is this a sempahore wait?
4734  *
4735  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4736  * from running ahead of semaphore waits.
4737  */
4738 static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
4739 					 struct amdgpu_semaphore *semaphore,
4740 					 bool emit_wait)
4741 {
4742 	uint64_t addr = semaphore->gpu_addr;
4743 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4744 
4745 	if (ring->adev->asic_type == CHIP_TOPAZ ||
4746 	    ring->adev->asic_type == CHIP_TONGA ||
4747 	    ring->adev->asic_type == CHIP_FIJI)
4748 		/* we got a hw semaphore bug in VI TONGA, return false to switch back to sw fence wait */
4749 		return false;
4750 	else {
4751 		amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
4752 		amdgpu_ring_write(ring, lower_32_bits(addr));
4753 		amdgpu_ring_write(ring, upper_32_bits(addr));
4754 		amdgpu_ring_write(ring, sel);
4755 	}
4756 
4757 	if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
4758 		/* Prevent the PFP from running ahead of the semaphore wait */
4759 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4760 		amdgpu_ring_write(ring, 0x0);
4761 	}
4762 
4763 	return true;
4764 }
4765 
/*
 * gfx_v8_0_ring_emit_vm_flush - emit a VM page table flush on the CP ring
 *
 * Waits for all prior fences on this ring to signal, writes the new page
 * directory base for @vm_id and invalidates the VM context's TLB entries.
 * The packet order below is deliberate; do not reorder the writes.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	/* only the GFX ring has a PFP engine; compute rings use ME */
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait until the fence memory location reaches the expected seq */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
		 WAIT_REG_MEM_FUNCTION(3))); /* equal */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* sync CE with ME to prevent CE fetching CEIB before the
		 * context switch is done
		 */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* update the page table base address for this VM context */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
4835 
4836 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4837 {
4838 	return ring->adev->wb.wb[ring->rptr_offs];
4839 }
4840 
4841 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4842 {
4843 	return ring->adev->wb.wb[ring->wptr_offs];
4844 }
4845 
4846 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4847 {
4848 	struct amdgpu_device *adev = ring->adev;
4849 
4850 	/* XXX check if swapping is necessary on BE */
4851 	adev->wb.wb[ring->wptr_offs] = ring->wptr;
4852 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4853 }
4854 
4855 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
4856 					     u64 addr, u64 seq,
4857 					     unsigned flags)
4858 {
4859 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4860 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4861 
4862 	/* RELEASE_MEM - flush caches, send int */
4863 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4864 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4865 				 EOP_TC_ACTION_EN |
4866 				 EOP_TC_WB_ACTION_EN |
4867 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4868 				 EVENT_INDEX(5)));
4869 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4870 	amdgpu_ring_write(ring, addr & 0xfffffffc);
4871 	amdgpu_ring_write(ring, upper_32_bits(addr));
4872 	amdgpu_ring_write(ring, lower_32_bits(seq));
4873 	amdgpu_ring_write(ring, upper_32_bits(seq));
4874 }
4875 
4876 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4877 						 enum amdgpu_interrupt_state state)
4878 {
4879 	u32 cp_int_cntl;
4880 
4881 	switch (state) {
4882 	case AMDGPU_IRQ_STATE_DISABLE:
4883 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4884 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4885 					    TIME_STAMP_INT_ENABLE, 0);
4886 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4887 		break;
4888 	case AMDGPU_IRQ_STATE_ENABLE:
4889 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4890 		cp_int_cntl =
4891 			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4892 				      TIME_STAMP_INT_ENABLE, 1);
4893 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4894 		break;
4895 	default:
4896 		break;
4897 	}
4898 }
4899 
4900 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4901 						     int me, int pipe,
4902 						     enum amdgpu_interrupt_state state)
4903 {
4904 	u32 mec_int_cntl, mec_int_cntl_reg;
4905 
4906 	/*
4907 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
4908 	 * handles the setting of interrupts for this specific pipe. All other
4909 	 * pipes' interrupts are set by amdkfd.
4910 	 */
4911 
4912 	if (me == 1) {
4913 		switch (pipe) {
4914 		case 0:
4915 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4916 			break;
4917 		default:
4918 			DRM_DEBUG("invalid pipe %d\n", pipe);
4919 			return;
4920 		}
4921 	} else {
4922 		DRM_DEBUG("invalid me %d\n", me);
4923 		return;
4924 	}
4925 
4926 	switch (state) {
4927 	case AMDGPU_IRQ_STATE_DISABLE:
4928 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4929 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4930 					     TIME_STAMP_INT_ENABLE, 0);
4931 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4932 		break;
4933 	case AMDGPU_IRQ_STATE_ENABLE:
4934 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4935 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4936 					     TIME_STAMP_INT_ENABLE, 1);
4937 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4938 		break;
4939 	default:
4940 		break;
4941 	}
4942 }
4943 
4944 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4945 					     struct amdgpu_irq_src *source,
4946 					     unsigned type,
4947 					     enum amdgpu_interrupt_state state)
4948 {
4949 	u32 cp_int_cntl;
4950 
4951 	switch (state) {
4952 	case AMDGPU_IRQ_STATE_DISABLE:
4953 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4954 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4955 					    PRIV_REG_INT_ENABLE, 0);
4956 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4957 		break;
4958 	case AMDGPU_IRQ_STATE_ENABLE:
4959 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4960 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4961 					    PRIV_REG_INT_ENABLE, 0);
4962 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4963 		break;
4964 	default:
4965 		break;
4966 	}
4967 
4968 	return 0;
4969 }
4970 
4971 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4972 					      struct amdgpu_irq_src *source,
4973 					      unsigned type,
4974 					      enum amdgpu_interrupt_state state)
4975 {
4976 	u32 cp_int_cntl;
4977 
4978 	switch (state) {
4979 	case AMDGPU_IRQ_STATE_DISABLE:
4980 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4981 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4982 					    PRIV_INSTR_INT_ENABLE, 0);
4983 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4984 		break;
4985 	case AMDGPU_IRQ_STATE_ENABLE:
4986 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4987 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4988 					    PRIV_INSTR_INT_ENABLE, 1);
4989 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4990 		break;
4991 	default:
4992 		break;
4993 	}
4994 
4995 	return 0;
4996 }
4997 
4998 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4999 					    struct amdgpu_irq_src *src,
5000 					    unsigned type,
5001 					    enum amdgpu_interrupt_state state)
5002 {
5003 	switch (type) {
5004 	case AMDGPU_CP_IRQ_GFX_EOP:
5005 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
5006 		break;
5007 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5008 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5009 		break;
5010 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5011 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5012 		break;
5013 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5014 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5015 		break;
5016 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5017 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5018 		break;
5019 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5020 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5021 		break;
5022 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5023 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5024 		break;
5025 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5026 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5027 		break;
5028 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5029 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5030 		break;
5031 	default:
5032 		break;
5033 	}
5034 	return 0;
5035 }
5036 
5037 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
5038 			    struct amdgpu_irq_src *source,
5039 			    struct amdgpu_iv_entry *entry)
5040 {
5041 	int i;
5042 	u8 me_id, pipe_id, queue_id;
5043 	struct amdgpu_ring *ring;
5044 
5045 	DRM_DEBUG("IH: CP EOP\n");
5046 	me_id = (entry->ring_id & 0x0c) >> 2;
5047 	pipe_id = (entry->ring_id & 0x03) >> 0;
5048 	queue_id = (entry->ring_id & 0x70) >> 4;
5049 
5050 	switch (me_id) {
5051 	case 0:
5052 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5053 		break;
5054 	case 1:
5055 	case 2:
5056 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5057 			ring = &adev->gfx.compute_ring[i];
5058 			/* Per-queue interrupt is supported for MEC starting from VI.
5059 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5060 			  */
5061 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5062 				amdgpu_fence_process(ring);
5063 		}
5064 		break;
5065 	}
5066 	return 0;
5067 }
5068 
/* Handler for privileged-register faults: log and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	/* recovery is deferred to the device's reset work item */
	schedule_work(&adev->reset_work);
	return 0;
}
5077 
/* Handler for privileged-instruction faults: log and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	/* recovery is deferred to the device's reset work item */
	schedule_work(&adev->reset_work);
	return 0;
}
5086 
/* IP-block-level lifecycle hooks for the GFX v8 block */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.print_status = gfx_v8_0_print_status,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
5103 
/* Ring callbacks for the GFX ring (kernel-submitted, no CS parsing) */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_semaphore = gfx_v8_0_ring_emit_semaphore,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
};
5119 
/* Ring callbacks for the compute (MEC) rings */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_semaphore = gfx_v8_0_ring_emit_semaphore,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
};
5135 
5136 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5137 {
5138 	int i;
5139 
5140 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5141 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5142 
5143 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5144 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5145 }
5146 
/* IRQ source callbacks: CP end-of-pipe */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
5151 
/* IRQ source callbacks: privileged register fault */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
5156 
/* IRQ source callbacks: privileged instruction fault */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
5161 
5162 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5163 {
5164 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5165 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5166 
5167 	adev->gfx.priv_reg_irq.num_types = 1;
5168 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5169 
5170 	adev->gfx.priv_inst_irq.num_types = 1;
5171 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
5172 }
5173 
/*
 * gfx_v8_0_set_gds_init - initialize GDS/GWS/OA sizes and partitions
 *
 * Reads the total GDS memory size from hardware and splits GDS, GWS and
 * OA resources between the gfx and CS partitions.  The partition sizes
 * below look like tuning constants for the two known GDS sizes —
 * NOTE(review): no datasheet reference visible here; confirm before changing.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
5201 
5202 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
5203 		u32 se, u32 sh)
5204 {
5205 	u32 mask = 0, tmp, tmp1;
5206 	int i;
5207 
5208 	gfx_v8_0_select_se_sh(adev, se, sh);
5209 	tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5210 	tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5211 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5212 
5213 	tmp &= 0xffff0000;
5214 
5215 	tmp |= tmp1;
5216 	tmp >>= 16;
5217 
5218 	for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
5219 		mask <<= 1;
5220 		mask |= 1;
5221 	}
5222 
5223 	return (~tmp) & mask;
5224 }
5225 
5226 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5227 						 struct amdgpu_cu_info *cu_info)
5228 {
5229 	int i, j, k, counter, active_cu_number = 0;
5230 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5231 
5232 	if (!adev || !cu_info)
5233 		return -EINVAL;
5234 
5235 	mutex_lock(&adev->grbm_idx_mutex);
5236 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5237 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5238 			mask = 1;
5239 			ao_bitmap = 0;
5240 			counter = 0;
5241 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
5242 			cu_info->bitmap[i][j] = bitmap;
5243 
5244 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5245 				if (bitmap & mask) {
5246 					if (counter < 2)
5247 						ao_bitmap |= mask;
5248 					counter ++;
5249 				}
5250 				mask <<= 1;
5251 			}
5252 			active_cu_number += counter;
5253 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5254 		}
5255 	}
5256 
5257 	cu_info->number = active_cu_number;
5258 	cu_info->ao_cu_mask = ao_cu_mask;
5259 	mutex_unlock(&adev->grbm_idx_mutex);
5260 	return 0;
5261 }
5262