xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision cc3ae7b0af27118994c1e491382b253be3b762bf)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "clearstate_vi.h"
32 
33 #include "gmc/gmc_8_2_d.h"
34 #include "gmc/gmc_8_2_sh_mask.h"
35 
36 #include "oss/oss_3_0_d.h"
37 #include "oss/oss_3_0_sh_mask.h"
38 
39 #include "bif/bif_5_0_d.h"
40 #include "bif/bif_5_0_sh_mask.h"
41 
42 #include "gca/gfx_8_0_d.h"
43 #include "gca/gfx_8_0_enum.h"
44 #include "gca/gfx_8_0_sh_mask.h"
45 #include "gca/gfx_8_0_enum.h"
46 
47 #include "dce/dce_10_0_d.h"
48 #include "dce/dce_10_0_sh_mask.h"
49 
50 #include "smu/smu_7_1_3_d.h"
51 
52 #define GFX8_NUM_GFX_RINGS     1
53 #define GFX8_NUM_COMPUTE_RINGS 8
54 
55 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
56 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
58 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
59 
60 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
61 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
62 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
63 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
64 #define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
65 #define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
66 #define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
67 #define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
68 #define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
69 
70 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
71 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
72 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
73 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
75 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
76 
77 /* BPM SERDES CMD */
78 #define SET_BPM_SERDES_CMD    1
79 #define CLE_BPM_SERDES_CMD    0
80 
81 /* BPM Register Address*/
/* Indices of the BPM registers addressed via the SET/CLE_BPM_SERDES_CMD
 * commands defined above.
 */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX            /* sentinel: number of BPM registers */
};
90 
91 #define RLC_FormatDirectRegListLength        14
92 
93 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
94 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
99 
100 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
101 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
105 
106 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
107 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
114 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
118 
119 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
120 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
125 
126 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
127 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
132 
133 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
139 
/* Per-VMID GDS register offsets: { base, size, gws, oa } for VMIDs 0-15. */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
159 
/* Tonga A11 golden settings: { reg, mask, value } triples consumed by
 * amdgpu_program_register_sequence() in gfx_v8_0_init_golden_registers().
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
178 
/* Tonga common golden registers: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
190 
/* Tonga MGCG/CGCG (clockgating) init sequence: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
269 
/* Polaris11 A11 golden settings: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
};
288 
/* Polaris11 common golden registers: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
298 
/* Polaris10 A11 golden settings: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
};
318 
/* Polaris10 common golden registers: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
330 
/* Fiji common golden registers: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
344 
/* Fiji A10 golden settings: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
359 
/* Fiji MGCG/CGCG (clockgating) init sequence: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
398 
/* Iceland (Topaz) A11 golden settings: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
417 
/* Iceland (Topaz) common golden registers: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
429 
/* Iceland (Topaz) MGCG/CGCG (clockgating) init sequence: { reg, mask, value }
 * triples (see gfx_v8_0_init_golden_registers()).
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
497 
/* Carrizo A11 golden settings: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
511 
/* Carrizo common golden registers: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
523 
/* Carrizo MGCG/CGCG (clockgating) init sequence: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
602 
/* Stoney A11 golden settings: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
616 
/* Stoney common golden registers: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
628 
/* Stoney MGCG/CGCG (clockgating) init sequence: { reg, mask, value } triples
 * (see gfx_v8_0_init_golden_registers()).
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
638 
639 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
640 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
641 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
642 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
643 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
644 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
645 
646 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
647 {
648 	switch (adev->asic_type) {
649 	case CHIP_TOPAZ:
650 		amdgpu_program_register_sequence(adev,
651 						 iceland_mgcg_cgcg_init,
652 						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
653 		amdgpu_program_register_sequence(adev,
654 						 golden_settings_iceland_a11,
655 						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
656 		amdgpu_program_register_sequence(adev,
657 						 iceland_golden_common_all,
658 						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
659 		break;
660 	case CHIP_FIJI:
661 		amdgpu_program_register_sequence(adev,
662 						 fiji_mgcg_cgcg_init,
663 						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
664 		amdgpu_program_register_sequence(adev,
665 						 golden_settings_fiji_a10,
666 						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
667 		amdgpu_program_register_sequence(adev,
668 						 fiji_golden_common_all,
669 						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
670 		break;
671 
672 	case CHIP_TONGA:
673 		amdgpu_program_register_sequence(adev,
674 						 tonga_mgcg_cgcg_init,
675 						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
676 		amdgpu_program_register_sequence(adev,
677 						 golden_settings_tonga_a11,
678 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
679 		amdgpu_program_register_sequence(adev,
680 						 tonga_golden_common_all,
681 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
682 		break;
683 	case CHIP_POLARIS11:
684 		amdgpu_program_register_sequence(adev,
685 						 golden_settings_polaris11_a11,
686 						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
687 		amdgpu_program_register_sequence(adev,
688 						 polaris11_golden_common_all,
689 						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
690 		break;
691 	case CHIP_POLARIS10:
692 		amdgpu_program_register_sequence(adev,
693 						 golden_settings_polaris10_a11,
694 						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
695 		amdgpu_program_register_sequence(adev,
696 						 polaris10_golden_common_all,
697 						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
698 		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
699 		break;
700 	case CHIP_CARRIZO:
701 		amdgpu_program_register_sequence(adev,
702 						 cz_mgcg_cgcg_init,
703 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
704 		amdgpu_program_register_sequence(adev,
705 						 cz_golden_settings_a11,
706 						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
707 		amdgpu_program_register_sequence(adev,
708 						 cz_golden_common_all,
709 						 (const u32)ARRAY_SIZE(cz_golden_common_all));
710 		break;
711 	case CHIP_STONEY:
712 		amdgpu_program_register_sequence(adev,
713 						 stoney_mgcg_cgcg_init,
714 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
715 		amdgpu_program_register_sequence(adev,
716 						 stoney_golden_settings_a11,
717 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
718 		amdgpu_program_register_sequence(adev,
719 						 stoney_golden_common_all,
720 						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
721 		break;
722 	default:
723 		break;
724 	}
725 }
726 
727 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
728 {
729 	int i;
730 
731 	adev->gfx.scratch.num_reg = 7;
732 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
733 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
734 		adev->gfx.scratch.free[i] = true;
735 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
736 	}
737 }
738 
/*
 * gfx_v8_0_ring_test_ring - basic sanity test of direct ring submission
 *
 * Seeds a scratch register with 0xCAFEDEAD via MMIO, then submits a
 * 3-dword SET_UCONFIG_REG packet on @ring that rewrites the register to
 * 0xDEADBEEF, and polls the register until the new value shows up.
 *
 * Returns 0 if the CP processed the packet within adev->usec_timeout
 * microseconds, negative error code otherwise.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed the register so we can detect when the CP overwrites it */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* one SET_UCONFIG_REG packet: write 0xDEADBEEF to the scratch reg */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-poll the scratch reg until the CP write lands or we time out */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
782 
783 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
784 {
785 	struct amdgpu_device *adev = ring->adev;
786 	struct amdgpu_ib ib;
787 	struct fence *f = NULL;
788 	uint32_t scratch;
789 	uint32_t tmp = 0;
790 	unsigned i;
791 	int r;
792 
793 	r = amdgpu_gfx_scratch_get(adev, &scratch);
794 	if (r) {
795 		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
796 		return r;
797 	}
798 	WREG32(scratch, 0xCAFEDEAD);
799 	memset(&ib, 0, sizeof(ib));
800 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
801 	if (r) {
802 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
803 		goto err1;
804 	}
805 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
806 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
807 	ib.ptr[2] = 0xDEADBEEF;
808 	ib.length_dw = 3;
809 
810 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
811 	if (r)
812 		goto err2;
813 
814 	r = fence_wait(f, false);
815 	if (r) {
816 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
817 		goto err2;
818 	}
819 	for (i = 0; i < adev->usec_timeout; i++) {
820 		tmp = RREG32(scratch);
821 		if (tmp == 0xDEADBEEF)
822 			break;
823 		DRM_UDELAY(1);
824 	}
825 	if (i < adev->usec_timeout) {
826 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
827 			 ring->idx, i);
828 		goto err2;
829 	} else {
830 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
831 			  scratch, tmp);
832 		r = -EINVAL;
833 	}
834 err2:
835 	fence_put(f);
836 	amdgpu_ib_free(adev, &ib, NULL);
837 	fence_put(f);
838 err1:
839 	amdgpu_gfx_scratch_free(adev, scratch);
840 	return r;
841 }
842 
843 
844 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
845 	release_firmware(adev->gfx.pfp_fw);
846 	adev->gfx.pfp_fw = NULL;
847 	release_firmware(adev->gfx.me_fw);
848 	adev->gfx.me_fw = NULL;
849 	release_firmware(adev->gfx.ce_fw);
850 	adev->gfx.ce_fw = NULL;
851 	release_firmware(adev->gfx.rlc_fw);
852 	adev->gfx.rlc_fw = NULL;
853 	release_firmware(adev->gfx.mec_fw);
854 	adev->gfx.mec_fw = NULL;
855 	if ((adev->asic_type != CHIP_STONEY) &&
856 	    (adev->asic_type != CHIP_TOPAZ))
857 		release_firmware(adev->gfx.mec2_fw);
858 	adev->gfx.mec2_fw = NULL;
859 
860 	kfree(adev->gfx.rlc.register_list_format);
861 }
862 
863 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
864 {
865 	const char *chip_name;
866 	char fw_name[30];
867 	int err;
868 	struct amdgpu_firmware_info *info = NULL;
869 	const struct common_firmware_header *header = NULL;
870 	const struct gfx_firmware_header_v1_0 *cp_hdr;
871 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
872 	unsigned int *tmp = NULL, i;
873 
874 	DRM_DEBUG("\n");
875 
876 	switch (adev->asic_type) {
877 	case CHIP_TOPAZ:
878 		chip_name = "topaz";
879 		break;
880 	case CHIP_TONGA:
881 		chip_name = "tonga";
882 		break;
883 	case CHIP_CARRIZO:
884 		chip_name = "carrizo";
885 		break;
886 	case CHIP_FIJI:
887 		chip_name = "fiji";
888 		break;
889 	case CHIP_POLARIS11:
890 		chip_name = "polaris11";
891 		break;
892 	case CHIP_POLARIS10:
893 		chip_name = "polaris10";
894 		break;
895 	case CHIP_STONEY:
896 		chip_name = "stoney";
897 		break;
898 	default:
899 		BUG();
900 	}
901 
902 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
903 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
904 	if (err)
905 		goto out;
906 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
907 	if (err)
908 		goto out;
909 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
910 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
911 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
912 
913 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
914 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
915 	if (err)
916 		goto out;
917 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
918 	if (err)
919 		goto out;
920 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
921 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
922 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
923 
924 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
925 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
926 	if (err)
927 		goto out;
928 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
929 	if (err)
930 		goto out;
931 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
932 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
933 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
934 
935 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
936 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
937 	if (err)
938 		goto out;
939 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
940 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
941 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
942 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
943 
944 	adev->gfx.rlc.save_and_restore_offset =
945 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
946 	adev->gfx.rlc.clear_state_descriptor_offset =
947 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
948 	adev->gfx.rlc.avail_scratch_ram_locations =
949 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
950 	adev->gfx.rlc.reg_restore_list_size =
951 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
952 	adev->gfx.rlc.reg_list_format_start =
953 			le32_to_cpu(rlc_hdr->reg_list_format_start);
954 	adev->gfx.rlc.reg_list_format_separate_start =
955 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
956 	adev->gfx.rlc.starting_offsets_start =
957 			le32_to_cpu(rlc_hdr->starting_offsets_start);
958 	adev->gfx.rlc.reg_list_format_size_bytes =
959 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
960 	adev->gfx.rlc.reg_list_size_bytes =
961 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
962 
963 	adev->gfx.rlc.register_list_format =
964 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
965 					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
966 
967 	if (!adev->gfx.rlc.register_list_format) {
968 		err = -ENOMEM;
969 		goto out;
970 	}
971 
972 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
973 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
974 	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
975 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
976 
977 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
978 
979 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
980 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
981 	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
982 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
983 
984 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
985 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
986 	if (err)
987 		goto out;
988 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
989 	if (err)
990 		goto out;
991 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
992 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
993 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
994 
995 	if ((adev->asic_type != CHIP_STONEY) &&
996 	    (adev->asic_type != CHIP_TOPAZ)) {
997 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
998 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
999 		if (!err) {
1000 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1001 			if (err)
1002 				goto out;
1003 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1004 				adev->gfx.mec2_fw->data;
1005 			adev->gfx.mec2_fw_version =
1006 				le32_to_cpu(cp_hdr->header.ucode_version);
1007 			adev->gfx.mec2_feature_version =
1008 				le32_to_cpu(cp_hdr->ucode_feature_version);
1009 		} else {
1010 			err = 0;
1011 			adev->gfx.mec2_fw = NULL;
1012 		}
1013 	}
1014 
1015 	if (adev->firmware.smu_load) {
1016 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1017 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1018 		info->fw = adev->gfx.pfp_fw;
1019 		header = (const struct common_firmware_header *)info->fw->data;
1020 		adev->firmware.fw_size +=
1021 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1022 
1023 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1024 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1025 		info->fw = adev->gfx.me_fw;
1026 		header = (const struct common_firmware_header *)info->fw->data;
1027 		adev->firmware.fw_size +=
1028 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1029 
1030 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1031 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1032 		info->fw = adev->gfx.ce_fw;
1033 		header = (const struct common_firmware_header *)info->fw->data;
1034 		adev->firmware.fw_size +=
1035 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1036 
1037 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1038 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1039 		info->fw = adev->gfx.rlc_fw;
1040 		header = (const struct common_firmware_header *)info->fw->data;
1041 		adev->firmware.fw_size +=
1042 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1043 
1044 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1045 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1046 		info->fw = adev->gfx.mec_fw;
1047 		header = (const struct common_firmware_header *)info->fw->data;
1048 		adev->firmware.fw_size +=
1049 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1050 
1051 		if (adev->gfx.mec2_fw) {
1052 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1053 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1054 			info->fw = adev->gfx.mec2_fw;
1055 			header = (const struct common_firmware_header *)info->fw->data;
1056 			adev->firmware.fw_size +=
1057 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1058 		}
1059 
1060 	}
1061 
1062 out:
1063 	if (err) {
1064 		dev_err(adev->dev,
1065 			"gfx8: Failed to load firmware \"%s\"\n",
1066 			fw_name);
1067 		release_firmware(adev->gfx.pfp_fw);
1068 		adev->gfx.pfp_fw = NULL;
1069 		release_firmware(adev->gfx.me_fw);
1070 		adev->gfx.me_fw = NULL;
1071 		release_firmware(adev->gfx.ce_fw);
1072 		adev->gfx.ce_fw = NULL;
1073 		release_firmware(adev->gfx.rlc_fw);
1074 		adev->gfx.rlc_fw = NULL;
1075 		release_firmware(adev->gfx.mec_fw);
1076 		adev->gfx.mec_fw = NULL;
1077 		release_firmware(adev->gfx.mec2_fw);
1078 		adev->gfx.mec2_fw = NULL;
1079 	}
1080 	return err;
1081 }
1082 
/*
 * gfx_v8_0_get_csb_buffer - emit the RLC clear-state buffer contents
 * @adev: amdgpu device
 * @buffer: kmapped destination BO, written as little-endian dwords
 *
 * Fills @buffer with the PM4 stream the RLC replays to establish
 * default context state: PREAMBLE/CONTEXT_CONTROL header, all
 * SECT_CONTEXT register extents from adev->gfx.rlc.cs_data, an
 * ASIC-specific pair of raster-config values, and a trailing
 * CLEAR_STATE packet.  Does nothing if either the cs_data table or
 * @buffer is missing.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* copy each SECT_CONTEXT extent verbatim as a SET_CONTEXT_REG
	 * packet; any other section id aborts the stream early */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* two raster-config dwords starting at mmPA_SC_RASTER_CONFIG,
	 * values chosen per ASIC */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x0000002A);
		break;
	case CHIP_POLARIS11:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_FIJI:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		buffer[count++] = cpu_to_le32(0x00000002);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_STONEY:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1155 
1156 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1157 {
1158 	int r;
1159 
1160 	/* clear state block */
1161 	if (adev->gfx.rlc.clear_state_obj) {
1162 		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1163 		if (unlikely(r != 0))
1164 			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1165 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1166 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1167 
1168 		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1169 		adev->gfx.rlc.clear_state_obj = NULL;
1170 	}
1171 }
1172 
/*
 * gfx_v8_0_rlc_init - allocate and fill the RLC clear-state buffer
 *
 * Points adev->gfx.rlc.cs_data at the VI clear-state table and, if a
 * table exists, creates a CPU-accessible VRAM BO sized by
 * gfx_v8_0_get_csb_size(), pins and kmaps it, writes the clear-state
 * stream via gfx_v8_0_get_csb_buffer(), then unmaps and unreserves it.
 * The BO remains pinned for the RLC to use; on any failure all state
 * is torn down through gfx_v8_0_rlc_fini().
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* pin in VRAM so the RLC can fetch the buffer at any time */
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	return 0;
}
1229 
1230 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1231 {
1232 	int r;
1233 
1234 	if (adev->gfx.mec.hpd_eop_obj) {
1235 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1236 		if (unlikely(r != 0))
1237 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1238 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1239 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1240 
1241 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1242 		adev->gfx.mec.hpd_eop_obj = NULL;
1243 	}
1244 }
1245 
1246 #define MEC_HPD_SIZE 2048
1247 
/*
 * gfx_v8_0_mec_init - set up the compute MEC and its HPD EOP buffer
 *
 * Configures one MEC with one pipe (the remaining pipes are handled by
 * KFD) for 8 queues, then allocates, pins and zeroes a GTT buffer
 * object used as the HPD EOP area.
 *
 * Returns 0 on success or a negative error code; on failure any
 * partially-initialized state is torn down via gfx_v8_0_mec_fini().
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	/* zero the whole HPD EOP area (same size as the allocation above) */
	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
1299 
/*
 * Raw GFX8 shader dwords copied into the workaround IB at vgpr_offset by
 * gfx_v8_0_do_edc_gpr_workarounds() and dispatched to touch the VGPR file.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1336 
/*
 * Raw GFX8 shader dwords copied into the workaround IB at sgpr_offset by
 * gfx_v8_0_do_edc_gpr_workarounds(); dispatched twice (with different
 * COMPUTE_STATIC_THREAD_MGMT_SE0 masks) to touch the SGPR file.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1361 
/*
 * (register, value) pairs written via SET_SH_REG packets before the
 * VGPR-init dispatch in gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1381 
/*
 * (register, value) pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); SE0 thread mask 0x0f selects the
 * first group of CUs.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1401 
/*
 * (register, value) pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); identical to sgpr1_init_regs
 * except the SE0 thread mask (0xf0) selects the other group of CUs.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1421 
/*
 * EDC SEC/DED counter registers; gfx_v8_0_do_edc_gpr_workarounds()
 * reads each one back once to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1450 
1451 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1452 {
1453 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1454 	struct amdgpu_ib ib;
1455 	struct fence *f = NULL;
1456 	int r, i;
1457 	u32 tmp;
1458 	unsigned total_size, vgpr_offset, sgpr_offset;
1459 	u64 gpu_addr;
1460 
1461 	/* only supported on CZ */
1462 	if (adev->asic_type != CHIP_CARRIZO)
1463 		return 0;
1464 
1465 	/* bail if the compute ring is not ready */
1466 	if (!ring->ready)
1467 		return 0;
1468 
1469 	tmp = RREG32(mmGB_EDC_MODE);
1470 	WREG32(mmGB_EDC_MODE, 0);
1471 
1472 	total_size =
1473 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1474 	total_size +=
1475 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1476 	total_size +=
1477 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1478 	total_size = ALIGN(total_size, 256);
1479 	vgpr_offset = total_size;
1480 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1481 	sgpr_offset = total_size;
1482 	total_size += sizeof(sgpr_init_compute_shader);
1483 
1484 	/* allocate an indirect buffer to put the commands in */
1485 	memset(&ib, 0, sizeof(ib));
1486 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1487 	if (r) {
1488 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1489 		return r;
1490 	}
1491 
1492 	/* load the compute shaders */
1493 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1494 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1495 
1496 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1497 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1498 
1499 	/* init the ib length to 0 */
1500 	ib.length_dw = 0;
1501 
1502 	/* VGPR */
1503 	/* write the register state for the compute dispatch */
1504 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1505 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1506 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1507 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1508 	}
1509 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1510 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1511 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1512 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1513 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1514 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1515 
1516 	/* write dispatch packet */
1517 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1518 	ib.ptr[ib.length_dw++] = 8; /* x */
1519 	ib.ptr[ib.length_dw++] = 1; /* y */
1520 	ib.ptr[ib.length_dw++] = 1; /* z */
1521 	ib.ptr[ib.length_dw++] =
1522 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1523 
1524 	/* write CS partial flush packet */
1525 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1526 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1527 
1528 	/* SGPR1 */
1529 	/* write the register state for the compute dispatch */
1530 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1531 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1532 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1533 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1534 	}
1535 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1536 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1537 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1538 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1539 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1540 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1541 
1542 	/* write dispatch packet */
1543 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1544 	ib.ptr[ib.length_dw++] = 8; /* x */
1545 	ib.ptr[ib.length_dw++] = 1; /* y */
1546 	ib.ptr[ib.length_dw++] = 1; /* z */
1547 	ib.ptr[ib.length_dw++] =
1548 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1549 
1550 	/* write CS partial flush packet */
1551 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1552 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1553 
1554 	/* SGPR2 */
1555 	/* write the register state for the compute dispatch */
1556 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1557 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1558 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1559 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1560 	}
1561 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1562 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1563 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1564 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1565 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1566 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1567 
1568 	/* write dispatch packet */
1569 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1570 	ib.ptr[ib.length_dw++] = 8; /* x */
1571 	ib.ptr[ib.length_dw++] = 1; /* y */
1572 	ib.ptr[ib.length_dw++] = 1; /* z */
1573 	ib.ptr[ib.length_dw++] =
1574 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1575 
1576 	/* write CS partial flush packet */
1577 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1578 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1579 
1580 	/* shedule the ib on the ring */
1581 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1582 	if (r) {
1583 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1584 		goto fail;
1585 	}
1586 
1587 	/* wait for the GPU to finish processing the IB */
1588 	r = fence_wait(f, false);
1589 	if (r) {
1590 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1591 		goto fail;
1592 	}
1593 
1594 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1595 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1596 	WREG32(mmGB_EDC_MODE, tmp);
1597 
1598 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1599 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1600 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1601 
1602 
1603 	/* read back registers to clear the counters */
1604 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1605 		RREG32(sec_ded_counter_registers[i]);
1606 
1607 fail:
1608 	fence_put(f);
1609 	amdgpu_ib_free(adev, &ib, NULL);
1610 	fence_put(f);
1611 
1612 	return r;
1613 }
1614 
/**
 * gfx_v8_0_gpu_early_init - derive the gfx configuration for this ASIC
 * @adev: amdgpu device pointer
 *
 * Populates adev->gfx.config with the per-ASIC shader-engine/CU/backend
 * limits and scan-converter FIFO sizes, then decodes the memory
 * controller registers to compute the DRAM row size and the final
 * GB_ADDR_CONFIG value stored in adev->gfx.config.gb_addr_config.
 *
 * Returns 0 on success, or the error from
 * amdgpu_atombios_get_gfx_info() on Polaris parts (the only path that
 * can fail).
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	/*
	 * Per-ASIC gfx topology constants.  Polaris parts query them from
	 * atombios instead of hard-coding; everything else is fixed here.
	 */
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
		/* topology (SE/CU/backend counts) comes from atombios */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		/* topology (SE/CU/backend counts) comes from atombios */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count depends on the Carrizo SKU (PCI revision id) */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			/* NOTE(review): B6 is given the same CU count as B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count depends on the Stoney SKU (PCI revision id) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		/* unknown VI part: conservative Tonga-like defaults */
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never used below */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		/* maps 0, 3, 4 and >12 are treated as "no DIMM present" */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: row size = 4 bytes/col * 2^(8 + NOOFCOLS), capped at 4KB */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	/* encode mem_row_size_in_kb (1/2/4) into the ROW_SIZE field (0/1/2) */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1871 
1872 static int gfx_v8_0_sw_init(void *handle)
1873 {
1874 	int i, r;
1875 	struct amdgpu_ring *ring;
1876 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1877 
1878 	/* EOP Event */
1879 	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1880 	if (r)
1881 		return r;
1882 
1883 	/* Privileged reg */
1884 	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1885 	if (r)
1886 		return r;
1887 
1888 	/* Privileged inst */
1889 	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1890 	if (r)
1891 		return r;
1892 
1893 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1894 
1895 	gfx_v8_0_scratch_init(adev);
1896 
1897 	r = gfx_v8_0_init_microcode(adev);
1898 	if (r) {
1899 		DRM_ERROR("Failed to load gfx firmware!\n");
1900 		return r;
1901 	}
1902 
1903 	r = gfx_v8_0_rlc_init(adev);
1904 	if (r) {
1905 		DRM_ERROR("Failed to init rlc BOs!\n");
1906 		return r;
1907 	}
1908 
1909 	r = gfx_v8_0_mec_init(adev);
1910 	if (r) {
1911 		DRM_ERROR("Failed to init MEC BOs!\n");
1912 		return r;
1913 	}
1914 
1915 	/* set up the gfx ring */
1916 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1917 		ring = &adev->gfx.gfx_ring[i];
1918 		ring->ring_obj = NULL;
1919 		sprintf(ring->name, "gfx");
1920 		/* no gfx doorbells on iceland */
1921 		if (adev->asic_type != CHIP_TOPAZ) {
1922 			ring->use_doorbell = true;
1923 			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1924 		}
1925 
1926 		r = amdgpu_ring_init(adev, ring, 1024,
1927 				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1928 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1929 				     AMDGPU_RING_TYPE_GFX);
1930 		if (r)
1931 			return r;
1932 	}
1933 
1934 	/* set up the compute queues */
1935 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1936 		unsigned irq_type;
1937 
1938 		/* max 32 queues per MEC */
1939 		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1940 			DRM_ERROR("Too many (%d) compute rings!\n", i);
1941 			break;
1942 		}
1943 		ring = &adev->gfx.compute_ring[i];
1944 		ring->ring_obj = NULL;
1945 		ring->use_doorbell = true;
1946 		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1947 		ring->me = 1; /* first MEC */
1948 		ring->pipe = i / 8;
1949 		ring->queue = i % 8;
1950 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1951 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1952 		/* type-2 packets are deprecated on MEC, use type-3 instead */
1953 		r = amdgpu_ring_init(adev, ring, 1024,
1954 				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1955 				     &adev->gfx.eop_irq, irq_type,
1956 				     AMDGPU_RING_TYPE_COMPUTE);
1957 		if (r)
1958 			return r;
1959 	}
1960 
1961 	/* reserve GDS, GWS and OA resource for gfx */
1962 	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1963 			PAGE_SIZE, true,
1964 			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1965 			NULL, &adev->gds.gds_gfx_bo);
1966 	if (r)
1967 		return r;
1968 
1969 	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1970 		PAGE_SIZE, true,
1971 		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1972 		NULL, &adev->gds.gws_gfx_bo);
1973 	if (r)
1974 		return r;
1975 
1976 	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1977 			PAGE_SIZE, true,
1978 			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1979 			NULL, &adev->gds.oa_gfx_bo);
1980 	if (r)
1981 		return r;
1982 
1983 	adev->gfx.ce_ram_size = 0x8000;
1984 
1985 	r = gfx_v8_0_gpu_early_init(adev);
1986 	if (r)
1987 		return r;
1988 
1989 	return 0;
1990 }
1991 
1992 static int gfx_v8_0_sw_fini(void *handle)
1993 {
1994 	int i;
1995 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1996 
1997 	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1998 	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1999 	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2000 
2001 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2002 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2003 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2004 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2005 
2006 	gfx_v8_0_mec_fini(adev);
2007 
2008 	gfx_v8_0_rlc_fini(adev);
2009 
2010 	gfx_v8_0_free_microcode(adev);
2011 
2012 	return 0;
2013 }
2014 
2015 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2016 {
2017 	uint32_t *modearray, *mod2array;
2018 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2019 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2020 	u32 reg_offset;
2021 
2022 	modearray = adev->gfx.config.tile_mode_array;
2023 	mod2array = adev->gfx.config.macrotile_mode_array;
2024 
2025 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2026 		modearray[reg_offset] = 0;
2027 
2028 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2029 		mod2array[reg_offset] = 0;
2030 
2031 	switch (adev->asic_type) {
2032 	case CHIP_TOPAZ:
2033 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2034 				PIPE_CONFIG(ADDR_SURF_P2) |
2035 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2036 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2037 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2038 				PIPE_CONFIG(ADDR_SURF_P2) |
2039 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2040 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2041 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2042 				PIPE_CONFIG(ADDR_SURF_P2) |
2043 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2044 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2045 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046 				PIPE_CONFIG(ADDR_SURF_P2) |
2047 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2048 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2049 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2050 				PIPE_CONFIG(ADDR_SURF_P2) |
2051 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2052 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2053 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2054 				PIPE_CONFIG(ADDR_SURF_P2) |
2055 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2056 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2057 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2058 				PIPE_CONFIG(ADDR_SURF_P2) |
2059 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2060 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2061 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2062 				PIPE_CONFIG(ADDR_SURF_P2));
2063 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2064 				PIPE_CONFIG(ADDR_SURF_P2) |
2065 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2066 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2067 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2068 				 PIPE_CONFIG(ADDR_SURF_P2) |
2069 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2070 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2072 				 PIPE_CONFIG(ADDR_SURF_P2) |
2073 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2074 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2075 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2076 				 PIPE_CONFIG(ADDR_SURF_P2) |
2077 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2079 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2080 				 PIPE_CONFIG(ADDR_SURF_P2) |
2081 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2082 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2083 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2084 				 PIPE_CONFIG(ADDR_SURF_P2) |
2085 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2086 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2087 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2088 				 PIPE_CONFIG(ADDR_SURF_P2) |
2089 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2091 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2092 				 PIPE_CONFIG(ADDR_SURF_P2) |
2093 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2094 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2095 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2096 				 PIPE_CONFIG(ADDR_SURF_P2) |
2097 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2098 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2099 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2100 				 PIPE_CONFIG(ADDR_SURF_P2) |
2101 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2102 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2103 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2104 				 PIPE_CONFIG(ADDR_SURF_P2) |
2105 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2106 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2107 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2108 				 PIPE_CONFIG(ADDR_SURF_P2) |
2109 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2110 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2111 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2112 				 PIPE_CONFIG(ADDR_SURF_P2) |
2113 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2114 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2115 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2116 				 PIPE_CONFIG(ADDR_SURF_P2) |
2117 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2118 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2119 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2120 				 PIPE_CONFIG(ADDR_SURF_P2) |
2121 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2122 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2123 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2124 				 PIPE_CONFIG(ADDR_SURF_P2) |
2125 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2126 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2127 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2128 				 PIPE_CONFIG(ADDR_SURF_P2) |
2129 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2130 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2131 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2132 				 PIPE_CONFIG(ADDR_SURF_P2) |
2133 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2134 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2135 
2136 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2137 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2138 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2139 				NUM_BANKS(ADDR_SURF_8_BANK));
2140 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2141 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2142 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2143 				NUM_BANKS(ADDR_SURF_8_BANK));
2144 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2145 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2146 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2147 				NUM_BANKS(ADDR_SURF_8_BANK));
2148 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2149 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2150 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2151 				NUM_BANKS(ADDR_SURF_8_BANK));
2152 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2153 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2154 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2155 				NUM_BANKS(ADDR_SURF_8_BANK));
2156 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2157 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2158 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2159 				NUM_BANKS(ADDR_SURF_8_BANK));
2160 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2161 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2162 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2163 				NUM_BANKS(ADDR_SURF_8_BANK));
2164 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2165 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2166 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2167 				NUM_BANKS(ADDR_SURF_16_BANK));
2168 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2169 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2170 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2171 				NUM_BANKS(ADDR_SURF_16_BANK));
2172 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2173 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2174 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2175 				 NUM_BANKS(ADDR_SURF_16_BANK));
2176 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2177 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2178 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2179 				 NUM_BANKS(ADDR_SURF_16_BANK));
2180 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2181 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2182 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2183 				 NUM_BANKS(ADDR_SURF_16_BANK));
2184 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2185 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2186 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2187 				 NUM_BANKS(ADDR_SURF_16_BANK));
2188 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2189 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2190 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2191 				 NUM_BANKS(ADDR_SURF_8_BANK));
2192 
2193 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2194 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2195 			    reg_offset != 23)
2196 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2197 
2198 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2199 			if (reg_offset != 7)
2200 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2201 
2202 		break;
2203 	case CHIP_FIJI:
2204 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2205 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2206 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2207 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2208 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2209 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2210 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2211 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2212 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2213 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2214 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2215 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2216 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2218 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2219 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2220 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2221 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2222 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2223 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2224 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2225 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2226 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2227 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2228 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2229 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2230 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2231 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2233 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2234 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2235 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2237 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2238 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2239 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2240 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2241 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2244 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2245 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2246 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2247 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2248 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2249 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2251 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2252 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2253 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2254 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2255 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2256 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2257 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2258 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2260 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2261 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2263 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2264 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2265 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2266 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2268 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2269 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2270 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2271 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2272 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2274 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2275 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2276 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2278 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2279 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2281 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2282 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2283 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2285 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2286 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2287 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2289 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2291 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2295 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2296 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2299 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2301 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2303 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2307 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2309 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2311 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2312 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2313 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2314 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2315 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2316 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2317 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2319 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2322 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2324 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2326 
2327 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2329 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2330 				NUM_BANKS(ADDR_SURF_8_BANK));
2331 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2333 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2334 				NUM_BANKS(ADDR_SURF_8_BANK));
2335 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2337 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2338 				NUM_BANKS(ADDR_SURF_8_BANK));
2339 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2341 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342 				NUM_BANKS(ADDR_SURF_8_BANK));
2343 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2345 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2346 				NUM_BANKS(ADDR_SURF_8_BANK));
2347 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2350 				NUM_BANKS(ADDR_SURF_8_BANK));
2351 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2354 				NUM_BANKS(ADDR_SURF_8_BANK));
2355 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2357 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 				NUM_BANKS(ADDR_SURF_8_BANK));
2359 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2361 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2362 				NUM_BANKS(ADDR_SURF_8_BANK));
2363 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2365 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2366 				 NUM_BANKS(ADDR_SURF_8_BANK));
2367 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370 				 NUM_BANKS(ADDR_SURF_8_BANK));
2371 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2374 				 NUM_BANKS(ADDR_SURF_8_BANK));
2375 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2377 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2378 				 NUM_BANKS(ADDR_SURF_8_BANK));
2379 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382 				 NUM_BANKS(ADDR_SURF_4_BANK));
2383 
2384 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2385 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2386 
2387 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2388 			if (reg_offset != 7)
2389 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2390 
2391 		break;
	case CHIP_TONGA:
		/*
		 * Tile mode table for Tonga.  modearray[n] is written to
		 * GB_TILE_MODE0 + n below.  Pipe config is mostly the 8-pipe
		 * ADDR_SURF_P8_32x32_16x16; a few PRT entries fall back to
		 * ADDR_SURF_P4_16x16.
		 */
		/* entries 0-7: depth micro-tiling with increasing tile split */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* entry 8: linear aligned */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		/* entries 9-12: display micro-tiling */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* entries 13-17: thin micro-tiling */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* entries 18-26: thick/xthick array modes */
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* entries 27-30: rotated micro-tiling */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macro-tile (bank) parameters; mod2array[n] is written to
		 * GB_MACROTILE_MODE0 + n below.  Index 7 is intentionally
		 * not populated and the write loop below skips it.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile mode registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile mode registers, skipping entry 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
		/*
		 * Tile mode table for Polaris11.  modearray[n] is written to
		 * GB_TILE_MODE0 + n below.  Every entry uses the 4-pipe
		 * ADDR_SURF_P4_16x16 pipe config.
		 */
		/* entries 0-7: depth micro-tiling with increasing tile split */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* entry 8: linear aligned */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		/* entries 9-12: display micro-tiling */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* entries 13-17: thin micro-tiling */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* entries 18-26: thick/xthick array modes */
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* entries 27-30: rotated micro-tiling */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macro-tile (bank) parameters; mod2array[n] is written to
		 * GB_MACROTILE_MODE0 + n below.  Index 7 is intentionally
		 * not populated and the write loop below skips it.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile mode registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile mode registers, skipping entry 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		/*
		 * Tile mode table for Polaris10.  modearray[n] is written to
		 * GB_TILE_MODE0 + n below.  Pipe config is mostly the 8-pipe
		 * ADDR_SURF_P8_32x32_16x16; a few PRT entries fall back to
		 * ADDR_SURF_P4_16x16.
		 */
		/* entries 0-7: depth micro-tiling with increasing tile split */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* entry 8: linear aligned */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		/* entries 9-12: display micro-tiling */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* entries 13-17: thin micro-tiling */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* entries 18-26: thick/xthick array modes */
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* entries 27-30: rotated micro-tiling */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macro-tile (bank) parameters; mod2array[n] is written to
		 * GB_MACROTILE_MODE0 + n below.  Index 7 is intentionally
		 * not populated and the write loop below skips it.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile mode registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile mode registers, skipping entry 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2985 	case CHIP_STONEY:
2986 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2987 				PIPE_CONFIG(ADDR_SURF_P2) |
2988 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2989 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2990 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2991 				PIPE_CONFIG(ADDR_SURF_P2) |
2992 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2993 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2994 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2995 				PIPE_CONFIG(ADDR_SURF_P2) |
2996 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2997 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2998 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2999 				PIPE_CONFIG(ADDR_SURF_P2) |
3000 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3001 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3002 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3003 				PIPE_CONFIG(ADDR_SURF_P2) |
3004 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3005 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3006 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3007 				PIPE_CONFIG(ADDR_SURF_P2) |
3008 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3009 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3010 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3011 				PIPE_CONFIG(ADDR_SURF_P2) |
3012 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3013 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3014 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3015 				PIPE_CONFIG(ADDR_SURF_P2));
3016 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3017 				PIPE_CONFIG(ADDR_SURF_P2) |
3018 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3019 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3020 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3021 				 PIPE_CONFIG(ADDR_SURF_P2) |
3022 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3023 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3024 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3025 				 PIPE_CONFIG(ADDR_SURF_P2) |
3026 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3027 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3028 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3029 				 PIPE_CONFIG(ADDR_SURF_P2) |
3030 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3031 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3032 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3033 				 PIPE_CONFIG(ADDR_SURF_P2) |
3034 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3035 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3036 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3037 				 PIPE_CONFIG(ADDR_SURF_P2) |
3038 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3039 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3041 				 PIPE_CONFIG(ADDR_SURF_P2) |
3042 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3043 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3044 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3045 				 PIPE_CONFIG(ADDR_SURF_P2) |
3046 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3047 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3048 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3049 				 PIPE_CONFIG(ADDR_SURF_P2) |
3050 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3051 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3052 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3053 				 PIPE_CONFIG(ADDR_SURF_P2) |
3054 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3055 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3056 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3057 				 PIPE_CONFIG(ADDR_SURF_P2) |
3058 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3059 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3060 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3061 				 PIPE_CONFIG(ADDR_SURF_P2) |
3062 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3063 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3064 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3065 				 PIPE_CONFIG(ADDR_SURF_P2) |
3066 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3067 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3068 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3069 				 PIPE_CONFIG(ADDR_SURF_P2) |
3070 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3071 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3072 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3073 				 PIPE_CONFIG(ADDR_SURF_P2) |
3074 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3075 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3076 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3077 				 PIPE_CONFIG(ADDR_SURF_P2) |
3078 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3079 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3080 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3081 				 PIPE_CONFIG(ADDR_SURF_P2) |
3082 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3083 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3084 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3085 				 PIPE_CONFIG(ADDR_SURF_P2) |
3086 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3087 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3088 
3089 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3091 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3092 				NUM_BANKS(ADDR_SURF_8_BANK));
3093 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3095 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3096 				NUM_BANKS(ADDR_SURF_8_BANK));
3097 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3098 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3099 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3100 				NUM_BANKS(ADDR_SURF_8_BANK));
3101 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3102 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3103 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3104 				NUM_BANKS(ADDR_SURF_8_BANK));
3105 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108 				NUM_BANKS(ADDR_SURF_8_BANK));
3109 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3110 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3111 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3112 				NUM_BANKS(ADDR_SURF_8_BANK));
3113 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3114 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3115 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3116 				NUM_BANKS(ADDR_SURF_8_BANK));
3117 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3118 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3119 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3120 				NUM_BANKS(ADDR_SURF_16_BANK));
3121 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3122 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3123 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3124 				NUM_BANKS(ADDR_SURF_16_BANK));
3125 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3126 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3127 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3128 				 NUM_BANKS(ADDR_SURF_16_BANK));
3129 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3130 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3131 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132 				 NUM_BANKS(ADDR_SURF_16_BANK));
3133 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3134 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3135 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3136 				 NUM_BANKS(ADDR_SURF_16_BANK));
3137 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3138 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3139 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3140 				 NUM_BANKS(ADDR_SURF_16_BANK));
3141 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3143 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3144 				 NUM_BANKS(ADDR_SURF_8_BANK));
3145 
3146 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3147 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3148 			    reg_offset != 23)
3149 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3150 
3151 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3152 			if (reg_offset != 7)
3153 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3154 
3155 		break;
3156 	default:
3157 		dev_warn(adev->dev,
3158 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3159 			 adev->asic_type);
3160 
3161 	case CHIP_CARRIZO:
3162 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3163 				PIPE_CONFIG(ADDR_SURF_P2) |
3164 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3165 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3166 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3167 				PIPE_CONFIG(ADDR_SURF_P2) |
3168 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3169 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3170 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3171 				PIPE_CONFIG(ADDR_SURF_P2) |
3172 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3173 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3174 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3175 				PIPE_CONFIG(ADDR_SURF_P2) |
3176 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3177 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3178 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3179 				PIPE_CONFIG(ADDR_SURF_P2) |
3180 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3181 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3182 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3183 				PIPE_CONFIG(ADDR_SURF_P2) |
3184 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3185 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3186 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3187 				PIPE_CONFIG(ADDR_SURF_P2) |
3188 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3189 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3190 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3191 				PIPE_CONFIG(ADDR_SURF_P2));
3192 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3193 				PIPE_CONFIG(ADDR_SURF_P2) |
3194 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3195 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197 				 PIPE_CONFIG(ADDR_SURF_P2) |
3198 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3199 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3201 				 PIPE_CONFIG(ADDR_SURF_P2) |
3202 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3203 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3204 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3205 				 PIPE_CONFIG(ADDR_SURF_P2) |
3206 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3207 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3208 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3209 				 PIPE_CONFIG(ADDR_SURF_P2) |
3210 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3211 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3212 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3213 				 PIPE_CONFIG(ADDR_SURF_P2) |
3214 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3215 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3216 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3217 				 PIPE_CONFIG(ADDR_SURF_P2) |
3218 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3219 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3220 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3221 				 PIPE_CONFIG(ADDR_SURF_P2) |
3222 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3223 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3224 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3225 				 PIPE_CONFIG(ADDR_SURF_P2) |
3226 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3227 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3228 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3229 				 PIPE_CONFIG(ADDR_SURF_P2) |
3230 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3231 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3232 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3233 				 PIPE_CONFIG(ADDR_SURF_P2) |
3234 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3235 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3236 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3237 				 PIPE_CONFIG(ADDR_SURF_P2) |
3238 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3239 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3240 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3241 				 PIPE_CONFIG(ADDR_SURF_P2) |
3242 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3243 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3244 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3245 				 PIPE_CONFIG(ADDR_SURF_P2) |
3246 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3247 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3248 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3249 				 PIPE_CONFIG(ADDR_SURF_P2) |
3250 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3251 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3252 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3253 				 PIPE_CONFIG(ADDR_SURF_P2) |
3254 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3255 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3256 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3257 				 PIPE_CONFIG(ADDR_SURF_P2) |
3258 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3259 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3260 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261 				 PIPE_CONFIG(ADDR_SURF_P2) |
3262 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3263 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3264 
3265 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3266 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3267 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3268 				NUM_BANKS(ADDR_SURF_8_BANK));
3269 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3270 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3271 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3272 				NUM_BANKS(ADDR_SURF_8_BANK));
3273 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3274 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3275 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3276 				NUM_BANKS(ADDR_SURF_8_BANK));
3277 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3278 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3279 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3280 				NUM_BANKS(ADDR_SURF_8_BANK));
3281 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3282 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3283 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3284 				NUM_BANKS(ADDR_SURF_8_BANK));
3285 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3286 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3287 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3288 				NUM_BANKS(ADDR_SURF_8_BANK));
3289 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3290 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3291 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3292 				NUM_BANKS(ADDR_SURF_8_BANK));
3293 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3294 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3295 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3296 				NUM_BANKS(ADDR_SURF_16_BANK));
3297 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3298 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3299 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3300 				NUM_BANKS(ADDR_SURF_16_BANK));
3301 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3302 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3303 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3304 				 NUM_BANKS(ADDR_SURF_16_BANK));
3305 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3306 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3307 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3308 				 NUM_BANKS(ADDR_SURF_16_BANK));
3309 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3311 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3312 				 NUM_BANKS(ADDR_SURF_16_BANK));
3313 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3314 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3315 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3316 				 NUM_BANKS(ADDR_SURF_16_BANK));
3317 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3318 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3319 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3320 				 NUM_BANKS(ADDR_SURF_8_BANK));
3321 
3322 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3323 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3324 			    reg_offset != 23)
3325 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3326 
3327 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3328 			if (reg_offset != 7)
3329 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3330 
3331 		break;
3332 	}
3333 }
3334 
3335 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3336 {
3337 	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3338 
3339 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3340 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3341 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3342 	} else if (se_num == 0xffffffff) {
3343 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3344 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3345 	} else if (sh_num == 0xffffffff) {
3346 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3347 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3348 	} else {
3349 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3350 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3351 	}
3352 	WREG32(mmGRBM_GFX_INDEX, data);
3353 }
3354 
/* Build a mask of bit_width consecutive low-order 1 bits; the 64-bit
 * intermediate keeps bit_width == 32 well defined. */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	u64 mask = (1ULL << bit_width) - 1;

	return (u32)mask;
}
3359 
3360 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3361 {
3362 	u32 data, mask;
3363 
3364 	data = RREG32(mmCC_RB_BACKEND_DISABLE);
3365 	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3366 
3367 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3368 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3369 
3370 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3371 				       adev->gfx.config.max_sh_per_se);
3372 
3373 	return (~data) & mask;
3374 }
3375 
/* Walk every SE/SH, collect the per-unit active render-backend bitmaps
 * into one aggregate mask, and cache the mask and RB count in
 * adev->gfx.config for later use.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	/* number of RB bits contributed by each SE/SH pair */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;

	/* serialize against other users of GRBM_GFX_INDEX */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* pack each SE/SH bitmap into its own bit field */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* restore broadcast mode before releasing the lock */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}
3399 
3400 /**
3401  * gfx_v8_0_init_compute_vmid - gart enable
3402  *
3403  * @rdev: amdgpu_device pointer
3404  *
3405  * Initialize compute vmid sh_mem registers
3406  *
3407  */
3408 #define DEFAULT_SH_MEM_BASES	(0x6000)
3409 #define FIRST_COMPUTE_VMID	(8)
3410 #define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same base value in both the shared (low) and private (high) halves */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, cache-coherent
	 * default mtype, private apertures routed through the ATC */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	/* apply identical settings to every compute-reserved VMID */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base=1/limit=0 (base > limit) — presumably leaves
		 * aperture 1 disabled; confirm against SH_MEM_APE1 spec */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* switch back to VMID 0 before dropping the lock */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3444 
3445 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3446 {
3447 	u32 tmp;
3448 	int i;
3449 
3450 	tmp = RREG32(mmGRBM_CNTL);
3451 	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3452 	WREG32(mmGRBM_CNTL, tmp);
3453 
3454 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3455 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3456 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3457 
3458 	gfx_v8_0_tiling_mode_table_init(adev);
3459 
3460 	gfx_v8_0_setup_rb(adev);
3461 	gfx_v8_0_get_cu_info(adev);
3462 
3463 	/* XXX SH_MEM regs */
3464 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3465 	mutex_lock(&adev->srbm_mutex);
3466 	for (i = 0; i < 16; i++) {
3467 		vi_srbm_select(adev, 0, 0, 0, i);
3468 		/* CP and shaders */
3469 		if (i == 0) {
3470 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3471 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3472 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3473 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3474 			WREG32(mmSH_MEM_CONFIG, tmp);
3475 		} else {
3476 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3477 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3478 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3479 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3480 			WREG32(mmSH_MEM_CONFIG, tmp);
3481 		}
3482 
3483 		WREG32(mmSH_MEM_APE1_BASE, 1);
3484 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3485 		WREG32(mmSH_MEM_BASES, 0);
3486 	}
3487 	vi_srbm_select(adev, 0, 0, 0, 0);
3488 	mutex_unlock(&adev->srbm_mutex);
3489 
3490 	gfx_v8_0_init_compute_vmid(adev);
3491 
3492 	mutex_lock(&adev->grbm_idx_mutex);
3493 	/*
3494 	 * making sure that the following register writes will be broadcasted
3495 	 * to all the shaders
3496 	 */
3497 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3498 
3499 	WREG32(mmPA_SC_FIFO_SIZE,
3500 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3501 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3502 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3503 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3504 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3505 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3506 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3507 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3508 	mutex_unlock(&adev->grbm_idx_mutex);
3509 
3510 }
3511 
3512 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3513 {
3514 	u32 i, j, k;
3515 	u32 mask;
3516 
3517 	mutex_lock(&adev->grbm_idx_mutex);
3518 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3519 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3520 			gfx_v8_0_select_se_sh(adev, i, j);
3521 			for (k = 0; k < adev->usec_timeout; k++) {
3522 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3523 					break;
3524 				udelay(1);
3525 			}
3526 		}
3527 	}
3528 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3529 	mutex_unlock(&adev->grbm_idx_mutex);
3530 
3531 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3532 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3533 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3534 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3535 	for (k = 0; k < adev->usec_timeout; k++) {
3536 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3537 			break;
3538 		udelay(1);
3539 	}
3540 }
3541 
3542 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3543 					       bool enable)
3544 {
3545 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3546 
3547 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3548 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3549 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3550 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3551 
3552 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3553 }
3554 
3555 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3556 {
3557 	/* csib */
3558 	WREG32(mmRLC_CSIB_ADDR_HI,
3559 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3560 	WREG32(mmRLC_CSIB_ADDR_LO,
3561 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3562 	WREG32(mmRLC_CSIB_LENGTH,
3563 			adev->gfx.rlc.clear_state_size);
3564 }
3565 
/* gfx_v8_0_parse_ind_reg_list - compress the RLC indirect register list
 *
 * The format list is a sequence of entries terminated by 0xFFFFFFFF; each
 * record inside an entry is a triple whose third word is an index value.
 * For every entry, record its start offset in @ind_start_offsets, and
 * rewrite each triple's index value in place with the position of that
 * value in the deduplicated @unique_indices table.
 *
 * Fix vs. previous revision: the BUG_ON bounds checks now run BEFORE the
 * array stores they guard. The old order wrote first and checked after,
 * so it could never prevent an out-of-bounds store and falsely tripped
 * when the final valid slot of a table was used.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			/* bounds check before the store */
			BUG_ON(*offset_count >= max_offset);
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
		}

		/* 0xFFFFFFFF terminates the current entry */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip the first two words of the triple to its index value */
		ind_offset += 2;

		/* look for a matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* new value: bounds check before appending it */
			BUG_ON(*indices_count >= max_indices);
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
		}

		/* replace the raw value with its table position */
		register_list_format[ind_offset] = indices;
	}
}
3615 
/* gfx_v8_0_init_save_restore_list - program the RLC save/restore lists
 *
 * Uploads the direct register-restore list into SRM ARAM, then the
 * (index-rewritten) indirect format list, its size, the per-entry start
 * offsets and the unique index registers into RLC GPM scratch, so the
 * save/restore machine can preserve GFX state across power gating.
 *
 * Returns 0 on success, -ENOMEM if the scratch copy of the format list
 * cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a scratch copy: the parse below rewrites index slots */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (register_list_format == NULL)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	temp = RREG32(mmRLC_SRM_CNTL);
	temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
	WREG32(mmRLC_SRM_CNTL, temp);

	/* stream the direct restore list through the auto-incrementing
	 * ARAM data port */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* half the dword count is written as the restore list size;
	 * NOTE(review): presumably the HW counts register pairs — confirm */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		/* NOTE(review): low 18 bits go to the ADDR register and
		 * bits 20+ to the DATA register — confirm the split
		 * against the RLC SRM index register spec */
		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
	}
	kfree(register_list_format);

	return 0;
}
3679 
3680 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3681 {
3682 	uint32_t data;
3683 
3684 	data = RREG32(mmRLC_SRM_CNTL);
3685 	data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3686 	WREG32(mmRLC_SRM_CNTL, data);
3687 }
3688 
3689 static void polaris11_init_power_gating(struct amdgpu_device *adev)
3690 {
3691 	uint32_t data;
3692 
3693 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3694 			AMD_PG_SUPPORT_GFX_SMG |
3695 			AMD_PG_SUPPORT_GFX_DMG)) {
3696 		data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3697 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3698 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3699 		WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3700 
3701 		data = 0;
3702 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3703 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3704 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3705 		data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3706 		WREG32(mmRLC_PG_DELAY, data);
3707 
3708 		data = RREG32(mmRLC_PG_DELAY_2);
3709 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3710 		data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3711 		WREG32(mmRLC_PG_DELAY_2, data);
3712 
3713 		data = RREG32(mmRLC_AUTO_PG_CTRL);
3714 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3715 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3716 		WREG32(mmRLC_AUTO_PG_CTRL, data);
3717 	}
3718 }
3719 
3720 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3721 {
3722 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3723 			      AMD_PG_SUPPORT_GFX_SMG |
3724 			      AMD_PG_SUPPORT_GFX_DMG |
3725 			      AMD_PG_SUPPORT_CP |
3726 			      AMD_PG_SUPPORT_GDS |
3727 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3728 		gfx_v8_0_init_csb(adev);
3729 		gfx_v8_0_init_save_restore_list(adev);
3730 		gfx_v8_0_enable_save_restore_machine(adev);
3731 
3732 		if (adev->asic_type == CHIP_POLARIS11)
3733 			polaris11_init_power_gating(adev);
3734 	}
3735 }
3736 
3737 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3738 {
3739 	u32 tmp = RREG32(mmRLC_CNTL);
3740 
3741 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3742 	WREG32(mmRLC_CNTL, tmp);
3743 
3744 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3745 
3746 	gfx_v8_0_wait_for_rlc_serdes(adev);
3747 }
3748 
3749 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3750 {
3751 	u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3752 
3753 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3754 	WREG32(mmGRBM_SOFT_RESET, tmp);
3755 	udelay(50);
3756 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3757 	WREG32(mmGRBM_SOFT_RESET, tmp);
3758 	udelay(50);
3759 }
3760 
3761 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3762 {
3763 	u32 tmp = RREG32(mmRLC_CNTL);
3764 
3765 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3766 	WREG32(mmRLC_CNTL, tmp);
3767 
3768 	/* carrizo do enable cp interrupt after cp inited */
3769 	if (!(adev->flags & AMD_IS_APU))
3770 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3771 
3772 	udelay(50);
3773 }
3774 
/* gfx_v8_0_rlc_load_microcode - upload the RLC firmware image
 *
 * Streams the RLC ucode payload into GPM ucode memory through the
 * ADDR/DATA register pair, then writes the firmware version into the
 * ADDR register.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware has been fetched.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* ucode payload starts at the offset recorded in the header;
	 * size is in bytes, converted here to dwords */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	/* NOTE(review): DATA writes after ADDR=0 presumably auto-increment
	 * the ucode address — confirm against the RLC register spec */
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
3798 
/* gfx_v8_0_rlc_resume - bring the RLC back up
 *
 * Stops the RLC, disables clock and power gating, soft-resets the RLC,
 * reinitializes the PG state, loads the RLC firmware (directly, or by
 * confirming the SMU already loaded it) and restarts the RLC.
 *
 * Returns 0 on success or a negative error code on firmware failure.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
	if (adev->asic_type == CHIP_POLARIS11 ||
		adev->asic_type == CHIP_POLARIS10)
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	gfx_v8_0_init_pg(adev);

	/* when the powerplay stack is active it owns firmware loading */
	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU-assisted loading: just verify it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
3836 
3837 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3838 {
3839 	int i;
3840 	u32 tmp = RREG32(mmCP_ME_CNTL);
3841 
3842 	if (enable) {
3843 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3844 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3845 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3846 	} else {
3847 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3848 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3849 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3850 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3851 			adev->gfx.gfx_ring[i].ready = false;
3852 	}
3853 	WREG32(mmCP_ME_CNTL, tmp);
3854 	udelay(50);
3855 }
3856 
3857 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3858 {
3859 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3860 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3861 	const struct gfx_firmware_header_v1_0 *me_hdr;
3862 	const __le32 *fw_data;
3863 	unsigned i, fw_size;
3864 
3865 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3866 		return -EINVAL;
3867 
3868 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3869 		adev->gfx.pfp_fw->data;
3870 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3871 		adev->gfx.ce_fw->data;
3872 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3873 		adev->gfx.me_fw->data;
3874 
3875 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3876 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3877 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3878 
3879 	gfx_v8_0_cp_gfx_enable(adev, false);
3880 
3881 	/* PFP */
3882 	fw_data = (const __le32 *)
3883 		(adev->gfx.pfp_fw->data +
3884 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3885 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3886 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
3887 	for (i = 0; i < fw_size; i++)
3888 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3889 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3890 
3891 	/* CE */
3892 	fw_data = (const __le32 *)
3893 		(adev->gfx.ce_fw->data +
3894 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3895 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3896 	WREG32(mmCP_CE_UCODE_ADDR, 0);
3897 	for (i = 0; i < fw_size; i++)
3898 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3899 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3900 
3901 	/* ME */
3902 	fw_data = (const __le32 *)
3903 		(adev->gfx.me_fw->data +
3904 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3905 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3906 	WREG32(mmCP_ME_RAM_WADDR, 0);
3907 	for (i = 0; i < fw_size; i++)
3908 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3909 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3910 
3911 	return 0;
3912 }
3913 
3914 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3915 {
3916 	u32 count = 0;
3917 	const struct cs_section_def *sect = NULL;
3918 	const struct cs_extent_def *ext = NULL;
3919 
3920 	/* begin clear state */
3921 	count += 2;
3922 	/* context control state */
3923 	count += 3;
3924 
3925 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3926 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3927 			if (sect->id == SECT_CONTEXT)
3928 				count += 2 + ext->reg_count;
3929 			else
3930 				return 0;
3931 		}
3932 	}
3933 	/* pa_sc_raster_config/pa_sc_raster_config1 */
3934 	count += 4;
3935 	/* end clear state */
3936 	count += 2;
3937 	/* clear state */
3938 	count += 2;
3939 
3940 	return count;
3941 }
3942 
/*
 * gfx_v8_0_cp_gfx_start - program the gfx CP and emit the clear-state stream.
 *
 * Initializes the CP context/endian/device-id registers, un-halts the CP,
 * then emits the clear-state buffer on gfx ring 0: preamble, context
 * control, all SECT_CONTEXT register extents from vi_cs_data, the per-ASIC
 * PA_SC_RASTER_CONFIG pair, the end-of-clear-state preamble, a CLEAR_STATE
 * packet and the CE partition bases.  The ring allocation size must match
 * gfx_v8_0_get_csb_size() plus the 4 dwords for the SET_BASE packet.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 covers the SET_BASE packet not counted by get_csb_size() */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG1 golden values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* Topaz raster config depends on the RB count */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		/* unknown VI variant: raster config table needs an entry */
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4034 
/*
 * gfx_v8_0_cp_gfx_resume - bring up gfx ring 0 on the CP.
 *
 * Programs the ring buffer registers (size, block size, rptr/wptr,
 * writeback address, base address), sets up the gfx doorbell where the
 * ASIC supports it (not Topaz), then starts the ring via
 * gfx_v8_0_cp_gfx_start() and runs a ring test.
 *
 * Returns 0 on success or the ring-test error; on failure the ring is
 * left marked not ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA is held while resetting the pointers and
	 * dropped again below after the writeback address is set. */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* let the pointer reset settle before re-enabling normal mode */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* Tonga additionally restricts the gfx doorbell range */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
4117 
4118 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4119 {
4120 	int i;
4121 
4122 	if (enable) {
4123 		WREG32(mmCP_MEC_CNTL, 0);
4124 	} else {
4125 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4126 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4127 			adev->gfx.compute_ring[i].ready = false;
4128 	}
4129 	udelay(50);
4130 }
4131 
4132 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4133 {
4134 	const struct gfx_firmware_header_v1_0 *mec_hdr;
4135 	const __le32 *fw_data;
4136 	unsigned i, fw_size;
4137 
4138 	if (!adev->gfx.mec_fw)
4139 		return -EINVAL;
4140 
4141 	gfx_v8_0_cp_compute_enable(adev, false);
4142 
4143 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4144 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4145 
4146 	fw_data = (const __le32 *)
4147 		(adev->gfx.mec_fw->data +
4148 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4149 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4150 
4151 	/* MEC1 */
4152 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4153 	for (i = 0; i < fw_size; i++)
4154 		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4155 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4156 
4157 	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4158 	if (adev->gfx.mec2_fw) {
4159 		const struct gfx_firmware_header_v1_0 *mec2_hdr;
4160 
4161 		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4162 		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4163 
4164 		fw_data = (const __le32 *)
4165 			(adev->gfx.mec2_fw->data +
4166 			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4167 		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4168 
4169 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4170 		for (i = 0; i < fw_size; i++)
4171 			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4172 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4173 	}
4174 
4175 	return 0;
4176 }
4177 
/*
 * struct vi_mqd - Memory Queue Descriptor for VI compute queues.
 *
 * This layout is consumed directly by the CP firmware: every field sits at
 * a fixed dword ordinal (noted per field), so fields must never be added,
 * removed or reordered.  The driver fills in the cp_* HQD fields in
 * gfx_v8_0_cp_compute_resume() and the CP reads/writes the rest at
 * queue map/unmap time.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	/* HQD register mirror: fields from here map 1:1 onto the
	 * mmCP_MQD_BASE_ADDR..mmCP_HQD_* register block. */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
4438 
4439 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4440 {
4441 	int i, r;
4442 
4443 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4444 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4445 
4446 		if (ring->mqd_obj) {
4447 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4448 			if (unlikely(r != 0))
4449 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4450 
4451 			amdgpu_bo_unpin(ring->mqd_obj);
4452 			amdgpu_bo_unreserve(ring->mqd_obj);
4453 
4454 			amdgpu_bo_unref(&ring->mqd_obj);
4455 			ring->mqd_obj = NULL;
4456 		}
4457 	}
4458 }
4459 
4460 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4461 {
4462 	int r, i, j;
4463 	u32 tmp;
4464 	bool use_doorbell = true;
4465 	u64 hqd_gpu_addr;
4466 	u64 mqd_gpu_addr;
4467 	u64 eop_gpu_addr;
4468 	u64 wb_gpu_addr;
4469 	u32 *buf;
4470 	struct vi_mqd *mqd;
4471 
4472 	/* init the pipes */
4473 	mutex_lock(&adev->srbm_mutex);
4474 	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4475 		int me = (i < 4) ? 1 : 2;
4476 		int pipe = (i < 4) ? i : (i - 4);
4477 
4478 		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4479 		eop_gpu_addr >>= 8;
4480 
4481 		vi_srbm_select(adev, me, pipe, 0, 0);
4482 
4483 		/* write the EOP addr */
4484 		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4485 		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4486 
4487 		/* set the VMID assigned */
4488 		WREG32(mmCP_HQD_VMID, 0);
4489 
4490 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4491 		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4492 		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4493 				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
4494 		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4495 	}
4496 	vi_srbm_select(adev, 0, 0, 0, 0);
4497 	mutex_unlock(&adev->srbm_mutex);
4498 
4499 	/* init the queues.  Just two for now. */
4500 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4501 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4502 
4503 		if (ring->mqd_obj == NULL) {
4504 			r = amdgpu_bo_create(adev,
4505 					     sizeof(struct vi_mqd),
4506 					     PAGE_SIZE, true,
4507 					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4508 					     NULL, &ring->mqd_obj);
4509 			if (r) {
4510 				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4511 				return r;
4512 			}
4513 		}
4514 
4515 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4516 		if (unlikely(r != 0)) {
4517 			gfx_v8_0_cp_compute_fini(adev);
4518 			return r;
4519 		}
4520 		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4521 				  &mqd_gpu_addr);
4522 		if (r) {
4523 			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4524 			gfx_v8_0_cp_compute_fini(adev);
4525 			return r;
4526 		}
4527 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4528 		if (r) {
4529 			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4530 			gfx_v8_0_cp_compute_fini(adev);
4531 			return r;
4532 		}
4533 
4534 		/* init the mqd struct */
4535 		memset(buf, 0, sizeof(struct vi_mqd));
4536 
4537 		mqd = (struct vi_mqd *)buf;
4538 		mqd->header = 0xC0310800;
4539 		mqd->compute_pipelinestat_enable = 0x00000001;
4540 		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4541 		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4542 		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4543 		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4544 		mqd->compute_misc_reserved = 0x00000003;
4545 
4546 		mutex_lock(&adev->srbm_mutex);
4547 		vi_srbm_select(adev, ring->me,
4548 			       ring->pipe,
4549 			       ring->queue, 0);
4550 
4551 		/* disable wptr polling */
4552 		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4553 		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4554 		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4555 
4556 		mqd->cp_hqd_eop_base_addr_lo =
4557 			RREG32(mmCP_HQD_EOP_BASE_ADDR);
4558 		mqd->cp_hqd_eop_base_addr_hi =
4559 			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4560 
4561 		/* enable doorbell? */
4562 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4563 		if (use_doorbell) {
4564 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4565 		} else {
4566 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4567 		}
4568 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4569 		mqd->cp_hqd_pq_doorbell_control = tmp;
4570 
4571 		/* disable the queue if it's active */
4572 		mqd->cp_hqd_dequeue_request = 0;
4573 		mqd->cp_hqd_pq_rptr = 0;
4574 		mqd->cp_hqd_pq_wptr= 0;
4575 		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4576 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4577 			for (j = 0; j < adev->usec_timeout; j++) {
4578 				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4579 					break;
4580 				udelay(1);
4581 			}
4582 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4583 			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4584 			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4585 		}
4586 
4587 		/* set the pointer to the MQD */
4588 		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4589 		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4590 		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4591 		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4592 
4593 		/* set MQD vmid to 0 */
4594 		tmp = RREG32(mmCP_MQD_CONTROL);
4595 		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4596 		WREG32(mmCP_MQD_CONTROL, tmp);
4597 		mqd->cp_mqd_control = tmp;
4598 
4599 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4600 		hqd_gpu_addr = ring->gpu_addr >> 8;
4601 		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4602 		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4603 		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4604 		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4605 
4606 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4607 		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4608 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4609 				    (order_base_2(ring->ring_size / 4) - 1));
4610 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4611 			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4612 #ifdef __BIG_ENDIAN
4613 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4614 #endif
4615 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4616 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4617 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4618 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4619 		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4620 		mqd->cp_hqd_pq_control = tmp;
4621 
4622 		/* set the wb address wether it's enabled or not */
4623 		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4624 		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4625 		mqd->cp_hqd_pq_rptr_report_addr_hi =
4626 			upper_32_bits(wb_gpu_addr) & 0xffff;
4627 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4628 		       mqd->cp_hqd_pq_rptr_report_addr_lo);
4629 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4630 		       mqd->cp_hqd_pq_rptr_report_addr_hi);
4631 
4632 		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4633 		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4634 		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4635 		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4636 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4637 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4638 		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4639 
4640 		/* enable the doorbell if requested */
4641 		if (use_doorbell) {
4642 			if ((adev->asic_type == CHIP_CARRIZO) ||
4643 			    (adev->asic_type == CHIP_FIJI) ||
4644 			    (adev->asic_type == CHIP_STONEY) ||
4645 			    (adev->asic_type == CHIP_POLARIS11) ||
4646 			    (adev->asic_type == CHIP_POLARIS10)) {
4647 				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4648 				       AMDGPU_DOORBELL_KIQ << 2);
4649 				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4650 				       AMDGPU_DOORBELL_MEC_RING7 << 2);
4651 			}
4652 			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4653 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4654 					    DOORBELL_OFFSET, ring->doorbell_index);
4655 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4656 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4657 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4658 			mqd->cp_hqd_pq_doorbell_control = tmp;
4659 
4660 		} else {
4661 			mqd->cp_hqd_pq_doorbell_control = 0;
4662 		}
4663 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4664 		       mqd->cp_hqd_pq_doorbell_control);
4665 
4666 		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4667 		ring->wptr = 0;
4668 		mqd->cp_hqd_pq_wptr = ring->wptr;
4669 		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4670 		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4671 
4672 		/* set the vmid for the queue */
4673 		mqd->cp_hqd_vmid = 0;
4674 		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4675 
4676 		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4677 		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4678 		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4679 		mqd->cp_hqd_persistent_state = tmp;
4680 		if (adev->asic_type == CHIP_STONEY ||
4681 			adev->asic_type == CHIP_POLARIS11 ||
4682 			adev->asic_type == CHIP_POLARIS10) {
4683 			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4684 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4685 			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4686 		}
4687 
4688 		/* activate the queue */
4689 		mqd->cp_hqd_active = 1;
4690 		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4691 
4692 		vi_srbm_select(adev, 0, 0, 0, 0);
4693 		mutex_unlock(&adev->srbm_mutex);
4694 
4695 		amdgpu_bo_kunmap(ring->mqd_obj);
4696 		amdgpu_bo_unreserve(ring->mqd_obj);
4697 	}
4698 
4699 	if (use_doorbell) {
4700 		tmp = RREG32(mmCP_PQ_STATUS);
4701 		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4702 		WREG32(mmCP_PQ_STATUS, tmp);
4703 	}
4704 
4705 	gfx_v8_0_cp_compute_enable(adev, true);
4706 
4707 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4708 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4709 
4710 		ring->ready = true;
4711 		r = amdgpu_ring_test_ring(ring);
4712 		if (r)
4713 			ring->ready = false;
4714 	}
4715 
4716 	return 0;
4717 }
4718 
/*
 * Bring up the CP (command processor): load the CE/PFP/ME and MEC
 * microcode — either directly (legacy) or by verifying the SMU-assisted
 * load — then restart the gfx and compute rings.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* keep GUI idle interrupts masked during bring-up on dGPUs */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loaded the ucode; just confirm each image landed */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			/* Topaz loads the MEC ucode directly even with an SMU */
			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
										 AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	/* rings are up; re-enable GUI idle interrupts */
	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4777 
/* Enable or disable both the gfx and compute command processors together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4783 
/*
 * IP-block hw_init hook: program golden registers, initialize the GFX
 * engine, then bring up the RLC and CP. Returns 0 or a negative errno.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	/* CP resume is the last step; propagate its result directly */
	return gfx_v8_0_cp_resume(adev);
}
4803 
/*
 * IP-block hw_fini hook: drop the privileged-access interrupts, stop the
 * CP and RLC, free the compute MQD backing store, and ungate GFX power.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	/* leave GFX ungated so the next init starts from a known PG state */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
4819 
/* Suspend is identical to hw_fini for this IP block. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
}
4826 
/* Resume is identical to hw_init for this IP block. */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
}
4833 
4834 static bool gfx_v8_0_is_idle(void *handle)
4835 {
4836 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4837 
4838 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4839 		return false;
4840 	else
4841 		return true;
4842 }
4843 
4844 static int gfx_v8_0_wait_for_idle(void *handle)
4845 {
4846 	unsigned i;
4847 	u32 tmp;
4848 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4849 
4850 	for (i = 0; i < adev->usec_timeout; i++) {
4851 		/* read MC_STATUS */
4852 		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4853 
4854 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4855 			return 0;
4856 		udelay(1);
4857 	}
4858 	return -ETIMEDOUT;
4859 }
4860 
4861 static int gfx_v8_0_soft_reset(void *handle)
4862 {
4863 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4864 	u32 tmp;
4865 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4866 
4867 	/* GRBM_STATUS */
4868 	tmp = RREG32(mmGRBM_STATUS);
4869 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4870 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4871 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4872 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4873 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4874 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4875 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4876 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4877 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4878 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4879 	}
4880 
4881 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4882 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4883 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4884 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4885 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4886 	}
4887 
4888 	/* GRBM_STATUS2 */
4889 	tmp = RREG32(mmGRBM_STATUS2);
4890 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4891 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4892 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4893 
4894 	/* SRBM_STATUS */
4895 	tmp = RREG32(mmSRBM_STATUS);
4896 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4897 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4898 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4899 
4900 	if (grbm_soft_reset || srbm_soft_reset) {
4901 		/* stop the rlc */
4902 		gfx_v8_0_rlc_stop(adev);
4903 
4904 		/* Disable GFX parsing/prefetching */
4905 		gfx_v8_0_cp_gfx_enable(adev, false);
4906 
4907 		/* Disable MEC parsing/prefetching */
4908 		gfx_v8_0_cp_compute_enable(adev, false);
4909 
4910 		if (grbm_soft_reset || srbm_soft_reset) {
4911 			tmp = RREG32(mmGMCON_DEBUG);
4912 			tmp = REG_SET_FIELD(tmp,
4913 					    GMCON_DEBUG, GFX_STALL, 1);
4914 			tmp = REG_SET_FIELD(tmp,
4915 					    GMCON_DEBUG, GFX_CLEAR, 1);
4916 			WREG32(mmGMCON_DEBUG, tmp);
4917 
4918 			udelay(50);
4919 		}
4920 
4921 		if (grbm_soft_reset) {
4922 			tmp = RREG32(mmGRBM_SOFT_RESET);
4923 			tmp |= grbm_soft_reset;
4924 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4925 			WREG32(mmGRBM_SOFT_RESET, tmp);
4926 			tmp = RREG32(mmGRBM_SOFT_RESET);
4927 
4928 			udelay(50);
4929 
4930 			tmp &= ~grbm_soft_reset;
4931 			WREG32(mmGRBM_SOFT_RESET, tmp);
4932 			tmp = RREG32(mmGRBM_SOFT_RESET);
4933 		}
4934 
4935 		if (srbm_soft_reset) {
4936 			tmp = RREG32(mmSRBM_SOFT_RESET);
4937 			tmp |= srbm_soft_reset;
4938 			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4939 			WREG32(mmSRBM_SOFT_RESET, tmp);
4940 			tmp = RREG32(mmSRBM_SOFT_RESET);
4941 
4942 			udelay(50);
4943 
4944 			tmp &= ~srbm_soft_reset;
4945 			WREG32(mmSRBM_SOFT_RESET, tmp);
4946 			tmp = RREG32(mmSRBM_SOFT_RESET);
4947 		}
4948 
4949 		if (grbm_soft_reset || srbm_soft_reset) {
4950 			tmp = RREG32(mmGMCON_DEBUG);
4951 			tmp = REG_SET_FIELD(tmp,
4952 					    GMCON_DEBUG, GFX_STALL, 0);
4953 			tmp = REG_SET_FIELD(tmp,
4954 					    GMCON_DEBUG, GFX_CLEAR, 0);
4955 			WREG32(mmGMCON_DEBUG, tmp);
4956 		}
4957 
4958 		/* Wait a little for things to settle down */
4959 		udelay(50);
4960 	}
4961 	return 0;
4962 }
4963 
4964 /**
4965  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4966  *
4967  * @adev: amdgpu_device pointer
4968  *
4969  * Fetches a GPU clock counter snapshot.
4970  * Returns the 64 bit clock counter snapshot.
4971  */
4972 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4973 {
4974 	uint64_t clock;
4975 
4976 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4977 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4978 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4979 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4980 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4981 	return clock;
4982 }
4983 
/*
 * Emit WRITE_DATA packets that program the per-VMID GDS allocation
 * registers (GDS base/size, GWS base/size, OA mask) on a ring.
 * The base/size arguments arrive in bytes and are converted to the
 * hardware's allocation granularity via the AMDGPU_*_SHIFT constants.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base packed into a single register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5031 
/*
 * IP-block early_init hook: record the ring counts and install the
 * ring/irq/gds/rlc function tables before any hardware is touched.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5045 
/*
 * IP-block late_init hook: enable the privileged-access interrupts, run
 * the EDC GPR workarounds, and gate GFX power.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5069 
5070 static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5071 		bool enable)
5072 {
5073 	uint32_t data, temp;
5074 
5075 	/* Send msg to SMU via Powerplay */
5076 	amdgpu_set_powergating_state(adev,
5077 			AMD_IP_BLOCK_TYPE_SMC,
5078 			enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5079 
5080 	if (enable) {
5081 		/* Enable static MGPG */
5082 		temp = data = RREG32(mmRLC_PG_CNTL);
5083 		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5084 
5085 		if (temp != data)
5086 			WREG32(mmRLC_PG_CNTL, data);
5087 	} else {
5088 		temp = data = RREG32(mmRLC_PG_CNTL);
5089 		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5090 
5091 		if (temp != data)
5092 			WREG32(mmRLC_PG_CNTL, data);
5093 	}
5094 }
5095 
5096 static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5097 		bool enable)
5098 {
5099 	uint32_t data, temp;
5100 
5101 	if (enable) {
5102 		/* Enable dynamic MGPG */
5103 		temp = data = RREG32(mmRLC_PG_CNTL);
5104 		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5105 
5106 		if (temp != data)
5107 			WREG32(mmRLC_PG_CNTL, data);
5108 	} else {
5109 		temp = data = RREG32(mmRLC_PG_CNTL);
5110 		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5111 
5112 		if (temp != data)
5113 			WREG32(mmRLC_PG_CNTL, data);
5114 	}
5115 }
5116 
5117 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5118 		bool enable)
5119 {
5120 	uint32_t data, temp;
5121 
5122 	if (enable) {
5123 		/* Enable quick PG */
5124 		temp = data = RREG32(mmRLC_PG_CNTL);
5125 		data |= 0x100000;
5126 
5127 		if (temp != data)
5128 			WREG32(mmRLC_PG_CNTL, data);
5129 	} else {
5130 		temp = data = RREG32(mmRLC_PG_CNTL);
5131 		data &= ~0x100000;
5132 
5133 		if (temp != data)
5134 			WREG32(mmRLC_PG_CNTL, data);
5135 	}
5136 }
5137 
5138 static int gfx_v8_0_set_powergating_state(void *handle,
5139 					  enum amd_powergating_state state)
5140 {
5141 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5142 
5143 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5144 		return 0;
5145 
5146 	switch (adev->asic_type) {
5147 	case CHIP_POLARIS11:
5148 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
5149 			polaris11_enable_gfx_static_mg_power_gating(adev,
5150 					state == AMD_PG_STATE_GATE ? true : false);
5151 		else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
5152 			polaris11_enable_gfx_dynamic_mg_power_gating(adev,
5153 					state == AMD_PG_STATE_GATE ? true : false);
5154 		else
5155 			polaris11_enable_gfx_quick_mg_power_gating(adev,
5156 					state == AMD_PG_STATE_GATE ? true : false);
5157 		break;
5158 	default:
5159 		break;
5160 	}
5161 
5162 	return 0;
5163 }
5164 
/*
 * Issue a BPM serdes command (@cmd) against register @reg_addr on all
 * CU/non-CU serdes masters across every SE/SH.
 * Note: on Stoney the BPM_DATA and REG_ADDR fields are deliberately not
 * cleared from the read value before OR-ing in the new ones — presumably
 * a hardware quirk; TODO confirm against the register spec.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to all shader engines / shader arrays */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	/* clear the command/control fields, keeping everything else */
	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
			data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* program the command, target register, and broadcast BPM address */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5205 
5206 #define MSG_ENTER_RLC_SAFE_MODE     1
5207 #define MSG_EXIT_RLC_SAFE_MODE      0
5208 
5209 #define RLC_GPR_REG2__REQ_MASK           0x00000001
5210 #define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
5211 #define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
5212 
/*
 * Carrizo-family RLC safe-mode entry: send MSG_ENTER_RLC_SAFE_MODE to
 * the RLC through RLC_GPR_REG2 and wait for the GFX clock/power status
 * bits and the REQ ack. A no-op when the RLC F32 core is disabled or no
 * relevant CG/PG features are enabled.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* nothing to do if the RLC is not running */
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* request: REQ=1, MESSAGE=enter safe mode */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait until both GFX clock and power report active */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to clear the REQ bit (ack) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5248 
/*
 * Carrizo-family RLC safe-mode exit: send MSG_EXIT_RLC_SAFE_MODE through
 * RLC_GPR_REG2, then wait for the RLC to clear the REQ bit. The ack wait
 * runs unconditionally (outside the feature-flag check), matching the
 * entry path's request/ack protocol.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* nothing to do if the RLC is not running */
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* request: REQ=1, MESSAGE=exit safe mode */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to clear the REQ bit (ack) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
			break;
		udelay(1);
	}
}
5274 
/*
 * Iceland-family RLC safe-mode entry: use the dedicated RLC_SAFE_MODE
 * register (CMD + MESSAGE=1) rather than RLC_GPR_REG2, then wait for the
 * GFX clock/power status bits and the CMD ack.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* nothing to do if the RLC is not running */
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* request: CMD=1, MESSAGE=1 (enter safe mode) */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait until both GFX clock and power report active */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to clear the CMD bit (ack) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5308 
/*
 * Iceland-family RLC safe-mode exit: write CMD with MESSAGE=0 to
 * RLC_SAFE_MODE (only if we actually entered safe mode), then wait for
 * the CMD-bit ack. The ack wait runs unconditionally.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* nothing to do if the RLC is not running */
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* request: CMD=1, MESSAGE=0 (exit safe mode) */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to clear the CMD bit (ack) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
			break;
		udelay(1);
	}
}
5333 
/* No-op safe-mode entry for ASICs that need no RLC handshake. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5338 
/* No-op safe-mode exit for ASICs that need no RLC handshake. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5343 
/* RLC safe-mode ops for Carrizo-family ASICs (RLC_GPR_REG2 protocol). */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};
5348 
/* RLC safe-mode ops for Iceland-family ASICs (RLC_SAFE_MODE protocol). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5353 
/* RLC safe-mode ops for ASICs that need no handshake at all. */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5358 
/*
 * Enable/disable medium grain clock gating (MGCG) and the associated
 * RLC/CP memory light sleep and CGTS tree-shade gating, under RLC safe
 * mode. The numbered steps encode the required register ordering; do
 * not reorder them.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				/* 1 - RLC memory Light sleep */
				temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmRLC_MEM_SLP_CNTL, data);
			}

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				/* 2 - CP memory Light sleep */
				temp = data = RREG32(mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmCP_MEM_SLP_CNTL, data);
			}
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE (APUs keep the GRBM override set) */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5471 
/*
 * Enable/disable coarse grain clock gating (CGCG) and coarse grain light
 * sleep (CGLS) via RLC_CGCG_CGLS_CTRL, under RLC safe mode. The register
 * write ordering is deliberate; do not reorder.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		/* drop the CGCG override so the RLC can gate */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* also drop the CGLS override */
			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5560 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5561 					    bool enable)
5562 {
5563 	if (enable) {
5564 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5565 		 * ===  MGCG + MGLS + TS(CG/LS) ===
5566 		 */
5567 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5568 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5569 	} else {
5570 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5571 		 * ===  CGCG + CGLS ===
5572 		 */
5573 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5574 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5575 	}
5576 	return 0;
5577 }
5578 
5579 static int gfx_v8_0_set_clockgating_state(void *handle,
5580 					  enum amd_clockgating_state state)
5581 {
5582 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5583 
5584 	switch (adev->asic_type) {
5585 	case CHIP_FIJI:
5586 	case CHIP_CARRIZO:
5587 	case CHIP_STONEY:
5588 		gfx_v8_0_update_gfx_clock_gating(adev,
5589 						 state == AMD_CG_STATE_GATE ? true : false);
5590 		break;
5591 	default:
5592 		break;
5593 	}
5594 	return 0;
5595 }
5596 
5597 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5598 {
5599 	u32 rptr;
5600 
5601 	rptr = ring->adev->wb.wb[ring->rptr_offs];
5602 
5603 	return rptr;
5604 }
5605 
5606 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5607 {
5608 	struct amdgpu_device *adev = ring->adev;
5609 	u32 wptr;
5610 
5611 	if (ring->use_doorbell)
5612 		/* XXX check if swapping is necessary on BE */
5613 		wptr = ring->adev->wb.wb[ring->wptr_offs];
5614 	else
5615 		wptr = RREG32(mmCP_RB0_WPTR);
5616 
5617 	return wptr;
5618 }
5619 
5620 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5621 {
5622 	struct amdgpu_device *adev = ring->adev;
5623 
5624 	if (ring->use_doorbell) {
5625 		/* XXX check if swapping is necessary on BE */
5626 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
5627 		WDOORBELL32(ring->doorbell_index, ring->wptr);
5628 	} else {
5629 		WREG32(mmCP_RB0_WPTR, ring->wptr);
5630 		(void)RREG32(mmCP_RB0_WPTR);
5631 	}
5632 }
5633 
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 *
 * Emits a WAIT_REG_MEM packet that writes to GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the bit for this CP queue is set.  The bit is
 * chosen from the ring's me/pipe; compute queues outside MEC1/MEC2 are
 * silently ignored.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			/* MEC1: CP2..CP5 bits, one per pipe */
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			/* MEC2: CP6..CP9 bits, one per pipe */
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
5665 
/*
 * gfx_v8_0_ring_emit_hdp_invalidate - emit an HDP invalidate on the ring
 *
 * Emits a confirmed CP WRITE_DATA of 1 to mmHDP_DEBUG0, which is the
 * driver's HDP invalidate trigger on VI parts.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
5677 
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the gfx ring
 *
 * @ring: ring to emit on
 * @ib: indirect buffer to schedule
 * @vm_id: VMID the IB executes under (placed in bits 24+ of the control
 *         dword)
 * @ctx_switch: true when this is the first IB of a new ring frame, in
 *              which case a SWITCH_BUFFER is emitted first
 *
 * First publishes the predicted post-emit rptr to next_rptr_gpu_addr via
 * WRITE_DATA, then emits the INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST
 * for CE IBs) packet with the IB address, length and vm_id.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;
	/* 5 dwords for the WRITE_DATA packet emitted below */
	u32 next_rptr = ring->wptr + 5;

	/* account for the 2-dword SWITCH_BUFFER emitted below */
	if (ctx_switch)
		next_rptr += 2;

	/* account for the 4-dword INDIRECT_BUFFER packet itself */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5717 
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring
 *
 * @ring: compute ring to emit on
 * @ib: indirect buffer to schedule
 * @vm_id: VMID the IB executes under
 * @ctx_switch: unused on compute rings (no SWITCH_BUFFER is emitted)
 *
 * Like the gfx variant, but always uses INDIRECT_BUFFER with the
 * INDIRECT_BUFFER_VALID bit set and never emits a context switch.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;
	/* 5 dwords for the WRITE_DATA packet emitted below */
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	/* account for the 4-dword INDIRECT_BUFFER packet itself */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  (2 << 0) |
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5747 
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 *
 * @ring: gfx ring to emit on
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt on completion
 *
 * Uses EVENT_WRITE_EOP with TC/TCL1 flush actions so caches are flushed
 * before the fence value lands.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
5768 
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for this ring's latest fence
 *
 * Emits a WAIT_REG_MEM that polls the ring's fence memory until it equals
 * the current sync_seq, i.e. until all previously submitted work on this
 * ring has signalled.  On the gfx ring the wait runs on the PFP and is
 * followed by two SWITCH_BUFFERs to keep the CE from racing ahead.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
5793 
/*
 * gfx_v8_0_ring_emit_vm_flush - flush the TLB for a VMID from the ring
 *
 * @ring: ring to emit on
 * @vm_id: VMID whose page table base is updated and whose TLB entry is
 *         invalidated
 * @pd_addr: new page directory base address (written as a 4KB page frame)
 *
 * Writes the per-VMID page table base register (contexts 0-7 and 8-15
 * live in two separate register ranges), triggers VM_INVALIDATE_REQUEST
 * for this VMID, then polls the same register until the write completes.
 * On the gfx ring a PFP_SYNC_ME plus two SWITCH_BUFFERs follow to keep
 * the PFP/CE coherent with the ME.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
5844 
5845 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5846 {
5847 	return ring->adev->wb.wb[ring->rptr_offs];
5848 }
5849 
5850 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5851 {
5852 	return ring->adev->wb.wb[ring->wptr_offs];
5853 }
5854 
/*
 * gfx_v8_0_ring_set_wptr_compute - commit a compute ring's write pointer
 *
 * Shadows the new wptr in the writeback slot, then rings the queue's
 * doorbell.  Compute queues always use doorbells, so there is no MMIO
 * fallback path here.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
5863 
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 *
 * @ring: compute ring to emit on
 * @addr: GPU address the fence value is written to
 * @seq: fence sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT requests an interrupt on completion
 *
 * Compute uses RELEASE_MEM instead of EVENT_WRITE_EOP; the packet layout
 * differs from the gfx variant (sel dword precedes the address) but the
 * cache-flush actions are the same.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
5884 
5885 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5886 						 enum amdgpu_interrupt_state state)
5887 {
5888 	u32 cp_int_cntl;
5889 
5890 	switch (state) {
5891 	case AMDGPU_IRQ_STATE_DISABLE:
5892 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5893 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5894 					    TIME_STAMP_INT_ENABLE, 0);
5895 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5896 		break;
5897 	case AMDGPU_IRQ_STATE_ENABLE:
5898 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5899 		cp_int_cntl =
5900 			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5901 				      TIME_STAMP_INT_ENABLE, 1);
5902 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5903 		break;
5904 	default:
5905 		break;
5906 	}
5907 }
5908 
5909 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5910 						     int me, int pipe,
5911 						     enum amdgpu_interrupt_state state)
5912 {
5913 	u32 mec_int_cntl, mec_int_cntl_reg;
5914 
5915 	/*
5916 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
5917 	 * handles the setting of interrupts for this specific pipe. All other
5918 	 * pipes' interrupts are set by amdkfd.
5919 	 */
5920 
5921 	if (me == 1) {
5922 		switch (pipe) {
5923 		case 0:
5924 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5925 			break;
5926 		default:
5927 			DRM_DEBUG("invalid pipe %d\n", pipe);
5928 			return;
5929 		}
5930 	} else {
5931 		DRM_DEBUG("invalid me %d\n", me);
5932 		return;
5933 	}
5934 
5935 	switch (state) {
5936 	case AMDGPU_IRQ_STATE_DISABLE:
5937 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5938 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5939 					     TIME_STAMP_INT_ENABLE, 0);
5940 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5941 		break;
5942 	case AMDGPU_IRQ_STATE_ENABLE:
5943 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5944 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5945 					     TIME_STAMP_INT_ENABLE, 1);
5946 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5947 		break;
5948 	default:
5949 		break;
5950 	}
5951 }
5952 
5953 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5954 					     struct amdgpu_irq_src *source,
5955 					     unsigned type,
5956 					     enum amdgpu_interrupt_state state)
5957 {
5958 	u32 cp_int_cntl;
5959 
5960 	switch (state) {
5961 	case AMDGPU_IRQ_STATE_DISABLE:
5962 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5963 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5964 					    PRIV_REG_INT_ENABLE, 0);
5965 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5966 		break;
5967 	case AMDGPU_IRQ_STATE_ENABLE:
5968 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5969 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5970 					    PRIV_REG_INT_ENABLE, 1);
5971 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5972 		break;
5973 	default:
5974 		break;
5975 	}
5976 
5977 	return 0;
5978 }
5979 
5980 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5981 					      struct amdgpu_irq_src *source,
5982 					      unsigned type,
5983 					      enum amdgpu_interrupt_state state)
5984 {
5985 	u32 cp_int_cntl;
5986 
5987 	switch (state) {
5988 	case AMDGPU_IRQ_STATE_DISABLE:
5989 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5990 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5991 					    PRIV_INSTR_INT_ENABLE, 0);
5992 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5993 		break;
5994 	case AMDGPU_IRQ_STATE_ENABLE:
5995 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5996 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5997 					    PRIV_INSTR_INT_ENABLE, 1);
5998 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5999 		break;
6000 	default:
6001 		break;
6002 	}
6003 
6004 	return 0;
6005 }
6006 
/*
 * gfx_v8_0_set_eop_interrupt_state - irq_src .set callback for CP EOP irqs
 *
 * Dispatches the requested state to the gfx ring handler or to the
 * per-MEC/per-pipe compute handler based on the irq type.  Unknown types
 * are ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6045 
/*
 * gfx_v8_0_eop_irq - CP end-of-pipe interrupt handler
 *
 * Decodes me/pipe/queue from the IV ring_id field (me in bits 3:2, pipe
 * in bits 1:0, queue in bits 6:4) and runs fence processing on the ring
 * the interrupt belongs to: me 0 is the single gfx ring, me 1/2 are the
 * compute MECs.  Always returns 0.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6077 
/*
 * gfx_v8_0_priv_reg_irq - privileged register access fault handler
 *
 * A command stream touched a privileged register; log the error and
 * schedule a GPU reset via the device's reset_work.  Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6086 
/*
 * gfx_v8_0_priv_inst_irq - privileged instruction fault handler
 *
 * A command stream executed an illegal instruction; log the error and
 * schedule a GPU reset via the device's reset_work.  Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6095 
/* IP-block callbacks for the GFX v8 block, registered with the amdgpu core. */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6112 
/* Ring callbacks for the gfx ring (EOP fence, CP_RB0 wptr, gfx IB emit). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6130 
/* Ring callbacks for compute rings (RELEASE_MEM fence, doorbell wptr). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6148 
6149 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6150 {
6151 	int i;
6152 
6153 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6154 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6155 
6156 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6157 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6158 }
6159 
/* IRQ source callbacks: CP end-of-pipe interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

/* IRQ source callbacks: privileged register access faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

/* IRQ source callbacks: privileged instruction faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6174 
6175 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6176 {
6177 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6178 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6179 
6180 	adev->gfx.priv_reg_irq.num_types = 1;
6181 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6182 
6183 	adev->gfx.priv_inst_irq.num_types = 1;
6184 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6185 }
6186 
6187 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6188 {
6189 	switch (adev->asic_type) {
6190 	case CHIP_TOPAZ:
6191 	case CHIP_STONEY:
6192 		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6193 		break;
6194 	case CHIP_CARRIZO:
6195 		adev->gfx.rlc.funcs = &cz_rlc_funcs;
6196 		break;
6197 	default:
6198 		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6199 		break;
6200 	}
6201 }
6202 
6203 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6204 {
6205 	/* init asci gds info */
6206 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6207 	adev->gds.gws.total_size = 64;
6208 	adev->gds.oa.total_size = 16;
6209 
6210 	if (adev->gds.mem.total_size == 64 * 1024) {
6211 		adev->gds.mem.gfx_partition_size = 4096;
6212 		adev->gds.mem.cs_partition_size = 4096;
6213 
6214 		adev->gds.gws.gfx_partition_size = 4;
6215 		adev->gds.gws.cs_partition_size = 4;
6216 
6217 		adev->gds.oa.gfx_partition_size = 4;
6218 		adev->gds.oa.cs_partition_size = 1;
6219 	} else {
6220 		adev->gds.mem.gfx_partition_size = 1024;
6221 		adev->gds.mem.cs_partition_size = 1024;
6222 
6223 		adev->gds.gws.gfx_partition_size = 16;
6224 		adev->gds.gws.cs_partition_size = 16;
6225 
6226 		adev->gds.oa.gfx_partition_size = 4;
6227 		adev->gds.oa.cs_partition_size = 4;
6228 	}
6229 }
6230 
6231 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6232 {
6233 	u32 data, mask;
6234 
6235 	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6236 	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6237 
6238 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6239 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6240 
6241 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6242 
6243 	return (~data) & mask;
6244 }
6245 
/*
 * gfx_v8_0_get_cu_info - gather per-SE/SH compute unit bitmaps
 *
 * Walks every shader engine / shader array under grbm_idx_mutex, selects
 * it via gfx_v8_0_select_se_sh(), records its active-CU bitmap, counts
 * active CUs, and accumulates an "always on" CU mask (up to 2 CUs per
 * SH).  Restores broadcast SE/SH selection before releasing the mutex.
 * Results land in adev->gfx.cu_info.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;

	memset(cu_info, 0, sizeof(*cu_info));

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* scan up to 16 CU bits; first 2 active CUs are "always on" */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* 0xffffffff selects broadcast to all SEs/SHs again */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6282