/* xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 32786fdc9506aeba98278c1844d4bfb766863832) */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34 
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37 
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40 
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43 
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48 
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51 
52 #include "smu/smu_7_1_3_d.h"
53 
54 #define GFX8_NUM_GFX_RINGS     1
55 #define GFX8_NUM_COMPUTE_RINGS 8
56 
57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
61 
62 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
63 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
64 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
65 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
66 #define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
67 #define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
68 #define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
69 #define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
70 #define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
71 
72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
78 
79 /* BPM SERDES CMD */
80 #define SET_BPM_SERDES_CMD    1
81 #define CLE_BPM_SERDES_CMD    0
82 
83 /* BPM Register Address*/
/* Virtual BPM register indices; NOTE(review): presumably selected via the
 * SET/CLE_BPM_SERDES_CMD commands defined above — confirm against RLC docs. */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX            /* sentinel: number of BPM registers */
};
92 
93 #define RLC_FormatDirectRegListLength        14
94 
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
101 
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
107 
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
114 
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
120 
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
127 
128 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
133 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
134 
135 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
141 
/* Per-VMID GDS register offsets, indexed by VMID (0-15): the GDS memory
 * base/size registers plus the GWS and OA registers for that VMID. */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
161 
/* Tonga (rev A11) golden register tweaks.  Each entry is a register offset
 * followed by two words (mask and value — NOTE(review): confirm exact RMW
 * semantics in amdgpu_program_register_sequence()).  Applied from
 * gfx_v8_0_init_golden_registers(). */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
181 
/* Tonga common golden settings: raster config, GB_ADDR_CONFIG (matches
 * TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003) and SPI CU reservations.
 * Register/mask/value triplets for amdgpu_program_register_sequence(). */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
193 
/* Tonga MGCG/CGCG clockgating init sequence (register/mask/value triplets).
 * Note the GRBM_GFX_INDEX broadcast writes (0xe0000000) that bracket the
 * per-block CGTT programming so it hits all SEs/SHs/instances. */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
272 
/* Polaris11 (rev A11) golden register tweaks (register/mask/value
 * triplets for amdgpu_program_register_sequence()). */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
293 
/* Polaris11 common golden settings; GB_ADDR_CONFIG value matches
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002). */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
303 
/* Polaris10 (rev A11) golden register tweaks (register/mask/value
 * triplets for amdgpu_program_register_sequence()). */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
324 
/* Polaris10 common golden settings: raster config, GB_ADDR_CONFIG
 * (0x22011003) and SPI CU reservations. */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
336 
/* Fiji common golden settings (register/mask/value triplets).  GRBM
 * broadcast select is re-issued before the final SPI_CONFIG_CNTL_1 write. */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
350 
/* Fiji (rev A10) golden register tweaks (register/mask/value triplets
 * for amdgpu_program_register_sequence()). */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
365 
/* Fiji MGCG/CGCG clockgating init sequence (register/mask/value triplets);
 * unlike Tonga/Iceland/Carrizo, no per-CU CGTS programming here. */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
404 
/* Iceland/Topaz (rev A11) golden register tweaks (register/mask/value
 * triplets for amdgpu_program_register_sequence()). */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
424 
/* Iceland/Topaz common golden settings; GB_ADDR_CONFIG value matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001). */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
436 
/* Iceland/Topaz MGCG/CGCG clockgating init sequence (register/mask/value
 * triplets).  Only CU0-CU5 are programmed here (smaller part than
 * Tonga/Carrizo), and there is no trailing CP_MEM_SLP_CNTL entry. */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
504 
/* Carrizo (rev A11) golden register tweaks (register/mask/value triplets
 * for amdgpu_program_register_sequence()). */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
520 
/* Carrizo common golden settings; GB_ADDR_CONFIG value matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001). */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
532 
/* Carrizo MGCG/CGCG clockgating init sequence (register/mask/value
 * triplets), including per-CU CGTS programming for CU0-CU7. */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
611 
/* Stoney (rev A11) golden register tweaks (register/mask/value triplets
 * for amdgpu_program_register_sequence()). */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
625 
/* Stoney common golden settings (register/mask/value triplets);
 * GB_ADDR_CONFIG 0x12010001 differs from the Carrizo golden value. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
637 
/* Stoney MGCG/CGCG clockgating init — much shorter than the other ASICs:
 * only CGCG/CGLS control, CP/RLC memory light sleep and CGTS SM control. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
646 
647 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
648 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
649 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
650 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
651 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
652 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
653 
/**
 * gfx_v8_0_init_golden_registers - apply per-ASIC "golden" register tables
 * @adev: amdgpu device pointer
 *
 * Programs the ASIC-specific tables defined above via
 * amdgpu_program_register_sequence(), in a fixed order: clockgating init
 * (where a table exists), then the rev-specific settings, then the common
 * settings.  Polaris10/11 have no mgcg/cgcg table here.  The ordering is
 * hardware-programming order and should be preserved.  Unknown ASIC types
 * are silently ignored.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		/* SMC-indirect clock register; magic value undocumented here. */
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific quirk keyed on PCI revision/subsystem IDs
		 * (subsys vendors: 0x1002 AMD, 0x1043 ASUSTeK, 0x1682 XFX):
		 * issues two atombios i2c transactions — NOTE(review):
		 * presumably reprogramming an on-board device at i2c address
		 * 0x96; confirm intent with the original quirk commit. */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
741 
742 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
743 {
744 	int i;
745 
746 	adev->gfx.scratch.num_reg = 7;
747 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
748 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
749 		adev->gfx.scratch.free[i] = true;
750 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
751 	}
752 }
753 
754 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
755 {
756 	struct amdgpu_device *adev = ring->adev;
757 	uint32_t scratch;
758 	uint32_t tmp = 0;
759 	unsigned i;
760 	int r;
761 
762 	r = amdgpu_gfx_scratch_get(adev, &scratch);
763 	if (r) {
764 		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
765 		return r;
766 	}
767 	WREG32(scratch, 0xCAFEDEAD);
768 	r = amdgpu_ring_alloc(ring, 3);
769 	if (r) {
770 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
771 			  ring->idx, r);
772 		amdgpu_gfx_scratch_free(adev, scratch);
773 		return r;
774 	}
775 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
776 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
777 	amdgpu_ring_write(ring, 0xDEADBEEF);
778 	amdgpu_ring_commit(ring);
779 
780 	for (i = 0; i < adev->usec_timeout; i++) {
781 		tmp = RREG32(scratch);
782 		if (tmp == 0xDEADBEEF)
783 			break;
784 		DRM_UDELAY(1);
785 	}
786 	if (i < adev->usec_timeout) {
787 		DRM_INFO("ring test on %d succeeded in %d usecs\n",
788 			 ring->idx, i);
789 	} else {
790 		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
791 			  ring->idx, scratch, tmp);
792 		r = -EINVAL;
793 	}
794 	amdgpu_gfx_scratch_free(adev, scratch);
795 	return r;
796 }
797 
798 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
799 {
800 	struct amdgpu_device *adev = ring->adev;
801 	struct amdgpu_ib ib;
802 	struct dma_fence *f = NULL;
803 	uint32_t scratch;
804 	uint32_t tmp = 0;
805 	long r;
806 
807 	r = amdgpu_gfx_scratch_get(adev, &scratch);
808 	if (r) {
809 		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
810 		return r;
811 	}
812 	WREG32(scratch, 0xCAFEDEAD);
813 	memset(&ib, 0, sizeof(ib));
814 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
815 	if (r) {
816 		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
817 		goto err1;
818 	}
819 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
820 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
821 	ib.ptr[2] = 0xDEADBEEF;
822 	ib.length_dw = 3;
823 
824 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
825 	if (r)
826 		goto err2;
827 
828 	r = dma_fence_wait_timeout(f, false, timeout);
829 	if (r == 0) {
830 		DRM_ERROR("amdgpu: IB test timed out.\n");
831 		r = -ETIMEDOUT;
832 		goto err2;
833 	} else if (r < 0) {
834 		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
835 		goto err2;
836 	}
837 	tmp = RREG32(scratch);
838 	if (tmp == 0xDEADBEEF) {
839 		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
840 		r = 0;
841 	} else {
842 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
843 			  scratch, tmp);
844 		r = -EINVAL;
845 	}
846 err2:
847 	amdgpu_ib_free(adev, &ib, NULL);
848 	dma_fence_put(f);
849 err1:
850 	amdgpu_gfx_scratch_free(adev, scratch);
851 	return r;
852 }
853 
854 
855 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
856 	release_firmware(adev->gfx.pfp_fw);
857 	adev->gfx.pfp_fw = NULL;
858 	release_firmware(adev->gfx.me_fw);
859 	adev->gfx.me_fw = NULL;
860 	release_firmware(adev->gfx.ce_fw);
861 	adev->gfx.ce_fw = NULL;
862 	release_firmware(adev->gfx.rlc_fw);
863 	adev->gfx.rlc_fw = NULL;
864 	release_firmware(adev->gfx.mec_fw);
865 	adev->gfx.mec_fw = NULL;
866 	if ((adev->asic_type != CHIP_STONEY) &&
867 	    (adev->asic_type != CHIP_TOPAZ))
868 		release_firmware(adev->gfx.mec2_fw);
869 	adev->gfx.mec2_fw = NULL;
870 
871 	kfree(adev->gfx.rlc.register_list_format);
872 }
873 
874 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
875 {
876 	const char *chip_name;
877 	char fw_name[30];
878 	int err;
879 	struct amdgpu_firmware_info *info = NULL;
880 	const struct common_firmware_header *header = NULL;
881 	const struct gfx_firmware_header_v1_0 *cp_hdr;
882 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
883 	unsigned int *tmp = NULL, i;
884 
885 	DRM_DEBUG("\n");
886 
887 	switch (adev->asic_type) {
888 	case CHIP_TOPAZ:
889 		chip_name = "topaz";
890 		break;
891 	case CHIP_TONGA:
892 		chip_name = "tonga";
893 		break;
894 	case CHIP_CARRIZO:
895 		chip_name = "carrizo";
896 		break;
897 	case CHIP_FIJI:
898 		chip_name = "fiji";
899 		break;
900 	case CHIP_POLARIS11:
901 		chip_name = "polaris11";
902 		break;
903 	case CHIP_POLARIS10:
904 		chip_name = "polaris10";
905 		break;
906 	case CHIP_STONEY:
907 		chip_name = "stoney";
908 		break;
909 	default:
910 		BUG();
911 	}
912 
913 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
914 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
915 	if (err)
916 		goto out;
917 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
918 	if (err)
919 		goto out;
920 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
921 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
922 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
923 
924 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
925 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
926 	if (err)
927 		goto out;
928 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
929 	if (err)
930 		goto out;
931 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
932 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
933 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
934 
935 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
936 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
937 	if (err)
938 		goto out;
939 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
940 	if (err)
941 		goto out;
942 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
943 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
944 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
945 
946 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
947 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
948 	if (err)
949 		goto out;
950 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
951 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
952 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
953 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
954 
955 	adev->gfx.rlc.save_and_restore_offset =
956 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
957 	adev->gfx.rlc.clear_state_descriptor_offset =
958 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
959 	adev->gfx.rlc.avail_scratch_ram_locations =
960 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
961 	adev->gfx.rlc.reg_restore_list_size =
962 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
963 	adev->gfx.rlc.reg_list_format_start =
964 			le32_to_cpu(rlc_hdr->reg_list_format_start);
965 	adev->gfx.rlc.reg_list_format_separate_start =
966 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
967 	adev->gfx.rlc.starting_offsets_start =
968 			le32_to_cpu(rlc_hdr->starting_offsets_start);
969 	adev->gfx.rlc.reg_list_format_size_bytes =
970 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
971 	adev->gfx.rlc.reg_list_size_bytes =
972 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
973 
974 	adev->gfx.rlc.register_list_format =
975 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
976 					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
977 
978 	if (!adev->gfx.rlc.register_list_format) {
979 		err = -ENOMEM;
980 		goto out;
981 	}
982 
983 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
984 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
985 	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
986 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
987 
988 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
989 
990 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
991 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
992 	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
993 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
994 
995 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
996 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
997 	if (err)
998 		goto out;
999 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1000 	if (err)
1001 		goto out;
1002 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1003 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1004 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1005 
1006 	if ((adev->asic_type != CHIP_STONEY) &&
1007 	    (adev->asic_type != CHIP_TOPAZ)) {
1008 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1009 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1010 		if (!err) {
1011 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1012 			if (err)
1013 				goto out;
1014 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1015 				adev->gfx.mec2_fw->data;
1016 			adev->gfx.mec2_fw_version =
1017 				le32_to_cpu(cp_hdr->header.ucode_version);
1018 			adev->gfx.mec2_feature_version =
1019 				le32_to_cpu(cp_hdr->ucode_feature_version);
1020 		} else {
1021 			err = 0;
1022 			adev->gfx.mec2_fw = NULL;
1023 		}
1024 	}
1025 
1026 	if (adev->firmware.smu_load) {
1027 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1028 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1029 		info->fw = adev->gfx.pfp_fw;
1030 		header = (const struct common_firmware_header *)info->fw->data;
1031 		adev->firmware.fw_size +=
1032 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1033 
1034 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1035 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1036 		info->fw = adev->gfx.me_fw;
1037 		header = (const struct common_firmware_header *)info->fw->data;
1038 		adev->firmware.fw_size +=
1039 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1040 
1041 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1042 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1043 		info->fw = adev->gfx.ce_fw;
1044 		header = (const struct common_firmware_header *)info->fw->data;
1045 		adev->firmware.fw_size +=
1046 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1047 
1048 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1049 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1050 		info->fw = adev->gfx.rlc_fw;
1051 		header = (const struct common_firmware_header *)info->fw->data;
1052 		adev->firmware.fw_size +=
1053 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1054 
1055 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1056 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1057 		info->fw = adev->gfx.mec_fw;
1058 		header = (const struct common_firmware_header *)info->fw->data;
1059 		adev->firmware.fw_size +=
1060 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1061 
1062 		/* we need account JT in */
1063 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1064 		adev->firmware.fw_size +=
1065 			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1066 
1067 		if (amdgpu_sriov_vf(adev)) {
1068 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1069 			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1070 			info->fw = adev->gfx.mec_fw;
1071 			adev->firmware.fw_size +=
1072 				ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1073 		}
1074 
1075 		if (adev->gfx.mec2_fw) {
1076 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1077 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1078 			info->fw = adev->gfx.mec2_fw;
1079 			header = (const struct common_firmware_header *)info->fw->data;
1080 			adev->firmware.fw_size +=
1081 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1082 		}
1083 
1084 	}
1085 
1086 out:
1087 	if (err) {
1088 		dev_err(adev->dev,
1089 			"gfx8: Failed to load firmware \"%s\"\n",
1090 			fw_name);
1091 		release_firmware(adev->gfx.pfp_fw);
1092 		adev->gfx.pfp_fw = NULL;
1093 		release_firmware(adev->gfx.me_fw);
1094 		adev->gfx.me_fw = NULL;
1095 		release_firmware(adev->gfx.ce_fw);
1096 		adev->gfx.ce_fw = NULL;
1097 		release_firmware(adev->gfx.rlc_fw);
1098 		adev->gfx.rlc_fw = NULL;
1099 		release_firmware(adev->gfx.mec_fw);
1100 		adev->gfx.mec_fw = NULL;
1101 		release_firmware(adev->gfx.mec2_fw);
1102 		adev->gfx.mec2_fw = NULL;
1103 	}
1104 	return err;
1105 }
1106 
1107 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1108 				    volatile u32 *buffer)
1109 {
1110 	u32 count = 0, i;
1111 	const struct cs_section_def *sect = NULL;
1112 	const struct cs_extent_def *ext = NULL;
1113 
1114 	if (adev->gfx.rlc.cs_data == NULL)
1115 		return;
1116 	if (buffer == NULL)
1117 		return;
1118 
1119 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1120 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1121 
1122 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1123 	buffer[count++] = cpu_to_le32(0x80000000);
1124 	buffer[count++] = cpu_to_le32(0x80000000);
1125 
1126 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1127 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1128 			if (sect->id == SECT_CONTEXT) {
1129 				buffer[count++] =
1130 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1131 				buffer[count++] = cpu_to_le32(ext->reg_index -
1132 						PACKET3_SET_CONTEXT_REG_START);
1133 				for (i = 0; i < ext->reg_count; i++)
1134 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1135 			} else {
1136 				return;
1137 			}
1138 		}
1139 	}
1140 
1141 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1142 	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1143 			PACKET3_SET_CONTEXT_REG_START);
1144 	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1145 	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1146 
1147 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1148 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1149 
1150 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1151 	buffer[count++] = cpu_to_le32(0);
1152 }
1153 
1154 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1155 {
1156 	const __le32 *fw_data;
1157 	volatile u32 *dst_ptr;
1158 	int me, i, max_me = 4;
1159 	u32 bo_offset = 0;
1160 	u32 table_offset, table_size;
1161 
1162 	if (adev->asic_type == CHIP_CARRIZO)
1163 		max_me = 5;
1164 
1165 	/* write the cp table buffer */
1166 	dst_ptr = adev->gfx.rlc.cp_table_ptr;
1167 	for (me = 0; me < max_me; me++) {
1168 		if (me == 0) {
1169 			const struct gfx_firmware_header_v1_0 *hdr =
1170 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1171 			fw_data = (const __le32 *)
1172 				(adev->gfx.ce_fw->data +
1173 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1174 			table_offset = le32_to_cpu(hdr->jt_offset);
1175 			table_size = le32_to_cpu(hdr->jt_size);
1176 		} else if (me == 1) {
1177 			const struct gfx_firmware_header_v1_0 *hdr =
1178 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1179 			fw_data = (const __le32 *)
1180 				(adev->gfx.pfp_fw->data +
1181 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1182 			table_offset = le32_to_cpu(hdr->jt_offset);
1183 			table_size = le32_to_cpu(hdr->jt_size);
1184 		} else if (me == 2) {
1185 			const struct gfx_firmware_header_v1_0 *hdr =
1186 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1187 			fw_data = (const __le32 *)
1188 				(adev->gfx.me_fw->data +
1189 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1190 			table_offset = le32_to_cpu(hdr->jt_offset);
1191 			table_size = le32_to_cpu(hdr->jt_size);
1192 		} else if (me == 3) {
1193 			const struct gfx_firmware_header_v1_0 *hdr =
1194 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1195 			fw_data = (const __le32 *)
1196 				(adev->gfx.mec_fw->data +
1197 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1198 			table_offset = le32_to_cpu(hdr->jt_offset);
1199 			table_size = le32_to_cpu(hdr->jt_size);
1200 		} else  if (me == 4) {
1201 			const struct gfx_firmware_header_v1_0 *hdr =
1202 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1203 			fw_data = (const __le32 *)
1204 				(adev->gfx.mec2_fw->data +
1205 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1206 			table_offset = le32_to_cpu(hdr->jt_offset);
1207 			table_size = le32_to_cpu(hdr->jt_size);
1208 		}
1209 
1210 		for (i = 0; i < table_size; i ++) {
1211 			dst_ptr[bo_offset + i] =
1212 				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1213 		}
1214 
1215 		bo_offset += table_size;
1216 	}
1217 }
1218 
1219 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1220 {
1221 	int r;
1222 
1223 	/* clear state block */
1224 	if (adev->gfx.rlc.clear_state_obj) {
1225 		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1226 		if (unlikely(r != 0))
1227 			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1228 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1229 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1230 		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1231 		adev->gfx.rlc.clear_state_obj = NULL;
1232 	}
1233 
1234 	/* jump table block */
1235 	if (adev->gfx.rlc.cp_table_obj) {
1236 		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1237 		if (unlikely(r != 0))
1238 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1239 		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1240 		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1241 		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1242 		adev->gfx.rlc.cp_table_obj = NULL;
1243 	}
1244 }
1245 
1246 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1247 {
1248 	volatile u32 *dst_ptr;
1249 	u32 dws;
1250 	const struct cs_section_def *cs_data;
1251 	int r;
1252 
1253 	adev->gfx.rlc.cs_data = vi_cs_data;
1254 
1255 	cs_data = adev->gfx.rlc.cs_data;
1256 
1257 	if (cs_data) {
1258 		/* clear state block */
1259 		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1260 
1261 		if (adev->gfx.rlc.clear_state_obj == NULL) {
1262 			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1263 					     AMDGPU_GEM_DOMAIN_VRAM,
1264 					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1265 					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1266 					     NULL, NULL,
1267 					     &adev->gfx.rlc.clear_state_obj);
1268 			if (r) {
1269 				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1270 				gfx_v8_0_rlc_fini(adev);
1271 				return r;
1272 			}
1273 		}
1274 		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1275 		if (unlikely(r != 0)) {
1276 			gfx_v8_0_rlc_fini(adev);
1277 			return r;
1278 		}
1279 		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1280 				  &adev->gfx.rlc.clear_state_gpu_addr);
1281 		if (r) {
1282 			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1283 			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1284 			gfx_v8_0_rlc_fini(adev);
1285 			return r;
1286 		}
1287 
1288 		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1289 		if (r) {
1290 			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1291 			gfx_v8_0_rlc_fini(adev);
1292 			return r;
1293 		}
1294 		/* set up the cs buffer */
1295 		dst_ptr = adev->gfx.rlc.cs_ptr;
1296 		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1297 		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1298 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1299 	}
1300 
1301 	if ((adev->asic_type == CHIP_CARRIZO) ||
1302 	    (adev->asic_type == CHIP_STONEY)) {
1303 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1304 		if (adev->gfx.rlc.cp_table_obj == NULL) {
1305 			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1306 					     AMDGPU_GEM_DOMAIN_VRAM,
1307 					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1308 					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1309 					     NULL, NULL,
1310 					     &adev->gfx.rlc.cp_table_obj);
1311 			if (r) {
1312 				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1313 				return r;
1314 			}
1315 		}
1316 
1317 		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1318 		if (unlikely(r != 0)) {
1319 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1320 			return r;
1321 		}
1322 		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1323 				  &adev->gfx.rlc.cp_table_gpu_addr);
1324 		if (r) {
1325 			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1326 			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1327 			return r;
1328 		}
1329 		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1330 		if (r) {
1331 			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1332 			return r;
1333 		}
1334 
1335 		cz_init_cp_jump_table(adev);
1336 
1337 		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1338 		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1339 	}
1340 
1341 	return 0;
1342 }
1343 
1344 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1345 {
1346 	int r;
1347 
1348 	if (adev->gfx.mec.hpd_eop_obj) {
1349 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1350 		if (unlikely(r != 0))
1351 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1352 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1353 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1354 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1355 		adev->gfx.mec.hpd_eop_obj = NULL;
1356 	}
1357 }
1358 
1359 #define MEC_HPD_SIZE 2048
1360 
1361 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1362 {
1363 	int r;
1364 	u32 *hpd;
1365 
1366 	/*
1367 	 * we assign only 1 pipe because all other pipes will
1368 	 * be handled by KFD
1369 	 */
1370 	adev->gfx.mec.num_mec = 1;
1371 	adev->gfx.mec.num_pipe = 1;
1372 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1373 
1374 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
1375 		r = amdgpu_bo_create(adev,
1376 				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1377 				     PAGE_SIZE, true,
1378 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1379 				     &adev->gfx.mec.hpd_eop_obj);
1380 		if (r) {
1381 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1382 			return r;
1383 		}
1384 	}
1385 
1386 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1387 	if (unlikely(r != 0)) {
1388 		gfx_v8_0_mec_fini(adev);
1389 		return r;
1390 	}
1391 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1392 			  &adev->gfx.mec.hpd_eop_gpu_addr);
1393 	if (r) {
1394 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1395 		gfx_v8_0_mec_fini(adev);
1396 		return r;
1397 	}
1398 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1399 	if (r) {
1400 		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1401 		gfx_v8_0_mec_fini(adev);
1402 		return r;
1403 	}
1404 
1405 	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1406 
1407 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1408 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1409 
1410 	return 0;
1411 }
1412 
/* Raw GFX8 shader machine code loaded by gfx_v8_0_do_edc_gpr_workarounds()
 * and dispatched after the vgpr_init_regs state is programmed.
 * NOTE(review): presumably initializes the VGPRs for the EDC workaround;
 * verify the encoded instructions against the VI ISA before modifying.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1449 
/* Raw GFX8 shader machine code loaded by gfx_v8_0_do_edc_gpr_workarounds()
 * and dispatched twice (with the sgpr1/sgpr2 register states).
 * NOTE(review): presumably initializes the SGPRs for the EDC workaround;
 * verify the encoded instructions against the VI ISA before modifying.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1474 
/* Register/value pairs written via PACKET3_SET_SH_REG before the VGPR
 * init dispatch in gfx_v8_0_do_edc_gpr_workarounds() (the loop there
 * consumes this table two entries at a time: offset, then value).
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1494 
/* Register/value pairs for the first SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(). Differs from sgpr2_init_regs
 * only in the STATIC_THREAD_MGMT_SE0 mask (0x0f here vs 0xf0) --
 * presumably targeting a different set of CUs; confirm against the
 * SE0 mask semantics before changing.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1514 
/* Register/value pairs for the second SGPR init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(). Identical to sgpr1_init_regs
 * except for the STATIC_THREAD_MGMT_SE0 mask (0xf0 here vs 0x0f).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1534 
/* EDC (error detection and correction) counter registers across the
 * GFX blocks (CP, GDS, SPI, SQ/SQC, TCC/TCP, TD).
 * NOTE(review): presumably read/cleared after the EDC workaround
 * dispatches -- the consumer lies beyond this file section; confirm.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1563 
1564 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1565 {
1566 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1567 	struct amdgpu_ib ib;
1568 	struct dma_fence *f = NULL;
1569 	int r, i;
1570 	u32 tmp;
1571 	unsigned total_size, vgpr_offset, sgpr_offset;
1572 	u64 gpu_addr;
1573 
1574 	/* only supported on CZ */
1575 	if (adev->asic_type != CHIP_CARRIZO)
1576 		return 0;
1577 
1578 	/* bail if the compute ring is not ready */
1579 	if (!ring->ready)
1580 		return 0;
1581 
1582 	tmp = RREG32(mmGB_EDC_MODE);
1583 	WREG32(mmGB_EDC_MODE, 0);
1584 
1585 	total_size =
1586 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1587 	total_size +=
1588 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1589 	total_size +=
1590 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1591 	total_size = ALIGN(total_size, 256);
1592 	vgpr_offset = total_size;
1593 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1594 	sgpr_offset = total_size;
1595 	total_size += sizeof(sgpr_init_compute_shader);
1596 
1597 	/* allocate an indirect buffer to put the commands in */
1598 	memset(&ib, 0, sizeof(ib));
1599 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1600 	if (r) {
1601 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1602 		return r;
1603 	}
1604 
1605 	/* load the compute shaders */
1606 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1607 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1608 
1609 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1610 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1611 
1612 	/* init the ib length to 0 */
1613 	ib.length_dw = 0;
1614 
1615 	/* VGPR */
1616 	/* write the register state for the compute dispatch */
1617 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1618 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1619 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1620 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1621 	}
1622 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1623 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1624 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1625 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1626 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1627 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1628 
1629 	/* write dispatch packet */
1630 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1631 	ib.ptr[ib.length_dw++] = 8; /* x */
1632 	ib.ptr[ib.length_dw++] = 1; /* y */
1633 	ib.ptr[ib.length_dw++] = 1; /* z */
1634 	ib.ptr[ib.length_dw++] =
1635 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1636 
1637 	/* write CS partial flush packet */
1638 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1639 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1640 
1641 	/* SGPR1 */
1642 	/* write the register state for the compute dispatch */
1643 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1644 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1645 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1646 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1647 	}
1648 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1649 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1650 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1651 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1652 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1653 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1654 
1655 	/* write dispatch packet */
1656 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1657 	ib.ptr[ib.length_dw++] = 8; /* x */
1658 	ib.ptr[ib.length_dw++] = 1; /* y */
1659 	ib.ptr[ib.length_dw++] = 1; /* z */
1660 	ib.ptr[ib.length_dw++] =
1661 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1662 
1663 	/* write CS partial flush packet */
1664 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1665 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1666 
1667 	/* SGPR2 */
1668 	/* write the register state for the compute dispatch */
1669 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1670 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1671 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1672 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1673 	}
1674 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1675 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1676 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1677 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1678 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1679 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1680 
1681 	/* write dispatch packet */
1682 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1683 	ib.ptr[ib.length_dw++] = 8; /* x */
1684 	ib.ptr[ib.length_dw++] = 1; /* y */
1685 	ib.ptr[ib.length_dw++] = 1; /* z */
1686 	ib.ptr[ib.length_dw++] =
1687 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1688 
1689 	/* write CS partial flush packet */
1690 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1691 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1692 
1693 	/* schedule the ib on the ring */
1694 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1695 	if (r) {
1696 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1697 		goto fail;
1698 	}
1699 
1700 	/* wait for the GPU to finish processing the IB */
1701 	r = dma_fence_wait(f, false);
1702 	if (r) {
1703 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1704 		goto fail;
1705 	}
1706 
1707 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1708 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1709 	WREG32(mmGB_EDC_MODE, tmp);
1710 
1711 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1712 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1713 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1714 
1715 
1716 	/* read back registers to clear the counters */
1717 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1718 		RREG32(sec_ded_counter_registers[i]);
1719 
1720 fail:
1721 	amdgpu_ib_free(adev, &ib, NULL);
1722 	dma_fence_put(f);
1723 
1724 	return r;
1725 }
1726 
1727 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1728 {
1729 	u32 gb_addr_config;
1730 	u32 mc_shared_chmap, mc_arb_ramcfg;
1731 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1732 	u32 tmp;
1733 	int ret;
1734 
1735 	switch (adev->asic_type) {
1736 	case CHIP_TOPAZ:
1737 		adev->gfx.config.max_shader_engines = 1;
1738 		adev->gfx.config.max_tile_pipes = 2;
1739 		adev->gfx.config.max_cu_per_sh = 6;
1740 		adev->gfx.config.max_sh_per_se = 1;
1741 		adev->gfx.config.max_backends_per_se = 2;
1742 		adev->gfx.config.max_texture_channel_caches = 2;
1743 		adev->gfx.config.max_gprs = 256;
1744 		adev->gfx.config.max_gs_threads = 32;
1745 		adev->gfx.config.max_hw_contexts = 8;
1746 
1747 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1748 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1749 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1750 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1751 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1752 		break;
1753 	case CHIP_FIJI:
1754 		adev->gfx.config.max_shader_engines = 4;
1755 		adev->gfx.config.max_tile_pipes = 16;
1756 		adev->gfx.config.max_cu_per_sh = 16;
1757 		adev->gfx.config.max_sh_per_se = 1;
1758 		adev->gfx.config.max_backends_per_se = 4;
1759 		adev->gfx.config.max_texture_channel_caches = 16;
1760 		adev->gfx.config.max_gprs = 256;
1761 		adev->gfx.config.max_gs_threads = 32;
1762 		adev->gfx.config.max_hw_contexts = 8;
1763 
1764 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1765 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1766 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1767 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1768 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1769 		break;
1770 	case CHIP_POLARIS11:
1771 		ret = amdgpu_atombios_get_gfx_info(adev);
1772 		if (ret)
1773 			return ret;
1774 		adev->gfx.config.max_gprs = 256;
1775 		adev->gfx.config.max_gs_threads = 32;
1776 		adev->gfx.config.max_hw_contexts = 8;
1777 
1778 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1779 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1780 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1781 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1782 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1783 		break;
1784 	case CHIP_POLARIS10:
1785 		ret = amdgpu_atombios_get_gfx_info(adev);
1786 		if (ret)
1787 			return ret;
1788 		adev->gfx.config.max_gprs = 256;
1789 		adev->gfx.config.max_gs_threads = 32;
1790 		adev->gfx.config.max_hw_contexts = 8;
1791 
1792 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1793 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1794 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1795 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1796 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1797 		break;
1798 	case CHIP_TONGA:
1799 		adev->gfx.config.max_shader_engines = 4;
1800 		adev->gfx.config.max_tile_pipes = 8;
1801 		adev->gfx.config.max_cu_per_sh = 8;
1802 		adev->gfx.config.max_sh_per_se = 1;
1803 		adev->gfx.config.max_backends_per_se = 2;
1804 		adev->gfx.config.max_texture_channel_caches = 8;
1805 		adev->gfx.config.max_gprs = 256;
1806 		adev->gfx.config.max_gs_threads = 32;
1807 		adev->gfx.config.max_hw_contexts = 8;
1808 
1809 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1810 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1811 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1812 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1813 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1814 		break;
1815 	case CHIP_CARRIZO:
1816 		adev->gfx.config.max_shader_engines = 1;
1817 		adev->gfx.config.max_tile_pipes = 2;
1818 		adev->gfx.config.max_sh_per_se = 1;
1819 		adev->gfx.config.max_backends_per_se = 2;
1820 
1821 		switch (adev->pdev->revision) {
1822 		case 0xc4:
1823 		case 0x84:
1824 		case 0xc8:
1825 		case 0xcc:
1826 		case 0xe1:
1827 		case 0xe3:
1828 			/* B10 */
1829 			adev->gfx.config.max_cu_per_sh = 8;
1830 			break;
1831 		case 0xc5:
1832 		case 0x81:
1833 		case 0x85:
1834 		case 0xc9:
1835 		case 0xcd:
1836 		case 0xe2:
1837 		case 0xe4:
1838 			/* B8 */
1839 			adev->gfx.config.max_cu_per_sh = 6;
1840 			break;
1841 		case 0xc6:
1842 		case 0xca:
1843 		case 0xce:
1844 		case 0x88:
1845 			/* B6 */
1846 			adev->gfx.config.max_cu_per_sh = 6;
1847 			break;
1848 		case 0xc7:
1849 		case 0x87:
1850 		case 0xcb:
1851 		case 0xe5:
1852 		case 0x89:
1853 		default:
1854 			/* B4 */
1855 			adev->gfx.config.max_cu_per_sh = 4;
1856 			break;
1857 		}
1858 
1859 		adev->gfx.config.max_texture_channel_caches = 2;
1860 		adev->gfx.config.max_gprs = 256;
1861 		adev->gfx.config.max_gs_threads = 32;
1862 		adev->gfx.config.max_hw_contexts = 8;
1863 
1864 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1865 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1866 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1867 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1868 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1869 		break;
1870 	case CHIP_STONEY:
1871 		adev->gfx.config.max_shader_engines = 1;
1872 		adev->gfx.config.max_tile_pipes = 2;
1873 		adev->gfx.config.max_sh_per_se = 1;
1874 		adev->gfx.config.max_backends_per_se = 1;
1875 
1876 		switch (adev->pdev->revision) {
1877 		case 0xc0:
1878 		case 0xc1:
1879 		case 0xc2:
1880 		case 0xc4:
1881 		case 0xc8:
1882 		case 0xc9:
1883 			adev->gfx.config.max_cu_per_sh = 3;
1884 			break;
1885 		case 0xd0:
1886 		case 0xd1:
1887 		case 0xd2:
1888 		default:
1889 			adev->gfx.config.max_cu_per_sh = 2;
1890 			break;
1891 		}
1892 
1893 		adev->gfx.config.max_texture_channel_caches = 2;
1894 		adev->gfx.config.max_gprs = 256;
1895 		adev->gfx.config.max_gs_threads = 16;
1896 		adev->gfx.config.max_hw_contexts = 8;
1897 
1898 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1899 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1900 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1901 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1902 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1903 		break;
1904 	default:
1905 		adev->gfx.config.max_shader_engines = 2;
1906 		adev->gfx.config.max_tile_pipes = 4;
1907 		adev->gfx.config.max_cu_per_sh = 2;
1908 		adev->gfx.config.max_sh_per_se = 1;
1909 		adev->gfx.config.max_backends_per_se = 2;
1910 		adev->gfx.config.max_texture_channel_caches = 4;
1911 		adev->gfx.config.max_gprs = 256;
1912 		adev->gfx.config.max_gs_threads = 32;
1913 		adev->gfx.config.max_hw_contexts = 8;
1914 
1915 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1916 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1917 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1918 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1919 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1920 		break;
1921 	}
1922 
1923 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1924 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1925 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1926 
1927 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1928 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1929 	if (adev->flags & AMD_IS_APU) {
1930 		/* Get memory bank mapping mode. */
1931 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1932 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1933 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1934 
1935 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1936 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1937 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1938 
1939 		/* Validate settings in case only one DIMM installed. */
1940 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1941 			dimm00_addr_map = 0;
1942 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1943 			dimm01_addr_map = 0;
1944 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1945 			dimm10_addr_map = 0;
1946 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1947 			dimm11_addr_map = 0;
1948 
1949 		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1950 		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1951 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1952 			adev->gfx.config.mem_row_size_in_kb = 2;
1953 		else
1954 			adev->gfx.config.mem_row_size_in_kb = 1;
1955 	} else {
1956 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1957 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1958 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1959 			adev->gfx.config.mem_row_size_in_kb = 4;
1960 	}
1961 
1962 	adev->gfx.config.shader_engine_tile_size = 32;
1963 	adev->gfx.config.num_gpus = 1;
1964 	adev->gfx.config.multi_gpu_tile_size = 64;
1965 
1966 	/* fix up row size */
1967 	switch (adev->gfx.config.mem_row_size_in_kb) {
1968 	case 1:
1969 	default:
1970 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1971 		break;
1972 	case 2:
1973 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1974 		break;
1975 	case 4:
1976 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1977 		break;
1978 	}
1979 	adev->gfx.config.gb_addr_config = gb_addr_config;
1980 
1981 	return 0;
1982 }
1983 
1984 static int gfx_v8_0_sw_init(void *handle)
1985 {
1986 	int i, r;
1987 	struct amdgpu_ring *ring;
1988 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1989 
1990 	/* EOP Event */
1991 	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1992 	if (r)
1993 		return r;
1994 
1995 	/* Privileged reg */
1996 	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1997 	if (r)
1998 		return r;
1999 
2000 	/* Privileged inst */
2001 	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2002 	if (r)
2003 		return r;
2004 
2005 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2006 
2007 	gfx_v8_0_scratch_init(adev);
2008 
2009 	r = gfx_v8_0_init_microcode(adev);
2010 	if (r) {
2011 		DRM_ERROR("Failed to load gfx firmware!\n");
2012 		return r;
2013 	}
2014 
2015 	r = gfx_v8_0_rlc_init(adev);
2016 	if (r) {
2017 		DRM_ERROR("Failed to init rlc BOs!\n");
2018 		return r;
2019 	}
2020 
2021 	r = gfx_v8_0_mec_init(adev);
2022 	if (r) {
2023 		DRM_ERROR("Failed to init MEC BOs!\n");
2024 		return r;
2025 	}
2026 
2027 	/* set up the gfx ring */
2028 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2029 		ring = &adev->gfx.gfx_ring[i];
2030 		ring->ring_obj = NULL;
2031 		sprintf(ring->name, "gfx");
2032 		/* no gfx doorbells on iceland */
2033 		if (adev->asic_type != CHIP_TOPAZ) {
2034 			ring->use_doorbell = true;
2035 			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2036 		}
2037 
2038 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2039 				     AMDGPU_CP_IRQ_GFX_EOP);
2040 		if (r)
2041 			return r;
2042 	}
2043 
2044 	/* set up the compute queues */
2045 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2046 		unsigned irq_type;
2047 
2048 		/* max 32 queues per MEC */
2049 		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2050 			DRM_ERROR("Too many (%d) compute rings!\n", i);
2051 			break;
2052 		}
2053 		ring = &adev->gfx.compute_ring[i];
2054 		ring->ring_obj = NULL;
2055 		ring->use_doorbell = true;
2056 		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2057 		ring->me = 1; /* first MEC */
2058 		ring->pipe = i / 8;
2059 		ring->queue = i % 8;
2060 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2061 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2062 		/* type-2 packets are deprecated on MEC, use type-3 instead */
2063 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2064 				     irq_type);
2065 		if (r)
2066 			return r;
2067 	}
2068 
2069 	/* reserve GDS, GWS and OA resource for gfx */
2070 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2071 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2072 				    &adev->gds.gds_gfx_bo, NULL, NULL);
2073 	if (r)
2074 		return r;
2075 
2076 	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2077 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2078 				    &adev->gds.gws_gfx_bo, NULL, NULL);
2079 	if (r)
2080 		return r;
2081 
2082 	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2083 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2084 				    &adev->gds.oa_gfx_bo, NULL, NULL);
2085 	if (r)
2086 		return r;
2087 
2088 	adev->gfx.ce_ram_size = 0x8000;
2089 
2090 	r = gfx_v8_0_gpu_early_init(adev);
2091 	if (r)
2092 		return r;
2093 
2094 	return 0;
2095 }
2096 
2097 static int gfx_v8_0_sw_fini(void *handle)
2098 {
2099 	int i;
2100 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2101 
2102 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2103 	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2104 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2105 
2106 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2107 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2108 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2109 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2110 
2111 	gfx_v8_0_mec_fini(adev);
2112 	gfx_v8_0_rlc_fini(adev);
2113 	gfx_v8_0_free_microcode(adev);
2114 
2115 	return 0;
2116 }
2117 
2118 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2119 {
2120 	uint32_t *modearray, *mod2array;
2121 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2122 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2123 	u32 reg_offset;
2124 
2125 	modearray = adev->gfx.config.tile_mode_array;
2126 	mod2array = adev->gfx.config.macrotile_mode_array;
2127 
2128 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2129 		modearray[reg_offset] = 0;
2130 
2131 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2132 		mod2array[reg_offset] = 0;
2133 
2134 	switch (adev->asic_type) {
2135 	case CHIP_TOPAZ:
2136 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2137 				PIPE_CONFIG(ADDR_SURF_P2) |
2138 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2139 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2140 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2141 				PIPE_CONFIG(ADDR_SURF_P2) |
2142 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2143 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2144 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145 				PIPE_CONFIG(ADDR_SURF_P2) |
2146 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2147 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2148 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2149 				PIPE_CONFIG(ADDR_SURF_P2) |
2150 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2151 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2152 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153 				PIPE_CONFIG(ADDR_SURF_P2) |
2154 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2155 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2157 				PIPE_CONFIG(ADDR_SURF_P2) |
2158 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2159 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2161 				PIPE_CONFIG(ADDR_SURF_P2) |
2162 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2163 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2165 				PIPE_CONFIG(ADDR_SURF_P2));
2166 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2167 				PIPE_CONFIG(ADDR_SURF_P2) |
2168 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2169 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2170 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171 				 PIPE_CONFIG(ADDR_SURF_P2) |
2172 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2174 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175 				 PIPE_CONFIG(ADDR_SURF_P2) |
2176 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2177 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2178 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2179 				 PIPE_CONFIG(ADDR_SURF_P2) |
2180 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2181 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2182 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2183 				 PIPE_CONFIG(ADDR_SURF_P2) |
2184 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2185 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2187 				 PIPE_CONFIG(ADDR_SURF_P2) |
2188 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2189 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2191 				 PIPE_CONFIG(ADDR_SURF_P2) |
2192 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2193 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2194 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2195 				 PIPE_CONFIG(ADDR_SURF_P2) |
2196 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2198 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2199 				 PIPE_CONFIG(ADDR_SURF_P2) |
2200 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2201 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2202 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2203 				 PIPE_CONFIG(ADDR_SURF_P2) |
2204 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2205 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2206 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2207 				 PIPE_CONFIG(ADDR_SURF_P2) |
2208 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2209 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2210 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2211 				 PIPE_CONFIG(ADDR_SURF_P2) |
2212 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2213 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2215 				 PIPE_CONFIG(ADDR_SURF_P2) |
2216 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2217 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2219 				 PIPE_CONFIG(ADDR_SURF_P2) |
2220 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2223 				 PIPE_CONFIG(ADDR_SURF_P2) |
2224 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2227 				 PIPE_CONFIG(ADDR_SURF_P2) |
2228 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2229 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2230 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231 				 PIPE_CONFIG(ADDR_SURF_P2) |
2232 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2233 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2234 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2235 				 PIPE_CONFIG(ADDR_SURF_P2) |
2236 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2237 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2238 
2239 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2240 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2241 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2242 				NUM_BANKS(ADDR_SURF_8_BANK));
2243 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2244 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2245 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246 				NUM_BANKS(ADDR_SURF_8_BANK));
2247 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2248 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2249 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2250 				NUM_BANKS(ADDR_SURF_8_BANK));
2251 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2252 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2253 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2254 				NUM_BANKS(ADDR_SURF_8_BANK));
2255 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2257 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 				NUM_BANKS(ADDR_SURF_8_BANK));
2259 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2260 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2261 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262 				NUM_BANKS(ADDR_SURF_8_BANK));
2263 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2264 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2265 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2266 				NUM_BANKS(ADDR_SURF_8_BANK));
2267 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2268 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2269 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2270 				NUM_BANKS(ADDR_SURF_16_BANK));
2271 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2272 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2273 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2274 				NUM_BANKS(ADDR_SURF_16_BANK));
2275 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2276 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2277 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2278 				 NUM_BANKS(ADDR_SURF_16_BANK));
2279 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2280 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2281 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282 				 NUM_BANKS(ADDR_SURF_16_BANK));
2283 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2284 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2285 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286 				 NUM_BANKS(ADDR_SURF_16_BANK));
2287 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2288 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2289 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290 				 NUM_BANKS(ADDR_SURF_16_BANK));
2291 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2293 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2294 				 NUM_BANKS(ADDR_SURF_8_BANK));
2295 
2296 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2297 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2298 			    reg_offset != 23)
2299 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2300 
2301 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2302 			if (reg_offset != 7)
2303 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2304 
2305 		break;
2306 	case CHIP_FIJI:
2307 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2308 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2309 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2310 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2311 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2312 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2313 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2314 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2315 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2317 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2318 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2319 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2320 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2321 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2322 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2323 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2326 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2328 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2330 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2332 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2334 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2336 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2337 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2338 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2340 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2341 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2342 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2344 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2345 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2346 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2348 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2349 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2350 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2351 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2352 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2353 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2354 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2355 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2356 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2357 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2358 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2360 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2364 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2366 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2368 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2370 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2372 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2373 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2374 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2375 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2377 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2378 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2381 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2382 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2384 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2385 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2386 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2388 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2389 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2390 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2392 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2393 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2394 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2396 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2398 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2399 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2402 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2404 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2406 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2410 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2416 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2420 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2424 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2425 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2427 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2428 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2429 
2430 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2431 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2432 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2433 				NUM_BANKS(ADDR_SURF_8_BANK));
2434 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2435 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2436 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2437 				NUM_BANKS(ADDR_SURF_8_BANK));
2438 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2439 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2440 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2441 				NUM_BANKS(ADDR_SURF_8_BANK));
2442 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2443 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2444 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2445 				NUM_BANKS(ADDR_SURF_8_BANK));
2446 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2448 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2449 				NUM_BANKS(ADDR_SURF_8_BANK));
2450 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2452 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2453 				NUM_BANKS(ADDR_SURF_8_BANK));
2454 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2456 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2457 				NUM_BANKS(ADDR_SURF_8_BANK));
2458 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2460 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2461 				NUM_BANKS(ADDR_SURF_8_BANK));
2462 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2464 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2465 				NUM_BANKS(ADDR_SURF_8_BANK));
2466 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469 				 NUM_BANKS(ADDR_SURF_8_BANK));
2470 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2473 				 NUM_BANKS(ADDR_SURF_8_BANK));
2474 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2476 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477 				 NUM_BANKS(ADDR_SURF_8_BANK));
2478 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481 				 NUM_BANKS(ADDR_SURF_8_BANK));
2482 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2484 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485 				 NUM_BANKS(ADDR_SURF_4_BANK));
2486 
2487 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2488 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2489 
2490 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2491 			if (reg_offset != 7)
2492 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2493 
2494 		break;
	case CHIP_TONGA:
		/*
		 * Tonga tiling tables.  modearray[] is written to the
		 * GB_TILE_MODE0..30 registers and mod2array[] to the
		 * GB_MACROTILE_MODE* registers in the loops at the end of
		 * this case.  Most entries use the ADDR_SURF_P8_32x32_16x16
		 * pipe config; a few PRT entries use ADDR_SURF_P4_16x16.
		 */
		/* 0-7: DEPTH micro-tiling, tile split 64B up to 2KB */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* 8: linear aligned */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		/* 9-12: DISPLAY micro-tiling */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 13-17: THIN micro-tiling */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 18-26: THICK/XTHICK array modes */
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* 27-30: ROTATED micro-tiling */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile (bank) parameters.  Index 7 is deliberately left
		 * unset: the write loop below skips reg_offset 7, so
		 * GB_MACROTILE_MODE7 is never programmed here.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile-mode table into GB_TILE_MODE0.. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile table, skipping the reserved entry 7 */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
		/*
		 * Polaris11 tiling tables.  modearray[] is written to the
		 * GB_TILE_MODE0..30 registers and mod2array[] to the
		 * GB_MACROTILE_MODE* registers in the loops at the end of
		 * this case.  Every tile mode uses the ADDR_SURF_P4_16x16
		 * pipe config.
		 */
		/* 0-7: DEPTH micro-tiling, tile split 64B up to 2KB */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* 8: linear aligned */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		/* 9-12: DISPLAY micro-tiling */
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 13-17: THIN micro-tiling */
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		/* 18-26: THICK/XTHICK array modes */
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		/* 27-30: ROTATED micro-tiling */
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile (bank) parameters.  Index 7 is deliberately left
		 * unset: the write loop below skips reg_offset 7, so
		 * GB_MACROTILE_MODE7 is never programmed here.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile-mode table into GB_TILE_MODE0.. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile table, skipping the reserved entry 7 */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2886 	case CHIP_POLARIS10:
2887 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2890 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2893 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2894 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2896 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2897 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2898 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2899 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2900 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2901 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2902 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2903 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2906 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2908 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2910 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2912 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2914 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2916 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2917 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2918 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2920 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2921 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2924 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2926 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2928 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2932 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2933 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2935 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2937 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2940 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2941 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2944 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2946 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2948 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2952 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2956 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2957 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2958 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2962 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2966 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2968 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2970 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2972 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2973 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2974 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2976 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2978 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2979 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2982 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2984 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2986 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2990 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2994 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2997 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3000 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3001 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3002 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3004 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3005 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3006 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3007 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3009 
3010 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3011 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3012 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3013 				NUM_BANKS(ADDR_SURF_16_BANK));
3014 
3015 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3016 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3017 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018 				NUM_BANKS(ADDR_SURF_16_BANK));
3019 
3020 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3021 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3022 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3023 				NUM_BANKS(ADDR_SURF_16_BANK));
3024 
3025 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3027 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3028 				NUM_BANKS(ADDR_SURF_16_BANK));
3029 
3030 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3032 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3033 				NUM_BANKS(ADDR_SURF_16_BANK));
3034 
3035 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3037 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3038 				NUM_BANKS(ADDR_SURF_16_BANK));
3039 
3040 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3043 				NUM_BANKS(ADDR_SURF_16_BANK));
3044 
3045 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3047 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3048 				NUM_BANKS(ADDR_SURF_16_BANK));
3049 
3050 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3052 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3053 				NUM_BANKS(ADDR_SURF_16_BANK));
3054 
3055 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3057 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3058 				NUM_BANKS(ADDR_SURF_16_BANK));
3059 
3060 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3063 				NUM_BANKS(ADDR_SURF_16_BANK));
3064 
3065 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3067 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3068 				NUM_BANKS(ADDR_SURF_8_BANK));
3069 
3070 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3072 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3073 				NUM_BANKS(ADDR_SURF_4_BANK));
3074 
3075 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3077 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3078 				NUM_BANKS(ADDR_SURF_4_BANK));
3079 
3080 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3081 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3082 
3083 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3084 			if (reg_offset != 7)
3085 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3086 
3087 		break;
3088 	case CHIP_STONEY:
3089 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3090 				PIPE_CONFIG(ADDR_SURF_P2) |
3091 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3092 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3094 				PIPE_CONFIG(ADDR_SURF_P2) |
3095 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3096 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3097 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3098 				PIPE_CONFIG(ADDR_SURF_P2) |
3099 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3100 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3101 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3102 				PIPE_CONFIG(ADDR_SURF_P2) |
3103 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3104 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3105 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106 				PIPE_CONFIG(ADDR_SURF_P2) |
3107 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3108 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3110 				PIPE_CONFIG(ADDR_SURF_P2) |
3111 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3112 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3114 				PIPE_CONFIG(ADDR_SURF_P2) |
3115 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3116 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3118 				PIPE_CONFIG(ADDR_SURF_P2));
3119 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3120 				PIPE_CONFIG(ADDR_SURF_P2) |
3121 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3122 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3123 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124 				 PIPE_CONFIG(ADDR_SURF_P2) |
3125 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3126 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3127 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128 				 PIPE_CONFIG(ADDR_SURF_P2) |
3129 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3130 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3131 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3132 				 PIPE_CONFIG(ADDR_SURF_P2) |
3133 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3134 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3135 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3136 				 PIPE_CONFIG(ADDR_SURF_P2) |
3137 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3138 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3139 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3140 				 PIPE_CONFIG(ADDR_SURF_P2) |
3141 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3142 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3144 				 PIPE_CONFIG(ADDR_SURF_P2) |
3145 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3146 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3147 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3148 				 PIPE_CONFIG(ADDR_SURF_P2) |
3149 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3150 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3152 				 PIPE_CONFIG(ADDR_SURF_P2) |
3153 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3154 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3156 				 PIPE_CONFIG(ADDR_SURF_P2) |
3157 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3158 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3159 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3160 				 PIPE_CONFIG(ADDR_SURF_P2) |
3161 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3162 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3163 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3164 				 PIPE_CONFIG(ADDR_SURF_P2) |
3165 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3166 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3168 				 PIPE_CONFIG(ADDR_SURF_P2) |
3169 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3172 				 PIPE_CONFIG(ADDR_SURF_P2) |
3173 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3176 				 PIPE_CONFIG(ADDR_SURF_P2) |
3177 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3180 				 PIPE_CONFIG(ADDR_SURF_P2) |
3181 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3182 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3183 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3184 				 PIPE_CONFIG(ADDR_SURF_P2) |
3185 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3186 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3187 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3188 				 PIPE_CONFIG(ADDR_SURF_P2) |
3189 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3190 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3191 
3192 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3194 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3195 				NUM_BANKS(ADDR_SURF_8_BANK));
3196 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3197 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3198 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199 				NUM_BANKS(ADDR_SURF_8_BANK));
3200 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3201 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3202 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3203 				NUM_BANKS(ADDR_SURF_8_BANK));
3204 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3205 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3206 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3207 				NUM_BANKS(ADDR_SURF_8_BANK));
3208 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3210 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3211 				NUM_BANKS(ADDR_SURF_8_BANK));
3212 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3214 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3215 				NUM_BANKS(ADDR_SURF_8_BANK));
3216 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3219 				NUM_BANKS(ADDR_SURF_8_BANK));
3220 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3221 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3222 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3223 				NUM_BANKS(ADDR_SURF_16_BANK));
3224 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3225 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3226 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3227 				NUM_BANKS(ADDR_SURF_16_BANK));
3228 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3229 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3230 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3231 				 NUM_BANKS(ADDR_SURF_16_BANK));
3232 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3233 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3234 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3235 				 NUM_BANKS(ADDR_SURF_16_BANK));
3236 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3238 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3239 				 NUM_BANKS(ADDR_SURF_16_BANK));
3240 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3241 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3242 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243 				 NUM_BANKS(ADDR_SURF_16_BANK));
3244 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3245 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3246 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3247 				 NUM_BANKS(ADDR_SURF_8_BANK));
3248 
3249 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3250 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3251 			    reg_offset != 23)
3252 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3253 
3254 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3255 			if (reg_offset != 7)
3256 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3257 
3258 		break;
3259 	default:
3260 		dev_warn(adev->dev,
3261 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3262 			 adev->asic_type);
3263 
3264 	case CHIP_CARRIZO:
3265 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3266 				PIPE_CONFIG(ADDR_SURF_P2) |
3267 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3268 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3269 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3270 				PIPE_CONFIG(ADDR_SURF_P2) |
3271 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3272 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3273 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3274 				PIPE_CONFIG(ADDR_SURF_P2) |
3275 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3276 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3277 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3278 				PIPE_CONFIG(ADDR_SURF_P2) |
3279 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3280 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3281 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3282 				PIPE_CONFIG(ADDR_SURF_P2) |
3283 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3284 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3285 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3286 				PIPE_CONFIG(ADDR_SURF_P2) |
3287 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3288 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3290 				PIPE_CONFIG(ADDR_SURF_P2) |
3291 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3292 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3294 				PIPE_CONFIG(ADDR_SURF_P2));
3295 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3296 				PIPE_CONFIG(ADDR_SURF_P2) |
3297 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3298 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3299 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3300 				 PIPE_CONFIG(ADDR_SURF_P2) |
3301 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3302 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3303 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3304 				 PIPE_CONFIG(ADDR_SURF_P2) |
3305 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3306 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3307 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3308 				 PIPE_CONFIG(ADDR_SURF_P2) |
3309 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3310 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3311 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3312 				 PIPE_CONFIG(ADDR_SURF_P2) |
3313 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3314 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3315 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3316 				 PIPE_CONFIG(ADDR_SURF_P2) |
3317 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3318 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3320 				 PIPE_CONFIG(ADDR_SURF_P2) |
3321 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3322 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3323 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3324 				 PIPE_CONFIG(ADDR_SURF_P2) |
3325 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3326 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3327 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3328 				 PIPE_CONFIG(ADDR_SURF_P2) |
3329 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3330 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3331 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3332 				 PIPE_CONFIG(ADDR_SURF_P2) |
3333 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3334 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3335 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3336 				 PIPE_CONFIG(ADDR_SURF_P2) |
3337 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3338 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3339 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3340 				 PIPE_CONFIG(ADDR_SURF_P2) |
3341 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3342 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3343 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3344 				 PIPE_CONFIG(ADDR_SURF_P2) |
3345 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3346 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3348 				 PIPE_CONFIG(ADDR_SURF_P2) |
3349 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3352 				 PIPE_CONFIG(ADDR_SURF_P2) |
3353 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3354 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3356 				 PIPE_CONFIG(ADDR_SURF_P2) |
3357 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3358 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3359 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3360 				 PIPE_CONFIG(ADDR_SURF_P2) |
3361 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3362 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3363 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3364 				 PIPE_CONFIG(ADDR_SURF_P2) |
3365 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3366 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3367 
3368 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3369 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3370 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3371 				NUM_BANKS(ADDR_SURF_8_BANK));
3372 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3373 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3374 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3375 				NUM_BANKS(ADDR_SURF_8_BANK));
3376 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3379 				NUM_BANKS(ADDR_SURF_8_BANK));
3380 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3381 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3382 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3383 				NUM_BANKS(ADDR_SURF_8_BANK));
3384 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3386 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3387 				NUM_BANKS(ADDR_SURF_8_BANK));
3388 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3390 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3391 				NUM_BANKS(ADDR_SURF_8_BANK));
3392 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3394 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3395 				NUM_BANKS(ADDR_SURF_8_BANK));
3396 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3397 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3398 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3399 				NUM_BANKS(ADDR_SURF_16_BANK));
3400 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3401 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3402 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3403 				NUM_BANKS(ADDR_SURF_16_BANK));
3404 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3405 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3406 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3407 				 NUM_BANKS(ADDR_SURF_16_BANK));
3408 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3409 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3410 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3411 				 NUM_BANKS(ADDR_SURF_16_BANK));
3412 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3414 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3415 				 NUM_BANKS(ADDR_SURF_16_BANK));
3416 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3417 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3418 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419 				 NUM_BANKS(ADDR_SURF_16_BANK));
3420 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3421 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3422 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3423 				 NUM_BANKS(ADDR_SURF_8_BANK));
3424 
3425 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3426 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3427 			    reg_offset != 23)
3428 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3429 
3430 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3431 			if (reg_offset != 7)
3432 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3433 
3434 		break;
3435 	}
3436 }
3437 
3438 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3439 				  u32 se_num, u32 sh_num, u32 instance)
3440 {
3441 	u32 data;
3442 
3443 	if (instance == 0xffffffff)
3444 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3445 	else
3446 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3447 
3448 	if (se_num == 0xffffffff)
3449 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3450 	else
3451 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3452 
3453 	if (sh_num == 0xffffffff)
3454 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3455 	else
3456 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3457 
3458 	WREG32(mmGRBM_GFX_INDEX, data);
3459 }
3460 
3461 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3462 {
3463 	return (u32)((1ULL << bit_width) - 1);
3464 }
3465 
3466 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3467 {
3468 	u32 data, mask;
3469 
3470 	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3471 		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3472 
3473 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3474 
3475 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3476 				       adev->gfx.config.max_sh_per_se);
3477 
3478 	return (~data) & mask;
3479 }
3480 
3481 static void
3482 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3483 {
3484 	switch (adev->asic_type) {
3485 	case CHIP_FIJI:
3486 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3487 			  RB_XSEL2(1) | PKR_MAP(2) |
3488 			  PKR_XSEL(1) | PKR_YSEL(1) |
3489 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3490 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3491 			   SE_PAIR_YSEL(2);
3492 		break;
3493 	case CHIP_TONGA:
3494 	case CHIP_POLARIS10:
3495 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3496 			  SE_XSEL(1) | SE_YSEL(1);
3497 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3498 			   SE_PAIR_YSEL(2);
3499 		break;
3500 	case CHIP_TOPAZ:
3501 	case CHIP_CARRIZO:
3502 		*rconf |= RB_MAP_PKR0(2);
3503 		*rconf1 |= 0x0;
3504 		break;
3505 	case CHIP_POLARIS11:
3506 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3507 			  SE_XSEL(1) | SE_YSEL(1);
3508 		*rconf1 |= 0x0;
3509 		break;
3510 	case CHIP_STONEY:
3511 		*rconf |= 0x0;
3512 		*rconf1 |= 0x0;
3513 		break;
3514 	default:
3515 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3516 		break;
3517 	}
3518 }
3519 
/* Reprogram PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 per shader engine
 * when some render backends are harvested: any map field that would route
 * work to a fully-disabled SE, packer or RB is redirected to a live one.
 * @rb_mask is the bitmap of active RBs, @num_rb the total RB count.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Per-SE slices of the active-RB bitmap (SE0 owns the low
	 * rb_per_se bits, SE1 the next slice, and so on).
	 */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* Only these topologies are expected on gfx8 parts. */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If one SE pair (0/1 or 2/3) has no active RBs at all, point
	 * SE_PAIR_MAP at the surviving pair.
	 */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx selects the SE pair this SE belongs to. */
		int idx = (se / 2) * 2;

		/* One SE of the pair is dead: route SE_MAP to the live one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* One packer of this SE is dead: route PKR_MAP to the live one. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* Within packer 0: if one of its two RBs is dead,
			 * point RB_MAP_PKR0 at the live RB.
			 */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			/* Same fix-up for packer 1's two RBs. */
			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3628 
/* Probe which render backends survived harvesting, program the raster
 * configuration to match, and cache the per-SE/SH RB registers for
 * userspace queries.  Caller context: called from gfx_v8_0_gpu_init().
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	/* Bits each SH contributes to the chip-wide active-RB bitmap. */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	/* Serialize against other users of GRBM_GFX_INDEX. */
	mutex_lock(&adev->grbm_idx_mutex);
	/* Pass 1: collect the active-RB bitmap across all SEs/SHs. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* If all RBs are alive (or none detected), the default config can be
	 * broadcast; otherwise remap around the harvested backends.
	 */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	/* Restore broadcast mode before releasing the index mutex. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3685 
3686 /**
3687  * gfx_v8_0_init_compute_vmid - gart enable
3688  *
3689  * @rdev: amdgpu_device pointer
3690  *
3691  * Initialize compute vmid sh_mem registers
3692  *
3693  */
3694 #define DEFAULT_SH_MEM_BASES	(0x6000)
3695 #define FIRST_COMPUTE_VMID	(8)
3696 #define LAST_COMPUTE_VMID	(16)
3697 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3698 {
3699 	int i;
3700 	uint32_t sh_mem_config;
3701 	uint32_t sh_mem_bases;
3702 
3703 	/*
3704 	 * Configure apertures:
3705 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3706 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3707 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3708 	 */
3709 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3710 
3711 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3712 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3713 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3714 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3715 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3716 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3717 
3718 	mutex_lock(&adev->srbm_mutex);
3719 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3720 		vi_srbm_select(adev, 0, 0, 0, i);
3721 		/* CP and shaders */
3722 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3723 		WREG32(mmSH_MEM_APE1_BASE, 1);
3724 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3725 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3726 	}
3727 	vi_srbm_select(adev, 0, 0, 0, 0);
3728 	mutex_unlock(&adev->srbm_mutex);
3729 }
3730 
/*
 * gfx_v8_0_gpu_init - one-time golden setup of the GFX core.
 *
 * Programs the GRBM read timeout and address configuration, initializes
 * the tiling tables, render backends and CU info, sets up the SH_MEM
 * apertures for all 16 VMIDs (plus the compute VMIDs via
 * gfx_v8_0_init_compute_vmid()) and finally the PA_SC fifo sizes,
 * broadcast to all shader engines.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* the same gb_addr_config value is mirrored into HDP and DMIF */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: uncached default and APE1 mtypes */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* all other VMIDs: noncoherently-cached mtypes */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		/* APE1 base > limit — presumably leaves that aperture disabled */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
3793 
3794 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3795 {
3796 	u32 i, j, k;
3797 	u32 mask;
3798 
3799 	mutex_lock(&adev->grbm_idx_mutex);
3800 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3801 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3802 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3803 			for (k = 0; k < adev->usec_timeout; k++) {
3804 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3805 					break;
3806 				udelay(1);
3807 			}
3808 		}
3809 	}
3810 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3811 	mutex_unlock(&adev->grbm_idx_mutex);
3812 
3813 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3814 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3815 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3816 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3817 	for (k = 0; k < adev->usec_timeout; k++) {
3818 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3819 			break;
3820 		udelay(1);
3821 	}
3822 }
3823 
3824 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3825 					       bool enable)
3826 {
3827 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3828 
3829 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3830 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3831 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3832 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3833 
3834 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3835 }
3836 
3837 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3838 {
3839 	/* csib */
3840 	WREG32(mmRLC_CSIB_ADDR_HI,
3841 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3842 	WREG32(mmRLC_CSIB_ADDR_LO,
3843 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3844 	WREG32(mmRLC_CSIB_LENGTH,
3845 			adev->gfx.rlc.clear_state_size);
3846 }
3847 
/*
 * gfx_v8_0_parse_ind_reg_list - split an RLC indirect register list.
 *
 * Walks @register_list_format from @ind_offset up to @list_size,
 * recording the start offset of every 0xFFFFFFFF-delimited entry into
 * @ind_start_offsets (count returned through @offset_count) and
 * deduplicating the per-entry index dwords into @unique_indices (count
 * returned through @indices_count).  Each raw index value in the list
 * is rewritten in place with its position in @unique_indices.
 *
 * The capacity checks now run *before* the stores: the old code
 * incremented first and then did BUG_ON(count >= max), which both
 * rejected a legitimately full array (count == max after writing the
 * last valid slot) and would only catch a real overflow after the
 * out-of-bounds store had already corrupted memory.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			BUG_ON(*offset_count >= max_offset);
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip two dwords — presumably register offset and count —
		 * to land on the index dword; TODO confirm layout */
		ind_offset += 2;

		/* look for a matching index recorded earlier */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* not seen before: append it */
			BUG_ON(*indices_count >= max_indices);
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
		}

		/* replace the raw index value with its table position */
		register_list_format[ind_offset] = indices;
	}
}
3897 
3898 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3899 {
3900 	int i, temp, data;
3901 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3902 	int indices_count = 0;
3903 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3904 	int offset_count = 0;
3905 
3906 	int list_size;
3907 	unsigned int *register_list_format =
3908 		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3909 	if (!register_list_format)
3910 		return -ENOMEM;
3911 	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3912 			adev->gfx.rlc.reg_list_format_size_bytes);
3913 
3914 	gfx_v8_0_parse_ind_reg_list(register_list_format,
3915 				RLC_FormatDirectRegListLength,
3916 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3917 				unique_indices,
3918 				&indices_count,
3919 				sizeof(unique_indices) / sizeof(int),
3920 				indirect_start_offsets,
3921 				&offset_count,
3922 				sizeof(indirect_start_offsets)/sizeof(int));
3923 
3924 	/* save and restore list */
3925 	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3926 
3927 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3928 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3929 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3930 
3931 	/* indirect list */
3932 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3933 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3934 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3935 
3936 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3937 	list_size = list_size >> 1;
3938 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3939 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3940 
3941 	/* starting offsets starts */
3942 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
3943 		adev->gfx.rlc.starting_offsets_start);
3944 	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3945 		WREG32(mmRLC_GPM_SCRATCH_DATA,
3946 				indirect_start_offsets[i]);
3947 
3948 	/* unique indices */
3949 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3950 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3951 	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3952 		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3953 		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3954 	}
3955 	kfree(register_list_format);
3956 
3957 	return 0;
3958 }
3959 
/* Turn on the RLC save/restore machine; the lists it consumes are
 * programmed beforehand by gfx_v8_0_init_save_restore_list().
 */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3964 
/* Program the RLC power-gating delay and idle-threshold registers.
 * Only done when some form of GFX power gating (PG/SMG/DMG) is
 * supported; the values are hardware tuning constants.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

		/* power up/down handshake delays */
		data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
		WREG32(mmRLC_PG_DELAY, data);

		WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
		WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
	}
}
3984 
/* Allow (or forbid) SMU clock slow-down while the GFX block powers up. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
3990 
/* Allow (or forbid) SMU clock slow-down while the GFX block powers down. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
3996 
3997 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3998 {
3999 	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
4000 }
4001 
4002 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4003 {
4004 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4005 			      AMD_PG_SUPPORT_GFX_SMG |
4006 			      AMD_PG_SUPPORT_GFX_DMG |
4007 			      AMD_PG_SUPPORT_CP |
4008 			      AMD_PG_SUPPORT_GDS |
4009 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
4010 		gfx_v8_0_init_csb(adev);
4011 		gfx_v8_0_init_save_restore_list(adev);
4012 		gfx_v8_0_enable_save_restore_machine(adev);
4013 
4014 		if ((adev->asic_type == CHIP_CARRIZO) ||
4015 		    (adev->asic_type == CHIP_STONEY)) {
4016 			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4017 			gfx_v8_0_init_power_gating(adev);
4018 			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4019 			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4020 				cz_enable_sck_slow_down_on_power_up(adev, true);
4021 				cz_enable_sck_slow_down_on_power_down(adev, true);
4022 			} else {
4023 				cz_enable_sck_slow_down_on_power_up(adev, false);
4024 				cz_enable_sck_slow_down_on_power_down(adev, false);
4025 			}
4026 			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4027 				cz_enable_cp_power_gating(adev, true);
4028 			else
4029 				cz_enable_cp_power_gating(adev, false);
4030 		} else if (adev->asic_type == CHIP_POLARIS11) {
4031 			gfx_v8_0_init_power_gating(adev);
4032 		}
4033 	}
4034 }
4035 
/* Halt the RLC F32 core, mask the GUI idle interrupts and wait for the
 * serdes masters to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4043 
/* Pulse the RLC soft-reset bit, with a settling delay after each edge. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4052 
/* Re-enable the RLC F32 core and, on dGPUs only, the GUI idle interrupt. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4063 
4064 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4065 {
4066 	const struct rlc_firmware_header_v2_0 *hdr;
4067 	const __le32 *fw_data;
4068 	unsigned i, fw_size;
4069 
4070 	if (!adev->gfx.rlc_fw)
4071 		return -EINVAL;
4072 
4073 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4074 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
4075 
4076 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4077 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4078 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4079 
4080 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4081 	for (i = 0; i < fw_size; i++)
4082 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4083 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4084 
4085 	return 0;
4086 }
4087 
/*
 * gfx_v8_0_rlc_resume - full RLC restart sequence.
 *
 * Stops the RLC, disables clock and power gating, soft-resets the RLC,
 * reinitializes power gating, (re)loads the RLC microcode — either
 * directly or by waiting on the SMU loader — and starts the RLC again.
 *
 * Returns 0 on success or a negative error code on firmware failure.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10) {
		/* also clear the two low (enable) bits of the 3D CGCG/CGLS control */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* the SMU loads the ucode; just wait for it to finish */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4131 
4132 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4133 {
4134 	int i;
4135 	u32 tmp = RREG32(mmCP_ME_CNTL);
4136 
4137 	if (enable) {
4138 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4139 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4140 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4141 	} else {
4142 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4143 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4144 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4145 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4146 			adev->gfx.gfx_ring[i].ready = false;
4147 	}
4148 	WREG32(mmCP_ME_CNTL, tmp);
4149 	udelay(50);
4150 }
4151 
/*
 * gfx_v8_0_cp_gfx_load_microcode - upload PFP, CE and ME microcode.
 *
 * Halts the gfx CP, streams each firmware image dword by dword into its
 * ucode RAM and leaves the fw version in the respective address register.
 *
 * Returns 0 on success, -EINVAL if any of the three images is missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* halt the CP before touching the ucode RAMs */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
4208 
4209 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4210 {
4211 	u32 count = 0;
4212 	const struct cs_section_def *sect = NULL;
4213 	const struct cs_extent_def *ext = NULL;
4214 
4215 	/* begin clear state */
4216 	count += 2;
4217 	/* context control state */
4218 	count += 3;
4219 
4220 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4221 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4222 			if (sect->id == SECT_CONTEXT)
4223 				count += 2 + ext->reg_count;
4224 			else
4225 				return 0;
4226 		}
4227 	}
4228 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4229 	count += 4;
4230 	/* end clear state */
4231 	count += 2;
4232 	/* clear state */
4233 	count += 2;
4234 
4235 	return count;
4236 }
4237 
/*
 * gfx_v8_0_cp_gfx_start - prime gfx ring 0 with the clear state.
 *
 * Initializes the CP context count/endianness/device-id registers,
 * un-halts the gfx CP, then emits the clear-state preamble, the golden
 * context register values from vi_cs_data, per-ASIC raster-config
 * values and the CE partition bases on the ring.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* ring space mirrors the dword count from gfx_v8_0_get_csb_size() */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit the golden context registers */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG/_1 golden values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4329 
/*
 * gfx_v8_0_cp_gfx_resume - configure and start gfx ring buffer 0.
 *
 * Programs the ring buffer size, read/write pointers, writeback
 * addresses, base address and (except on Topaz/Iceland) the doorbell,
 * then primes the ring via gfx_v8_0_cp_gfx_start() and runs a ring test.
 *
 * Returns 0 on success or the ring-test error code.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			/* restrict the doorbell range to the gfx ring 0 slot */
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4413 
4414 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4415 {
4416 	int i;
4417 
4418 	if (enable) {
4419 		WREG32(mmCP_MEC_CNTL, 0);
4420 	} else {
4421 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4422 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4423 			adev->gfx.compute_ring[i].ready = false;
4424 	}
4425 	udelay(50);
4426 }
4427 
/*
 * gfx_v8_0_cp_compute_load_microcode - upload the MEC firmware image(s).
 *
 * Halts the compute MECs, streams the MEC1 image dword by dword into
 * its ucode RAM, and does the same for MEC2 when a separate image was
 * loaded.
 *
 * Returns 0 on success, -EINVAL if the MEC1 firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* halt the MECs before touching the ucode RAMs */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4473 
4474 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4475 {
4476 	int i, r;
4477 
4478 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4479 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4480 
4481 		if (ring->mqd_obj) {
4482 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4483 			if (unlikely(r != 0))
4484 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4485 
4486 			amdgpu_bo_unpin(ring->mqd_obj);
4487 			amdgpu_bo_unreserve(ring->mqd_obj);
4488 
4489 			amdgpu_bo_unref(&ring->mqd_obj);
4490 			ring->mqd_obj = NULL;
4491 		}
4492 	}
4493 }
4494 
4495 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4496 {
4497 	int r, i, j;
4498 	u32 tmp;
4499 	bool use_doorbell = true;
4500 	u64 hqd_gpu_addr;
4501 	u64 mqd_gpu_addr;
4502 	u64 eop_gpu_addr;
4503 	u64 wb_gpu_addr;
4504 	u32 *buf;
4505 	struct vi_mqd *mqd;
4506 
4507 	/* init the queues.  */
4508 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4509 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4510 
4511 		if (ring->mqd_obj == NULL) {
4512 			r = amdgpu_bo_create(adev,
4513 					     sizeof(struct vi_mqd),
4514 					     PAGE_SIZE, true,
4515 					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4516 					     NULL, &ring->mqd_obj);
4517 			if (r) {
4518 				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4519 				return r;
4520 			}
4521 		}
4522 
4523 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4524 		if (unlikely(r != 0)) {
4525 			gfx_v8_0_cp_compute_fini(adev);
4526 			return r;
4527 		}
4528 		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4529 				  &mqd_gpu_addr);
4530 		if (r) {
4531 			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4532 			gfx_v8_0_cp_compute_fini(adev);
4533 			return r;
4534 		}
4535 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4536 		if (r) {
4537 			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4538 			gfx_v8_0_cp_compute_fini(adev);
4539 			return r;
4540 		}
4541 
4542 		/* init the mqd struct */
4543 		memset(buf, 0, sizeof(struct vi_mqd));
4544 
4545 		mqd = (struct vi_mqd *)buf;
4546 		mqd->header = 0xC0310800;
4547 		mqd->compute_pipelinestat_enable = 0x00000001;
4548 		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4549 		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4550 		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4551 		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4552 		mqd->compute_misc_reserved = 0x00000003;
4553 
4554 		mutex_lock(&adev->srbm_mutex);
4555 		vi_srbm_select(adev, ring->me,
4556 			       ring->pipe,
4557 			       ring->queue, 0);
4558 
4559 		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4560 		eop_gpu_addr >>= 8;
4561 
4562 		/* write the EOP addr */
4563 		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4564 		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4565 
4566 		/* set the VMID assigned */
4567 		WREG32(mmCP_HQD_VMID, 0);
4568 
4569 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4570 		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4571 		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4572 				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
4573 		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4574 
4575 		/* disable wptr polling */
4576 		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4577 		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4578 		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4579 
4580 		mqd->cp_hqd_eop_base_addr_lo =
4581 			RREG32(mmCP_HQD_EOP_BASE_ADDR);
4582 		mqd->cp_hqd_eop_base_addr_hi =
4583 			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4584 
4585 		/* enable doorbell? */
4586 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4587 		if (use_doorbell) {
4588 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4589 		} else {
4590 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4591 		}
4592 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4593 		mqd->cp_hqd_pq_doorbell_control = tmp;
4594 
4595 		/* disable the queue if it's active */
4596 		mqd->cp_hqd_dequeue_request = 0;
4597 		mqd->cp_hqd_pq_rptr = 0;
4598 		mqd->cp_hqd_pq_wptr= 0;
4599 		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4600 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4601 			for (j = 0; j < adev->usec_timeout; j++) {
4602 				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4603 					break;
4604 				udelay(1);
4605 			}
4606 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4607 			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4608 			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4609 		}
4610 
4611 		/* set the pointer to the MQD */
4612 		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4613 		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4614 		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4615 		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4616 
4617 		/* set MQD vmid to 0 */
4618 		tmp = RREG32(mmCP_MQD_CONTROL);
4619 		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4620 		WREG32(mmCP_MQD_CONTROL, tmp);
4621 		mqd->cp_mqd_control = tmp;
4622 
4623 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4624 		hqd_gpu_addr = ring->gpu_addr >> 8;
4625 		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4626 		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4627 		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4628 		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4629 
4630 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4631 		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4632 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4633 				    (order_base_2(ring->ring_size / 4) - 1));
4634 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4635 			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4636 #ifdef __BIG_ENDIAN
4637 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4638 #endif
4639 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4640 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4641 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4642 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4643 		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4644 		mqd->cp_hqd_pq_control = tmp;
4645 
4646 		/* set the wb address wether it's enabled or not */
4647 		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4648 		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4649 		mqd->cp_hqd_pq_rptr_report_addr_hi =
4650 			upper_32_bits(wb_gpu_addr) & 0xffff;
4651 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4652 		       mqd->cp_hqd_pq_rptr_report_addr_lo);
4653 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4654 		       mqd->cp_hqd_pq_rptr_report_addr_hi);
4655 
4656 		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4657 		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4658 		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4659 		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4660 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
4661 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4662 		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4663 
4664 		/* enable the doorbell if requested */
4665 		if (use_doorbell) {
4666 			if ((adev->asic_type == CHIP_CARRIZO) ||
4667 			    (adev->asic_type == CHIP_FIJI) ||
4668 			    (adev->asic_type == CHIP_STONEY) ||
4669 			    (adev->asic_type == CHIP_POLARIS11) ||
4670 			    (adev->asic_type == CHIP_POLARIS10)) {
4671 				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4672 				       AMDGPU_DOORBELL_KIQ << 2);
4673 				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4674 				       AMDGPU_DOORBELL_MEC_RING7 << 2);
4675 			}
4676 			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4677 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4678 					    DOORBELL_OFFSET, ring->doorbell_index);
4679 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4680 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4681 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4682 			mqd->cp_hqd_pq_doorbell_control = tmp;
4683 
4684 		} else {
4685 			mqd->cp_hqd_pq_doorbell_control = 0;
4686 		}
4687 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4688 		       mqd->cp_hqd_pq_doorbell_control);
4689 
4690 		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4691 		ring->wptr = 0;
4692 		mqd->cp_hqd_pq_wptr = ring->wptr;
4693 		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4694 		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4695 
4696 		/* set the vmid for the queue */
4697 		mqd->cp_hqd_vmid = 0;
4698 		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4699 
4700 		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4701 		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4702 		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4703 		mqd->cp_hqd_persistent_state = tmp;
4704 		if (adev->asic_type == CHIP_STONEY ||
4705 			adev->asic_type == CHIP_POLARIS11 ||
4706 			adev->asic_type == CHIP_POLARIS10) {
4707 			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4708 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4709 			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4710 		}
4711 
4712 		/* activate the queue */
4713 		mqd->cp_hqd_active = 1;
4714 		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4715 
4716 		vi_srbm_select(adev, 0, 0, 0, 0);
4717 		mutex_unlock(&adev->srbm_mutex);
4718 
4719 		amdgpu_bo_kunmap(ring->mqd_obj);
4720 		amdgpu_bo_unreserve(ring->mqd_obj);
4721 	}
4722 
4723 	if (use_doorbell) {
4724 		tmp = RREG32(mmCP_PQ_STATUS);
4725 		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4726 		WREG32(mmCP_PQ_STATUS, tmp);
4727 	}
4728 
4729 	gfx_v8_0_cp_compute_enable(adev, true);
4730 
4731 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4732 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4733 
4734 		ring->ready = true;
4735 		r = amdgpu_ring_test_ring(ring);
4736 		if (r)
4737 			ring->ready = false;
4738 	}
4739 
4740 	return 0;
4741 }
4742 
/*
 * gfx_v8_0_cp_resume - load CP microcode (when the SMU is not managing
 * firmware) and bring up both the GFX and compute command processors.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* keep GUI idle interrupts off while the CP is being (re)started;
	 * APUs are left untouched here */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the CP firmware; wait for each ucode
			 * image to be reported as loaded */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			/* Topaz: MEC firmware is still loaded by the driver */
			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
										 AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	/* gfx ring first, then the compute rings */
	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4801 
/* Enable or disable both the graphics and compute command processors. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4807 
/*
 * hw_init callback: program golden registers and core GFX state, start
 * the RLC, then start the command processors.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the RLC must be up before the CP is started */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4824 
/*
 * hw_fini callback: disable fault interrupts and stop the CP and RLC.
 * Leaves GFX ungated so a subsequent hw_init starts from a known
 * powergating state.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	/* under SR-IOV the host owns the hardware; don't touch it */
	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
4844 
/* suspend callback: identical to a full hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
}
4851 
/* resume callback: identical to a full hardware bring-up. */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
}
4858 
4859 static bool gfx_v8_0_is_idle(void *handle)
4860 {
4861 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4862 
4863 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4864 		return false;
4865 	else
4866 		return true;
4867 }
4868 
4869 static int gfx_v8_0_wait_for_idle(void *handle)
4870 {
4871 	unsigned i;
4872 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4873 
4874 	for (i = 0; i < adev->usec_timeout; i++) {
4875 		if (gfx_v8_0_is_idle(handle))
4876 			return 0;
4877 
4878 		udelay(1);
4879 	}
4880 	return -ETIMEDOUT;
4881 }
4882 
/*
 * Inspect the GRBM/SRBM status registers and compute which soft-reset
 * bits would be needed to recover the GFX block.  The computed masks are
 * cached in adev->gfx.{grbm,srbm}_soft_reset for the pre/soft/post reset
 * handlers.  Returns true if any reset is required.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	/* any busy graphics engine block -> reset CP + GFX (and GRBM) */
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* busy CP fetcher/compute/gfx units -> reset all CP sub-blocks */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* cache the masks (or clear them) for the later reset phases */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
4944 
/*
 * Request dequeue on a compute ring's hardware queue and busy-wait until
 * the HQD goes inactive.  Uses vi_srbm_select() to address the ring's
 * me/pipe/queue -- NOTE(review): no srbm_mutex taken here; callers are
 * presumably serialized (soft-reset path) -- confirm.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;
		/* issue dequeue request type 2 and poll until the queue
		 * deactivates or adev->usec_timeout expires */
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}
4964 
4965 static int gfx_v8_0_pre_soft_reset(void *handle)
4966 {
4967 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4968 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4969 
4970 	if ((!adev->gfx.grbm_soft_reset) &&
4971 	    (!adev->gfx.srbm_soft_reset))
4972 		return 0;
4973 
4974 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
4975 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
4976 
4977 	/* stop the rlc */
4978 	gfx_v8_0_rlc_stop(adev);
4979 
4980 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4981 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
4982 		/* Disable GFX parsing/prefetching */
4983 		gfx_v8_0_cp_gfx_enable(adev, false);
4984 
4985 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
4986 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
4987 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
4988 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
4989 		int i;
4990 
4991 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4992 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4993 
4994 			gfx_v8_0_inactive_hqd(adev, ring);
4995 		}
4996 		/* Disable MEC parsing/prefetching */
4997 		gfx_v8_0_cp_compute_enable(adev, false);
4998 	}
4999 
5000        return 0;
5001 }
5002 
/*
 * Pulse the GRBM/SRBM soft-reset bits computed by check_soft_reset.
 * The GFX/memory-controller interface is stalled via GMCON_DEBUG for
 * the duration of the reset.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall GFX and clear state while the reset is applied */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	/* assert, hold for 50us, then deassert each reset; the extra
	 * reads after each write post/flush the register write */
	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5064 
/*
 * Zero a compute queue's HQD dequeue request and read/write pointers
 * after a soft reset so the queue restarts from a clean state.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	/* back to the default SRBM selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
}
5074 
/*
 * Restart the engines that were quiesced for the soft reset: resume the
 * gfx CP, reinit and resume the compute queues, then restart the RLC.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	/* NOTE(review): srbm_soft_reset is assigned but never read below;
	 * only the grbm mask drives the resume decisions. */
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* reset every HQD's pointers before the MEC comes back up */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_init_hqd(adev, ring);
		}
		gfx_v8_0_cp_compute_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5108 
5109 /**
5110  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5111  *
5112  * @adev: amdgpu_device pointer
5113  *
5114  * Fetches a GPU clock counter snapshot.
5115  * Returns the 64 bit clock counter snapshot.
5116  */
5117 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5118 {
5119 	uint64_t clock;
5120 
5121 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5122 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5123 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5124 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5125 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5126 	return clock;
5127 }
5128 
/*
 * Emit WRITE_DATA packets that program the per-VMID GDS, GWS and OA
 * apertures for this ring.  Byte sizes/bases are first converted to the
 * hardware allocation granularity via the AMDGPU_*_SHIFT constants.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base packed into one register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: written as a contiguous bitmask of oa_size bits at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5176 
/*
 * Read one dword of a wave's context via the SQ indirect register
 * window (SQ_IND_INDEX selects wave/simd/address, SQ_IND_DATA returns
 * the value; FORCE_READ reads regardless of wave state).
 */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}
5186 
/*
 * Bulk-read 'num' consecutive dwords of a wave/thread context through
 * the SQ indirect window; AUTO_INCR advances the index after each
 * SQ_IND_DATA read.
 */
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}
5201 
/*
 * Snapshot the standard wave state registers of one wave into dst,
 * advancing *no_fields for each dword written.  The leading 0 tags the
 * record as "type 0" wave data for the consumer (debugfs dumper).
 */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5225 
5226 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5227 				     uint32_t wave, uint32_t start,
5228 				     uint32_t size, uint32_t *dst)
5229 {
5230 	wave_read_regs(
5231 		adev, simd, wave, 0,
5232 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5233 }
5234 
5235 
/* GFX helper callbacks exported to the rest of the driver via
 * adev->gfx.funcs (clock readback, SE/SH select, wave state dumping). */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5242 
/* early_init callback: set ring counts and install the GFX, ring, IRQ,
 * GDS and RLC function tables for this IP version. */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5257 
/*
 * late_init callback: enable privileged register/instruction fault
 * interrupts, run the EDC GPR workarounds (these submit IBs, which is
 * why this happens in late init), and enable GFX powergating.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5281 
5282 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5283 						       bool enable)
5284 {
5285 	if (adev->asic_type == CHIP_POLARIS11)
5286 		/* Send msg to SMU via Powerplay */
5287 		amdgpu_set_powergating_state(adev,
5288 					     AMD_IP_BLOCK_TYPE_SMC,
5289 					     enable ?
5290 					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5291 
5292 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5293 }
5294 
/* Toggle dynamic per-CU medium-grain power gating in RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5300 
/* Polaris11: toggle quick medium-grain power gating in RLC_PG_CNTL. */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5306 
/* CZ/ST: toggle GFX power gating in RLC_PG_CNTL. */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5312 
/* CZ/ST: toggle GFX pipeline power gating in RLC_PG_CNTL. */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5322 
5323 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5324 					  bool enable)
5325 {
5326 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5327 		cz_enable_gfx_cg_power_gating(adev, true);
5328 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5329 			cz_enable_gfx_pipeline_power_gating(adev, true);
5330 	} else {
5331 		cz_enable_gfx_cg_power_gating(adev, false);
5332 		cz_enable_gfx_pipeline_power_gating(adev, false);
5333 	}
5334 }
5335 
5336 static int gfx_v8_0_set_powergating_state(void *handle,
5337 					  enum amd_powergating_state state)
5338 {
5339 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5340 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5341 
5342 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5343 		return 0;
5344 
5345 	switch (adev->asic_type) {
5346 	case CHIP_CARRIZO:
5347 	case CHIP_STONEY:
5348 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5349 			cz_update_gfx_cg_power_gating(adev, enable);
5350 
5351 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5352 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5353 		else
5354 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5355 
5356 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5357 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5358 		else
5359 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5360 		break;
5361 	case CHIP_POLARIS11:
5362 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5363 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5364 		else
5365 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5366 
5367 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5368 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5369 		else
5370 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5371 
5372 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5373 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5374 		else
5375 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5376 		break;
5377 	default:
5378 		break;
5379 	}
5380 
5381 	return 0;
5382 }
5383 
/*
 * Broadcast a BPM serdes command to all CU/non-CU serdes masters on all
 * SEs/SHs via RLC_SERDES_WR_CTRL.  The Stoney path clears a shorter set
 * of fields (it leaves BPM_DATA/REG_ADDR out of the clear mask --
 * presumably those fields differ on that ASIC; confirm against the
 * register spec).
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every SE/SH/CU */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	/* clear the command/select fields, then fill in the new command */
	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5424 
/* Local definitions for the RLC safe-mode request handshake carried in
 * RLC_GPR_REG2: a REQ bit plus a 4-bit MESSAGE field. */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5431 
/*
 * CZ/ST: request RLC safe mode (needed around CG/PG reprogramming when
 * any GFX CG/PG feature is enabled), then wait for GFX clocks/power to
 * report on and for the REQ bit to be acked (cleared).
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	/* nothing to do if the RLC isn't running */
	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* NOTE(review): 'data' still holds the RLC_CNTL value when the
		 * REQ/MESSAGE bits are ORed in and written to RLC_GPR_REG2;
		 * verify the stray bits are harmless for that register. */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait for both GFX clock and power status bits */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack the request (REQ cleared) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5467 
/*
 * CZ/ST: request RLC safe-mode exit and wait for the REQ bit to be
 * acked.  No-op when the RLC isn't running.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* NOTE(review): as in cz_enter_rlc_safe_mode, 'data' carries
		 * the RLC_CNTL value into the RLC_GPR_REG2 write. */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to ack the request (REQ cleared) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
			break;
		udelay(1);
	}
}
5493 
/*
 * Iceland-family: request RLC safe mode via RLC_SAFE_MODE (CMD +
 * MESSAGE=1), wait for GFX clocks/power to be reported on and for the
 * CMD bit to be acked.  Only done when CGCG/MGCG is enabled.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* nothing to do if the RLC isn't running */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* NOTE(review): 'data' still holds the RLC_CNTL value when the
		 * CMD/MESSAGE bits are ORed in and written to RLC_SAFE_MODE;
		 * verify the stray bits are harmless for that register. */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for both GFX clock and power status bits */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack the request (CMD cleared) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5527 
/*
 * Iceland-family: request RLC safe-mode exit (CMD with MESSAGE cleared)
 * when safe mode had been entered, then wait for the CMD ack.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to ack the request (CMD cleared) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5552 
/* No-op safe-mode hooks for ASICs that need no RLC handshake; they only
 * track the in_safe_mode flag. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}

static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5562 
/* RLC safe-mode entry/exit function tables, selected per ASIC family by
 * gfx_v8_0_set_rlc_funcs(). */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};

static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};

static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5577 
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 * @adev: amdgpu device pointer
 * @enable: true to enable medium grain clock gating, false to disable
 *
 * Programs the RLC MGCG override, RLC/CP memory light-sleep and CGTS
 * (tree shade) registers.  The whole sequence runs under RLC safe mode,
 * and the numbered comments reflect the required hardware programming
 * order - do not reorder the steps.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* on APUs the GRBM override bit is left set */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		/* avoid redundant register writes */
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* CGTS LS only when both MGLS and CGTS_LS are supported */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5681 
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse grain clock gating, false to disable
 *
 * Clears or sets the CGCG/CGLS override bits and the CGCG/CGLS enable
 * bits in RLC_CGCG_CGLS_CTRL, synchronizing with the RLC serdes between
 * steps.  Runs entirely under RLC safe mode; the step order is a
 * hardware requirement.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override bit */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* and clear the CGLS override bit */
			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5772 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5773 					    bool enable)
5774 {
5775 	if (enable) {
5776 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5777 		 * ===  MGCG + MGLS + TS(CG/LS) ===
5778 		 */
5779 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5780 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5781 	} else {
5782 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5783 		 * ===  CGCG + CGLS ===
5784 		 */
5785 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5786 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5787 	}
5788 	return 0;
5789 }
5790 
5791 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5792 					  enum amd_clockgating_state state)
5793 {
5794 	uint32_t msg_id, pp_state;
5795 	void *pp_handle = adev->powerplay.pp_handle;
5796 
5797 	if (state == AMD_CG_STATE_UNGATE)
5798 		pp_state = 0;
5799 	else
5800 		pp_state = PP_STATE_CG | PP_STATE_LS;
5801 
5802 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5803 			PP_BLOCK_GFX_CG,
5804 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5805 			pp_state);
5806 	amd_set_clockgating_by_smu(pp_handle, msg_id);
5807 
5808 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5809 			PP_BLOCK_GFX_MG,
5810 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5811 			pp_state);
5812 	amd_set_clockgating_by_smu(pp_handle, msg_id);
5813 
5814 	return 0;
5815 }
5816 
5817 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5818 					  enum amd_clockgating_state state)
5819 {
5820 	uint32_t msg_id, pp_state;
5821 	void *pp_handle = adev->powerplay.pp_handle;
5822 
5823 	if (state == AMD_CG_STATE_UNGATE)
5824 		pp_state = 0;
5825 	else
5826 		pp_state = PP_STATE_CG | PP_STATE_LS;
5827 
5828 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5829 			PP_BLOCK_GFX_CG,
5830 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5831 			pp_state);
5832 	amd_set_clockgating_by_smu(pp_handle, msg_id);
5833 
5834 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5835 			PP_BLOCK_GFX_3D,
5836 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5837 			pp_state);
5838 	amd_set_clockgating_by_smu(pp_handle, msg_id);
5839 
5840 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5841 			PP_BLOCK_GFX_MG,
5842 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5843 			pp_state);
5844 	amd_set_clockgating_by_smu(pp_handle, msg_id);
5845 
5846 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5847 			PP_BLOCK_GFX_RLC,
5848 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5849 			pp_state);
5850 	amd_set_clockgating_by_smu(pp_handle, msg_id);
5851 
5852 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5853 			PP_BLOCK_GFX_CP,
5854 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
5855 			pp_state);
5856 	amd_set_clockgating_by_smu(pp_handle, msg_id);
5857 
5858 	return 0;
5859 }
5860 
5861 static int gfx_v8_0_set_clockgating_state(void *handle,
5862 					  enum amd_clockgating_state state)
5863 {
5864 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5865 
5866 	switch (adev->asic_type) {
5867 	case CHIP_FIJI:
5868 	case CHIP_CARRIZO:
5869 	case CHIP_STONEY:
5870 		gfx_v8_0_update_gfx_clock_gating(adev,
5871 						 state == AMD_CG_STATE_GATE ? true : false);
5872 		break;
5873 	case CHIP_TONGA:
5874 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5875 		break;
5876 	case CHIP_POLARIS10:
5877 	case CHIP_POLARIS11:
5878 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5879 		break;
5880 	default:
5881 		break;
5882 	}
5883 	return 0;
5884 }
5885 
/* Read the ring's read pointer from its writeback slot. */
static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}
5890 
5891 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5892 {
5893 	struct amdgpu_device *adev = ring->adev;
5894 
5895 	if (ring->use_doorbell)
5896 		/* XXX check if swapping is necessary on BE */
5897 		return ring->adev->wb.wb[ring->wptr_offs];
5898 	else
5899 		return RREG32(mmCP_RB0_WPTR);
5900 }
5901 
5902 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5903 {
5904 	struct amdgpu_device *adev = ring->adev;
5905 
5906 	if (ring->use_doorbell) {
5907 		/* XXX check if swapping is necessary on BE */
5908 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
5909 		WDOORBELL32(ring->doorbell_index, ring->wptr);
5910 	} else {
5911 		WREG32(mmCP_RB0_WPTR, ring->wptr);
5912 		(void)RREG32(mmCP_RB0_WPTR);
5913 	}
5914 }
5915 
5916 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5917 {
5918 	u32 ref_and_mask, reg_mem_engine;
5919 
5920 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5921 		switch (ring->me) {
5922 		case 1:
5923 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5924 			break;
5925 		case 2:
5926 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5927 			break;
5928 		default:
5929 			return;
5930 		}
5931 		reg_mem_engine = 0;
5932 	} else {
5933 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5934 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5935 	}
5936 
5937 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5938 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5939 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
5940 				 reg_mem_engine));
5941 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5942 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5943 	amdgpu_ring_write(ring, ref_and_mask);
5944 	amdgpu_ring_write(ring, ref_and_mask);
5945 	amdgpu_ring_write(ring, 0x20); /* poll interval */
5946 }
5947 
/* Emit a VS partial flush followed by a VGT flush event on the ring. */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
5958 
5959 
/* Invalidate the HDP read cache by writing 1 to HDP_DEBUG0 via a
 * confirmed WRITE_DATA packet (ME engine).
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
5971 
5972 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5973 				      struct amdgpu_ib *ib,
5974 				      unsigned vm_id, bool ctx_switch)
5975 {
5976 	u32 header, control = 0;
5977 
5978 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5979 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5980 	else
5981 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5982 
5983 	control |= ib->length_dw | (vm_id << 24);
5984 
5985 	amdgpu_ring_write(ring, header);
5986 	amdgpu_ring_write(ring,
5987 #ifdef __BIG_ENDIAN
5988 			  (2 << 0) |
5989 #endif
5990 			  (ib->gpu_addr & 0xFFFFFFFC));
5991 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5992 	amdgpu_ring_write(ring, control);
5993 }
5994 
/*
 * gfx_v8_0_ring_emit_ib_compute - schedule an indirect buffer on a compute ring
 * @ring: compute ring
 * @ib: indirect buffer to execute
 * @vm_id: VMID the IB runs under (encoded in the control dword)
 * @ctx_switch: unused on this path
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6010 
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 * @ring: gfx ring
 * @addr: GPU address to write the fence value to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT and/or AMDGPU_FENCE_FLAG_INT
 *
 * Emits an EVENT_WRITE_EOP packet that flushes the TC/TCL1 caches,
 * writes the sequence number at @addr and optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6031 
6032 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6033 {
6034 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6035 	uint32_t seq = ring->fence_drv.sync_seq;
6036 	uint64_t addr = ring->fence_drv.gpu_addr;
6037 
6038 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6039 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6040 				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6041 				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6042 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6043 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6044 	amdgpu_ring_write(ring, seq);
6045 	amdgpu_ring_write(ring, 0xffffffff);
6046 	amdgpu_ring_write(ring, 4); /* poll interval */
6047 }
6048 
/*
 * gfx_v8_0_ring_emit_vm_flush - flush the TLB for a VMID via the ring
 * @ring: ring to emit on
 * @vm_id: VMID whose page table base is updated and invalidated
 * @pd_addr: physical address of the page directory
 *
 * Writes the new page-directory base for @vm_id, requests a VM cache
 * invalidate for that VMID and waits for it to complete.  On gfx rings
 * the PFP is additionally synced to the ME so it never prefetches
 * through stale translations.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 have separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6097 
/* Compute rings always use doorbells: read wptr from the writeback slot. */
static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}
6102 
/* Publish the compute ring write pointer: mirror it in the writeback
 * slot, then ring the doorbell.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
6111 
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 * @ring: compute ring
 * @addr: GPU address to write the fence value to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT and/or AMDGPU_FENCE_FLAG_INT
 *
 * Compute uses RELEASE_MEM instead of EVENT_WRITE_EOP, but with the same
 * cache-flush actions; note the data/int-sel dword precedes the address
 * here, unlike the gfx variant.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6132 
/* Emit a SWITCH_BUFFER packet (flip the CE/DE buffer). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6138 
/*
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet
 * @ring: gfx ring
 * @flags: AMDGPU_HAVE_CTX_SWITCH / AMDGPU_PREAMBLE_IB_PRESENT* bits
 *
 * Builds the CONTEXT_CONTROL load-control dword from the submission
 * flags and emits it, preceded by a VGT flush when an actual context
 * switch is happening.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6168 
/* Enable or disable the gfx ring's end-of-pipe (timestamp) interrupt. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6175 
6176 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6177 						     int me, int pipe,
6178 						     enum amdgpu_interrupt_state state)
6179 {
6180 	/*
6181 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6182 	 * handles the setting of interrupts for this specific pipe. All other
6183 	 * pipes' interrupts are set by amdkfd.
6184 	 */
6185 
6186 	if (me == 1) {
6187 		switch (pipe) {
6188 		case 0:
6189 			break;
6190 		default:
6191 			DRM_DEBUG("invalid pipe %d\n", pipe);
6192 			return;
6193 		}
6194 	} else {
6195 		DRM_DEBUG("invalid me %d\n", me);
6196 		return;
6197 	}
6198 
6199 	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6200 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6201 }
6202 
/* Enable or disable the privileged-register-access fault interrupt.
 * Always returns 0.
 */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6213 
/* Enable or disable the illegal-instruction fault interrupt.
 * Always returns 0.
 */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6224 
/*
 * gfx_v8_0_set_eop_interrupt_state - dispatch an EOP irq state change
 * @adev: amdgpu device pointer
 * @src: interrupt source (unused)
 * @type: which ring's EOP interrupt (gfx or a MEC/pipe pair)
 * @state: enable or disable
 *
 * Maps the irq type to the corresponding gfx or compute (me, pipe)
 * state setter.  Unknown types are silently ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6263 
6264 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6265 			    struct amdgpu_irq_src *source,
6266 			    struct amdgpu_iv_entry *entry)
6267 {
6268 	int i;
6269 	u8 me_id, pipe_id, queue_id;
6270 	struct amdgpu_ring *ring;
6271 
6272 	DRM_DEBUG("IH: CP EOP\n");
6273 	me_id = (entry->ring_id & 0x0c) >> 2;
6274 	pipe_id = (entry->ring_id & 0x03) >> 0;
6275 	queue_id = (entry->ring_id & 0x70) >> 4;
6276 
6277 	switch (me_id) {
6278 	case 0:
6279 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6280 		break;
6281 	case 1:
6282 	case 2:
6283 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6284 			ring = &adev->gfx.compute_ring[i];
6285 			/* Per-queue interrupt is supported for MEC starting from VI.
6286 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6287 			  */
6288 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6289 				amdgpu_fence_process(ring);
6290 		}
6291 		break;
6292 	}
6293 	return 0;
6294 }
6295 
/* Privileged-register fault handler: log and schedule a GPU reset.
 * Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6304 
/* Illegal-instruction fault handler: log and schedule a GPU reset.
 * Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6313 
/* IP-block lifecycle callbacks for the GFX v8 block. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6333 
/* Ring callbacks for the gfx ring; emit_frame_size must stay in sync
 * with the dword counts of the emit functions listed below.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};
6365 
/* Ring callbacks for compute rings; emit_frame_size must stay in sync
 * with the dword counts of the emit functions listed below.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6393 
6394 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6395 {
6396 	int i;
6397 
6398 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6399 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6400 
6401 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6402 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6403 }
6404 
/* Interrupt source vtables: end-of-pipe, privileged-register fault and
 * illegal-instruction fault.
 */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6419 
/* Register the GFX interrupt sources with the irq framework. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
6431 
6432 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6433 {
6434 	switch (adev->asic_type) {
6435 	case CHIP_TOPAZ:
6436 		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6437 		break;
6438 	case CHIP_STONEY:
6439 	case CHIP_CARRIZO:
6440 		adev->gfx.rlc.funcs = &cz_rlc_funcs;
6441 		break;
6442 	default:
6443 		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6444 		break;
6445 	}
6446 }
6447 
/* Initialize the GDS memory/GWS/OA partition sizes from the hardware's
 * reported GDS size.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	/* partition sizes differ between 64K-GDS parts and the rest */
	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6475 
6476 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6477 						 u32 bitmap)
6478 {
6479 	u32 data;
6480 
6481 	if (!bitmap)
6482 		return;
6483 
6484 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6485 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6486 
6487 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6488 }
6489 
/* Return a bitmap of active CUs for the currently selected SE/SH,
 * combining hardware fuses with the user-disabled bits and clamping to
 * max_cu_per_sh.
 */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	/* registers hold INACTIVE bits; invert to get the active set */
	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
6501 
/*
 * gfx_v8_0_get_cu_info - populate adev->gfx.cu_info
 * @adev: amdgpu device pointer
 *
 * Walks every SE/SH under grbm_idx_mutex, applies user CU-disable masks
 * (first 4 SEs x 2 SHs only), records the per-SH active-CU bitmaps and
 * counts total active CUs plus an "always on" mask of up to 2 CUs per SH.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* target register accesses at this SE/SH pair */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first 2 per SH are "always on" */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast mode before releasing the mutex */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6544 
/* GFX 8.0 IP block descriptor (Topaz/Tonga/Fiji/Carrizo/Polaris). */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6553 
/* GFX 8.1 IP block descriptor; shares the 8.0 callbacks (e.g. Stoney). */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
6562