xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 071bf69a0220253a44acb8b2a27f7a262b9a46bf)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33 
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36 
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39 
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42 
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47 
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50 
51 #include "smu/smu_7_1_3_d.h"
52 
/* Fixed ring topology for GFX v8: one GFX ring and eight compute rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Per-ASIC GB_ADDR_CONFIG "golden" values (also programmed via the
 * *_golden_common_all tables below). */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Helpers to build GB_TILE_MODE*/GB_MACROTILE_MODE* register values from
 * the individual bit-field components. */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Local bit definitions for RLC_CGTT_MGCG_OVERRIDE (clock-gating override
 * bits), defined here rather than taken from the register headers. */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES commands: set or clear a BPM register via the serdes path. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM register addresses */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* Length of the RLC "format direct" register list. */
#define RLC_FormatDirectRegListLength        14
93 
/* Firmware images required per ASIC (CE/PFP/ME/MEC[/MEC2]/RLC); these
 * declarations let userspace tooling pull the blobs into the initramfs. */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140 
/* Per-VMID GDS register offsets (base/size/GWS/OA), indexed by VMID 0-15. */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
160 
/* Tonga A11 golden register settings: {reg, update mask, value} triplets
 * consumed by amdgpu_program_register_sequence(). */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
179 
/* Tonga common golden settings: {reg, update mask, value} triplets. */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
191 
/* Tonga MGCG/CGCG (clock-gating) init sequence: {reg, update mask, value}
 * triplets; note mmGRBM_GFX_INDEX is written mid-sequence to broadcast the
 * subsequent per-CU CGTS writes. */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
270 
/* Polaris11 A11 golden register settings: {reg, update mask, value}. */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
290 
/* Polaris11 common golden settings: {reg, update mask, value}. */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
300 
/* Polaris10 A11 golden register settings: {reg, update mask, value}. */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
321 
/* Polaris10 common golden settings: {reg, update mask, value}. */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
333 
/* Fiji common golden settings: {reg, update mask, value}. */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
347 
/* Fiji A10 golden register settings: {reg, update mask, value}. */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
362 
/* Fiji MGCG/CGCG (clock-gating) init sequence: {reg, update mask, value}. */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
401 
/* Iceland (Topaz) A11 golden register settings: {reg, update mask, value}. */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
420 
/* Iceland (Topaz) common golden settings: {reg, update mask, value}. */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
432 
/* Iceland (Topaz) MGCG/CGCG init sequence: {reg, update mask, value};
 * covers six CUs of per-CU CGTS registers. */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
500 
/* Carrizo A11 golden register settings: {reg, update mask, value}. */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
514 
/* Carrizo common golden settings: {reg, update mask, value}. */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
526 
/* Carrizo MGCG/CGCG init sequence: {reg, update mask, value}; covers
 * eight CUs of per-CU CGTS registers. */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
605 
/* Stoney A11 golden register settings: {reg, update mask, value}. */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
619 
/* Stoney common golden settings: {reg, update mask, value}. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
631 
/* Stoney MGCG/CGCG init sequence: {reg, update mask, value}. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
641 
/* Forward declarations for helpers defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
648 
649 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
650 {
651 	switch (adev->asic_type) {
652 	case CHIP_TOPAZ:
653 		amdgpu_program_register_sequence(adev,
654 						 iceland_mgcg_cgcg_init,
655 						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
656 		amdgpu_program_register_sequence(adev,
657 						 golden_settings_iceland_a11,
658 						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
659 		amdgpu_program_register_sequence(adev,
660 						 iceland_golden_common_all,
661 						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
662 		break;
663 	case CHIP_FIJI:
664 		amdgpu_program_register_sequence(adev,
665 						 fiji_mgcg_cgcg_init,
666 						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
667 		amdgpu_program_register_sequence(adev,
668 						 golden_settings_fiji_a10,
669 						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
670 		amdgpu_program_register_sequence(adev,
671 						 fiji_golden_common_all,
672 						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
673 		break;
674 
675 	case CHIP_TONGA:
676 		amdgpu_program_register_sequence(adev,
677 						 tonga_mgcg_cgcg_init,
678 						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
679 		amdgpu_program_register_sequence(adev,
680 						 golden_settings_tonga_a11,
681 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
682 		amdgpu_program_register_sequence(adev,
683 						 tonga_golden_common_all,
684 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
685 		break;
686 	case CHIP_POLARIS11:
687 		amdgpu_program_register_sequence(adev,
688 						 golden_settings_polaris11_a11,
689 						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
690 		amdgpu_program_register_sequence(adev,
691 						 polaris11_golden_common_all,
692 						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
693 		break;
694 	case CHIP_POLARIS10:
695 		amdgpu_program_register_sequence(adev,
696 						 golden_settings_polaris10_a11,
697 						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
698 		amdgpu_program_register_sequence(adev,
699 						 polaris10_golden_common_all,
700 						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
701 		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
702 		if (adev->pdev->revision == 0xc7) {
703 			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
704 			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
705 		}
706 		break;
707 	case CHIP_CARRIZO:
708 		amdgpu_program_register_sequence(adev,
709 						 cz_mgcg_cgcg_init,
710 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
711 		amdgpu_program_register_sequence(adev,
712 						 cz_golden_settings_a11,
713 						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
714 		amdgpu_program_register_sequence(adev,
715 						 cz_golden_common_all,
716 						 (const u32)ARRAY_SIZE(cz_golden_common_all));
717 		break;
718 	case CHIP_STONEY:
719 		amdgpu_program_register_sequence(adev,
720 						 stoney_mgcg_cgcg_init,
721 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
722 		amdgpu_program_register_sequence(adev,
723 						 stoney_golden_settings_a11,
724 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
725 		amdgpu_program_register_sequence(adev,
726 						 stoney_golden_common_all,
727 						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
728 		break;
729 	default:
730 		break;
731 	}
732 }
733 
734 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
735 {
736 	int i;
737 
738 	adev->gfx.scratch.num_reg = 7;
739 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
740 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
741 		adev->gfx.scratch.free[i] = true;
742 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
743 	}
744 }
745 
/*
 * gfx_v8_0_ring_test_ring - basic CP ring liveness test
 *
 * Seeds a scratch register with 0xCAFEDEAD, then emits a
 * SET_UCONFIG_REG packet on @ring that writes 0xDEADBEEF to the same
 * register and polls (up to adev->usec_timeout iterations, 1us apart)
 * until the value shows up.  Proves the CP is fetching and executing
 * ring packets.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed value that the packet below must overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* 3-dword packet: header, register offset, value */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-wait for the CP to process the write */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
789 
/*
 * gfx_v8_0_ring_test_ib - test indirect buffer submission on @ring
 *
 * Builds a small IB containing a SET_UCONFIG_REG packet that writes
 * 0xDEADBEEF to a scratch register (pre-seeded with 0xCAFEDEAD),
 * schedules it, waits up to @timeout for the fence, then reads the
 * register back.  Exercises the full IB + scheduler + fence path, not
 * just ring packet fetch.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* seed value that the IB below must overwrite */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	/* fence_wait_timeout() returns 0 on timeout, <0 on error */
	r = fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
845 
846 
847 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
848 	release_firmware(adev->gfx.pfp_fw);
849 	adev->gfx.pfp_fw = NULL;
850 	release_firmware(adev->gfx.me_fw);
851 	adev->gfx.me_fw = NULL;
852 	release_firmware(adev->gfx.ce_fw);
853 	adev->gfx.ce_fw = NULL;
854 	release_firmware(adev->gfx.rlc_fw);
855 	adev->gfx.rlc_fw = NULL;
856 	release_firmware(adev->gfx.mec_fw);
857 	adev->gfx.mec_fw = NULL;
858 	if ((adev->asic_type != CHIP_STONEY) &&
859 	    (adev->asic_type != CHIP_TOPAZ))
860 		release_firmware(adev->gfx.mec2_fw);
861 	adev->gfx.mec2_fw = NULL;
862 
863 	kfree(adev->gfx.rlc.register_list_format);
864 }
865 
866 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
867 {
868 	const char *chip_name;
869 	char fw_name[30];
870 	int err;
871 	struct amdgpu_firmware_info *info = NULL;
872 	const struct common_firmware_header *header = NULL;
873 	const struct gfx_firmware_header_v1_0 *cp_hdr;
874 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
875 	unsigned int *tmp = NULL, i;
876 
877 	DRM_DEBUG("\n");
878 
879 	switch (adev->asic_type) {
880 	case CHIP_TOPAZ:
881 		chip_name = "topaz";
882 		break;
883 	case CHIP_TONGA:
884 		chip_name = "tonga";
885 		break;
886 	case CHIP_CARRIZO:
887 		chip_name = "carrizo";
888 		break;
889 	case CHIP_FIJI:
890 		chip_name = "fiji";
891 		break;
892 	case CHIP_POLARIS11:
893 		chip_name = "polaris11";
894 		break;
895 	case CHIP_POLARIS10:
896 		chip_name = "polaris10";
897 		break;
898 	case CHIP_STONEY:
899 		chip_name = "stoney";
900 		break;
901 	default:
902 		BUG();
903 	}
904 
905 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
906 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
907 	if (err)
908 		goto out;
909 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
910 	if (err)
911 		goto out;
912 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
913 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
914 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
915 
916 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
917 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
918 	if (err)
919 		goto out;
920 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
921 	if (err)
922 		goto out;
923 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
924 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
925 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
926 
927 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
928 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
929 	if (err)
930 		goto out;
931 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
932 	if (err)
933 		goto out;
934 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
935 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
936 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
937 
938 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
939 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
940 	if (err)
941 		goto out;
942 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
943 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
944 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
945 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
946 
947 	adev->gfx.rlc.save_and_restore_offset =
948 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
949 	adev->gfx.rlc.clear_state_descriptor_offset =
950 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
951 	adev->gfx.rlc.avail_scratch_ram_locations =
952 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
953 	adev->gfx.rlc.reg_restore_list_size =
954 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
955 	adev->gfx.rlc.reg_list_format_start =
956 			le32_to_cpu(rlc_hdr->reg_list_format_start);
957 	adev->gfx.rlc.reg_list_format_separate_start =
958 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
959 	adev->gfx.rlc.starting_offsets_start =
960 			le32_to_cpu(rlc_hdr->starting_offsets_start);
961 	adev->gfx.rlc.reg_list_format_size_bytes =
962 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
963 	adev->gfx.rlc.reg_list_size_bytes =
964 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
965 
966 	adev->gfx.rlc.register_list_format =
967 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
968 					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
969 
970 	if (!adev->gfx.rlc.register_list_format) {
971 		err = -ENOMEM;
972 		goto out;
973 	}
974 
975 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
976 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
977 	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
978 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
979 
980 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
981 
982 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
983 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
984 	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
985 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
986 
987 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
988 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
989 	if (err)
990 		goto out;
991 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
992 	if (err)
993 		goto out;
994 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
995 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
996 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
997 
998 	if ((adev->asic_type != CHIP_STONEY) &&
999 	    (adev->asic_type != CHIP_TOPAZ)) {
1000 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1001 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1002 		if (!err) {
1003 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1004 			if (err)
1005 				goto out;
1006 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1007 				adev->gfx.mec2_fw->data;
1008 			adev->gfx.mec2_fw_version =
1009 				le32_to_cpu(cp_hdr->header.ucode_version);
1010 			adev->gfx.mec2_feature_version =
1011 				le32_to_cpu(cp_hdr->ucode_feature_version);
1012 		} else {
1013 			err = 0;
1014 			adev->gfx.mec2_fw = NULL;
1015 		}
1016 	}
1017 
1018 	if (adev->firmware.smu_load) {
1019 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1020 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1021 		info->fw = adev->gfx.pfp_fw;
1022 		header = (const struct common_firmware_header *)info->fw->data;
1023 		adev->firmware.fw_size +=
1024 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1025 
1026 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1027 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1028 		info->fw = adev->gfx.me_fw;
1029 		header = (const struct common_firmware_header *)info->fw->data;
1030 		adev->firmware.fw_size +=
1031 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1032 
1033 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1034 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1035 		info->fw = adev->gfx.ce_fw;
1036 		header = (const struct common_firmware_header *)info->fw->data;
1037 		adev->firmware.fw_size +=
1038 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1039 
1040 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1041 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1042 		info->fw = adev->gfx.rlc_fw;
1043 		header = (const struct common_firmware_header *)info->fw->data;
1044 		adev->firmware.fw_size +=
1045 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1046 
1047 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1048 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1049 		info->fw = adev->gfx.mec_fw;
1050 		header = (const struct common_firmware_header *)info->fw->data;
1051 		adev->firmware.fw_size +=
1052 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053 
1054 		if (adev->gfx.mec2_fw) {
1055 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1056 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1057 			info->fw = adev->gfx.mec2_fw;
1058 			header = (const struct common_firmware_header *)info->fw->data;
1059 			adev->firmware.fw_size +=
1060 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1061 		}
1062 
1063 	}
1064 
1065 out:
1066 	if (err) {
1067 		dev_err(adev->dev,
1068 			"gfx8: Failed to load firmware \"%s\"\n",
1069 			fw_name);
1070 		release_firmware(adev->gfx.pfp_fw);
1071 		adev->gfx.pfp_fw = NULL;
1072 		release_firmware(adev->gfx.me_fw);
1073 		adev->gfx.me_fw = NULL;
1074 		release_firmware(adev->gfx.ce_fw);
1075 		adev->gfx.ce_fw = NULL;
1076 		release_firmware(adev->gfx.rlc_fw);
1077 		adev->gfx.rlc_fw = NULL;
1078 		release_firmware(adev->gfx.mec_fw);
1079 		adev->gfx.mec_fw = NULL;
1080 		release_firmware(adev->gfx.mec2_fw);
1081 		adev->gfx.mec2_fw = NULL;
1082 	}
1083 	return err;
1084 }
1085 
/*
 * gfx_v8_0_get_csb_buffer - emit the RLC clear-state PM4 stream
 * @adev: amdgpu device
 * @buffer: destination (typically the kmapped clear state BO)
 *
 * Writes a little-endian PM4 packet sequence: preamble begin, context
 * control, all SECT_CONTEXT register extents from the cs_data table,
 * an ASIC-specific PA_SC_RASTER_CONFIG pair, preamble end, and a final
 * CLEAR_STATE packet.  The buffer must be at least
 * gfx_v8_0_get_csb_size() dwords (see gfx_v8_0_rlc_init()).
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* only SECT_CONTEXT extents are emitted; any other section id
	 * aborts the dump (buffer is then left incomplete) */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 golden values,
	 * chosen per ASIC */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x0000002A);
		break;
	case CHIP_POLARIS11:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_FIJI:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		buffer[count++] = cpu_to_le32(0x00000002);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_STONEY:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1158 
/*
 * cz_init_cp_jump_table - build the CP jump table in the cp_table BO
 *
 * Copies the jump-table region (jt_offset/jt_size dwords) out of each
 * CP firmware image into consecutive regions of the kmapped cp_table
 * buffer.  The index mapping is: 0 = CE, 1 = PFP, 2 = ME, 3 = MEC, and
 * 4 = MEC2 (only on CARRIZO, where max_me is raised to 5).
 *
 * Caller must have reserved and kmapped rlc.cp_table_obj
 * (see gfx_v8_0_rlc_init()).
 */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else  if (me == 4) {
			/* me == 4 implies CARRIZO, which loads MEC2
			 * (gfx_v8_0_init_microcode()); mec2_fw is assumed
			 * non-NULL here */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		/* copy this engine's jump table after the previous one */
		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
1223 
1224 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1225 {
1226 	int r;
1227 
1228 	/* clear state block */
1229 	if (adev->gfx.rlc.clear_state_obj) {
1230 		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1231 		if (unlikely(r != 0))
1232 			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1233 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1234 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1235 
1236 		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1237 		adev->gfx.rlc.clear_state_obj = NULL;
1238 	}
1239 
1240 	/* jump table block */
1241 	if (adev->gfx.rlc.cp_table_obj) {
1242 		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1243 		if (unlikely(r != 0))
1244 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1245 		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1246 		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1247 
1248 		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1249 		adev->gfx.rlc.cp_table_obj = NULL;
1250 	}
1251 }
1252 
/*
 * gfx_v8_0_rlc_init - create and populate the RLC buffer objects
 *
 * Allocates (if needed), pins and kmaps a VRAM BO for the clear state
 * block and fills it via gfx_v8_0_get_csb_buffer().  On CARRIZO and
 * STONEY it additionally sets up the CP jump table BO and fills it via
 * cz_init_cp_jump_table().
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);

	}

	return 0;
}
1349 
1350 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1351 {
1352 	int r;
1353 
1354 	if (adev->gfx.mec.hpd_eop_obj) {
1355 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1356 		if (unlikely(r != 0))
1357 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1358 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1359 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1360 
1361 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1362 		adev->gfx.mec.hpd_eop_obj = NULL;
1363 	}
1364 }
1365 
1366 #define MEC_HPD_SIZE 2048
1367 
/*
 * gfx_v8_0_mec_init - allocate and zero the MEC HPD/EOP buffer
 *
 * Configures one MEC with one pipe (remaining pipes are handled by
 * KFD), then creates, pins and zeroes a GTT buffer of
 * num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes for the hardware queue
 * descriptors.  Note: the dev_warn strings say "HDP EOP" but this is
 * the HPD/EOP buffer — message typo kept for log compatibility.
 *
 * Returns 0 on success, negative error code on failure (partially
 * created state is cleaned up via gfx_v8_0_mec_fini()).
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	/* start with a clean descriptor area */
	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
1419 
/*
 * Raw GCN machine code (opaque dwords — do not edit) for the VGPR-init
 * compute shader dispatched by gfx_v8_0_do_edc_gpr_workarounds() as part
 * of the CARRIZO EDC workaround.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1456 
/*
 * Raw GCN machine code (opaque dwords — do not edit) for the SGPR-init
 * compute shader; dispatched twice (SGPR1/SGPR2 register sets) by
 * gfx_v8_0_do_edc_gpr_workarounds() for the CARRIZO EDC workaround.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1481 
/*
 * Register/value pairs programmed via PACKET3_SET_SH_REG before the
 * VGPR-init dispatch in gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1501 
/*
 * Register/value pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (SE0 thread mgmt mask 0x0f;
 * compare sgpr2_init_regs, which uses 0xf0).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1521 
/*
 * Register/value pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (SE0 thread mgmt mask 0xf0;
 * compare sgpr1_init_regs, which uses 0x0f).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1541 
/*
 * EDC SEC/DED error counter registers across the GFX blocks (CP, DC,
 * GDS, SPI, SQ, TCC, TCP, TD).  Consumed later in the EDC workaround
 * path — presumably read back to latch/clear the counters after the
 * GPR-init dispatches; confirm against the code following
 * gfx_v8_0_do_edc_gpr_workarounds() (outside this excerpt).
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1570 
1571 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1572 {
1573 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1574 	struct amdgpu_ib ib;
1575 	struct fence *f = NULL;
1576 	int r, i;
1577 	u32 tmp;
1578 	unsigned total_size, vgpr_offset, sgpr_offset;
1579 	u64 gpu_addr;
1580 
1581 	/* only supported on CZ */
1582 	if (adev->asic_type != CHIP_CARRIZO)
1583 		return 0;
1584 
1585 	/* bail if the compute ring is not ready */
1586 	if (!ring->ready)
1587 		return 0;
1588 
1589 	tmp = RREG32(mmGB_EDC_MODE);
1590 	WREG32(mmGB_EDC_MODE, 0);
1591 
1592 	total_size =
1593 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1594 	total_size +=
1595 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1596 	total_size +=
1597 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1598 	total_size = ALIGN(total_size, 256);
1599 	vgpr_offset = total_size;
1600 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1601 	sgpr_offset = total_size;
1602 	total_size += sizeof(sgpr_init_compute_shader);
1603 
1604 	/* allocate an indirect buffer to put the commands in */
1605 	memset(&ib, 0, sizeof(ib));
1606 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1607 	if (r) {
1608 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1609 		return r;
1610 	}
1611 
1612 	/* load the compute shaders */
1613 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1614 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1615 
1616 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1617 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1618 
1619 	/* init the ib length to 0 */
1620 	ib.length_dw = 0;
1621 
1622 	/* VGPR */
1623 	/* write the register state for the compute dispatch */
1624 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1625 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1626 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1627 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1628 	}
1629 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1630 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1631 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1632 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1633 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1634 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1635 
1636 	/* write dispatch packet */
1637 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1638 	ib.ptr[ib.length_dw++] = 8; /* x */
1639 	ib.ptr[ib.length_dw++] = 1; /* y */
1640 	ib.ptr[ib.length_dw++] = 1; /* z */
1641 	ib.ptr[ib.length_dw++] =
1642 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1643 
1644 	/* write CS partial flush packet */
1645 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1646 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1647 
1648 	/* SGPR1 */
1649 	/* write the register state for the compute dispatch */
1650 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1651 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1652 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1653 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1654 	}
1655 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1656 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1657 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1658 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1659 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1660 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1661 
1662 	/* write dispatch packet */
1663 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1664 	ib.ptr[ib.length_dw++] = 8; /* x */
1665 	ib.ptr[ib.length_dw++] = 1; /* y */
1666 	ib.ptr[ib.length_dw++] = 1; /* z */
1667 	ib.ptr[ib.length_dw++] =
1668 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1669 
1670 	/* write CS partial flush packet */
1671 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1672 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1673 
1674 	/* SGPR2 */
1675 	/* write the register state for the compute dispatch */
1676 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1677 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1678 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1679 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1680 	}
1681 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1682 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1683 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1684 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1685 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1686 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1687 
1688 	/* write dispatch packet */
1689 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1690 	ib.ptr[ib.length_dw++] = 8; /* x */
1691 	ib.ptr[ib.length_dw++] = 1; /* y */
1692 	ib.ptr[ib.length_dw++] = 1; /* z */
1693 	ib.ptr[ib.length_dw++] =
1694 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1695 
1696 	/* write CS partial flush packet */
1697 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1698 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1699 
	/* schedule the ib on the ring */
1701 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1702 	if (r) {
1703 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1704 		goto fail;
1705 	}
1706 
1707 	/* wait for the GPU to finish processing the IB */
1708 	r = fence_wait(f, false);
1709 	if (r) {
1710 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1711 		goto fail;
1712 	}
1713 
1714 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1715 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1716 	WREG32(mmGB_EDC_MODE, tmp);
1717 
1718 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1719 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1720 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1721 
1722 
1723 	/* read back registers to clear the counters */
1724 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1725 		RREG32(sec_ded_counter_registers[i]);
1726 
1727 fail:
1728 	amdgpu_ib_free(adev, &ib, NULL);
1729 	fence_put(f);
1730 
1731 	return r;
1732 }
1733 
1734 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1735 {
1736 	u32 gb_addr_config;
1737 	u32 mc_shared_chmap, mc_arb_ramcfg;
1738 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1739 	u32 tmp;
1740 	int ret;
1741 
1742 	switch (adev->asic_type) {
1743 	case CHIP_TOPAZ:
1744 		adev->gfx.config.max_shader_engines = 1;
1745 		adev->gfx.config.max_tile_pipes = 2;
1746 		adev->gfx.config.max_cu_per_sh = 6;
1747 		adev->gfx.config.max_sh_per_se = 1;
1748 		adev->gfx.config.max_backends_per_se = 2;
1749 		adev->gfx.config.max_texture_channel_caches = 2;
1750 		adev->gfx.config.max_gprs = 256;
1751 		adev->gfx.config.max_gs_threads = 32;
1752 		adev->gfx.config.max_hw_contexts = 8;
1753 
1754 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1755 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1756 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1757 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1758 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1759 		break;
1760 	case CHIP_FIJI:
1761 		adev->gfx.config.max_shader_engines = 4;
1762 		adev->gfx.config.max_tile_pipes = 16;
1763 		adev->gfx.config.max_cu_per_sh = 16;
1764 		adev->gfx.config.max_sh_per_se = 1;
1765 		adev->gfx.config.max_backends_per_se = 4;
1766 		adev->gfx.config.max_texture_channel_caches = 16;
1767 		adev->gfx.config.max_gprs = 256;
1768 		adev->gfx.config.max_gs_threads = 32;
1769 		adev->gfx.config.max_hw_contexts = 8;
1770 
1771 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1772 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1773 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1774 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1775 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1776 		break;
1777 	case CHIP_POLARIS11:
1778 		ret = amdgpu_atombios_get_gfx_info(adev);
1779 		if (ret)
1780 			return ret;
1781 		adev->gfx.config.max_gprs = 256;
1782 		adev->gfx.config.max_gs_threads = 32;
1783 		adev->gfx.config.max_hw_contexts = 8;
1784 
1785 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1786 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1787 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1788 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1789 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1790 		break;
1791 	case CHIP_POLARIS10:
1792 		ret = amdgpu_atombios_get_gfx_info(adev);
1793 		if (ret)
1794 			return ret;
1795 		adev->gfx.config.max_gprs = 256;
1796 		adev->gfx.config.max_gs_threads = 32;
1797 		adev->gfx.config.max_hw_contexts = 8;
1798 
1799 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1800 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1801 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1802 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1803 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1804 		break;
1805 	case CHIP_TONGA:
1806 		adev->gfx.config.max_shader_engines = 4;
1807 		adev->gfx.config.max_tile_pipes = 8;
1808 		adev->gfx.config.max_cu_per_sh = 8;
1809 		adev->gfx.config.max_sh_per_se = 1;
1810 		adev->gfx.config.max_backends_per_se = 2;
1811 		adev->gfx.config.max_texture_channel_caches = 8;
1812 		adev->gfx.config.max_gprs = 256;
1813 		adev->gfx.config.max_gs_threads = 32;
1814 		adev->gfx.config.max_hw_contexts = 8;
1815 
1816 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1817 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1818 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1819 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1820 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1821 		break;
1822 	case CHIP_CARRIZO:
1823 		adev->gfx.config.max_shader_engines = 1;
1824 		adev->gfx.config.max_tile_pipes = 2;
1825 		adev->gfx.config.max_sh_per_se = 1;
1826 		adev->gfx.config.max_backends_per_se = 2;
1827 
1828 		switch (adev->pdev->revision) {
1829 		case 0xc4:
1830 		case 0x84:
1831 		case 0xc8:
1832 		case 0xcc:
1833 		case 0xe1:
1834 		case 0xe3:
1835 			/* B10 */
1836 			adev->gfx.config.max_cu_per_sh = 8;
1837 			break;
1838 		case 0xc5:
1839 		case 0x81:
1840 		case 0x85:
1841 		case 0xc9:
1842 		case 0xcd:
1843 		case 0xe2:
1844 		case 0xe4:
1845 			/* B8 */
1846 			adev->gfx.config.max_cu_per_sh = 6;
1847 			break;
1848 		case 0xc6:
1849 		case 0xca:
1850 		case 0xce:
1851 		case 0x88:
1852 			/* B6 */
1853 			adev->gfx.config.max_cu_per_sh = 6;
1854 			break;
1855 		case 0xc7:
1856 		case 0x87:
1857 		case 0xcb:
1858 		case 0xe5:
1859 		case 0x89:
1860 		default:
1861 			/* B4 */
1862 			adev->gfx.config.max_cu_per_sh = 4;
1863 			break;
1864 		}
1865 
1866 		adev->gfx.config.max_texture_channel_caches = 2;
1867 		adev->gfx.config.max_gprs = 256;
1868 		adev->gfx.config.max_gs_threads = 32;
1869 		adev->gfx.config.max_hw_contexts = 8;
1870 
1871 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1872 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1873 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1874 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1875 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1876 		break;
1877 	case CHIP_STONEY:
1878 		adev->gfx.config.max_shader_engines = 1;
1879 		adev->gfx.config.max_tile_pipes = 2;
1880 		adev->gfx.config.max_sh_per_se = 1;
1881 		adev->gfx.config.max_backends_per_se = 1;
1882 
1883 		switch (adev->pdev->revision) {
1884 		case 0xc0:
1885 		case 0xc1:
1886 		case 0xc2:
1887 		case 0xc4:
1888 		case 0xc8:
1889 		case 0xc9:
1890 			adev->gfx.config.max_cu_per_sh = 3;
1891 			break;
1892 		case 0xd0:
1893 		case 0xd1:
1894 		case 0xd2:
1895 		default:
1896 			adev->gfx.config.max_cu_per_sh = 2;
1897 			break;
1898 		}
1899 
1900 		adev->gfx.config.max_texture_channel_caches = 2;
1901 		adev->gfx.config.max_gprs = 256;
1902 		adev->gfx.config.max_gs_threads = 16;
1903 		adev->gfx.config.max_hw_contexts = 8;
1904 
1905 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1906 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1907 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1908 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1909 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1910 		break;
1911 	default:
1912 		adev->gfx.config.max_shader_engines = 2;
1913 		adev->gfx.config.max_tile_pipes = 4;
1914 		adev->gfx.config.max_cu_per_sh = 2;
1915 		adev->gfx.config.max_sh_per_se = 1;
1916 		adev->gfx.config.max_backends_per_se = 2;
1917 		adev->gfx.config.max_texture_channel_caches = 4;
1918 		adev->gfx.config.max_gprs = 256;
1919 		adev->gfx.config.max_gs_threads = 32;
1920 		adev->gfx.config.max_hw_contexts = 8;
1921 
1922 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1923 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1924 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1925 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1926 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1927 		break;
1928 	}
1929 
1930 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1931 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1932 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1933 
1934 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1935 	adev->gfx.config.mem_max_burst_length_bytes = 256;
1936 	if (adev->flags & AMD_IS_APU) {
1937 		/* Get memory bank mapping mode. */
1938 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1939 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1940 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1941 
1942 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1943 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1944 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1945 
1946 		/* Validate settings in case only one DIMM installed. */
1947 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1948 			dimm00_addr_map = 0;
1949 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1950 			dimm01_addr_map = 0;
1951 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1952 			dimm10_addr_map = 0;
1953 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1954 			dimm11_addr_map = 0;
1955 
1956 		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1957 		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1958 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1959 			adev->gfx.config.mem_row_size_in_kb = 2;
1960 		else
1961 			adev->gfx.config.mem_row_size_in_kb = 1;
1962 	} else {
1963 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1964 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1965 		if (adev->gfx.config.mem_row_size_in_kb > 4)
1966 			adev->gfx.config.mem_row_size_in_kb = 4;
1967 	}
1968 
1969 	adev->gfx.config.shader_engine_tile_size = 32;
1970 	adev->gfx.config.num_gpus = 1;
1971 	adev->gfx.config.multi_gpu_tile_size = 64;
1972 
1973 	/* fix up row size */
1974 	switch (adev->gfx.config.mem_row_size_in_kb) {
1975 	case 1:
1976 	default:
1977 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1978 		break;
1979 	case 2:
1980 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1981 		break;
1982 	case 4:
1983 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1984 		break;
1985 	}
1986 	adev->gfx.config.gb_addr_config = gb_addr_config;
1987 
1988 	return 0;
1989 }
1990 
/*
 * gfx_v8_0_sw_init - sw init callback for the gfx v8 IP block
 *
 * Hooks up the gfx interrupt sources, loads the gfx microcode, creates
 * the RLC and MEC buffer objects, initializes the gfx and compute rings
 * and reserves the GDS, GWS and OA partitions for gfx use, then runs the
 * per-asic configuration (gfx_v8_0_gpu_early_init).
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Returns 0 on success or a negative error code on failure.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event (interrupt source id 181) */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg access fault (interrupt source id 184) */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged instruction fault (interrupt source id 185) */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	/* firmware must be available before the rings are brought up */
	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues (8 queues per pipe on MEC 1) */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
			NULL, &adev->gds.gds_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
		PAGE_SIZE, true,
		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
		NULL, &adev->gds.gws_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
			NULL, &adev->gds.oa_gfx_bo);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000; /* 32 KB of CE constant RAM */

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2110 
2111 static int gfx_v8_0_sw_fini(void *handle)
2112 {
2113 	int i;
2114 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2115 
2116 	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2117 	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2118 	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2119 
2120 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2121 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2122 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2123 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2124 
2125 	gfx_v8_0_mec_fini(adev);
2126 
2127 	gfx_v8_0_rlc_fini(adev);
2128 
2129 	gfx_v8_0_free_microcode(adev);
2130 
2131 	return 0;
2132 }
2133 
2134 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2135 {
2136 	uint32_t *modearray, *mod2array;
2137 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2138 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2139 	u32 reg_offset;
2140 
2141 	modearray = adev->gfx.config.tile_mode_array;
2142 	mod2array = adev->gfx.config.macrotile_mode_array;
2143 
2144 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2145 		modearray[reg_offset] = 0;
2146 
2147 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2148 		mod2array[reg_offset] = 0;
2149 
2150 	switch (adev->asic_type) {
2151 	case CHIP_TOPAZ:
2152 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153 				PIPE_CONFIG(ADDR_SURF_P2) |
2154 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2155 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157 				PIPE_CONFIG(ADDR_SURF_P2) |
2158 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2159 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161 				PIPE_CONFIG(ADDR_SURF_P2) |
2162 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2163 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165 				PIPE_CONFIG(ADDR_SURF_P2) |
2166 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2167 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169 				PIPE_CONFIG(ADDR_SURF_P2) |
2170 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2171 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2172 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2173 				PIPE_CONFIG(ADDR_SURF_P2) |
2174 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2175 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2176 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2177 				PIPE_CONFIG(ADDR_SURF_P2) |
2178 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2179 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2180 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2181 				PIPE_CONFIG(ADDR_SURF_P2));
2182 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2183 				PIPE_CONFIG(ADDR_SURF_P2) |
2184 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2187 				 PIPE_CONFIG(ADDR_SURF_P2) |
2188 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2189 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2191 				 PIPE_CONFIG(ADDR_SURF_P2) |
2192 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2193 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2194 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2195 				 PIPE_CONFIG(ADDR_SURF_P2) |
2196 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2199 				 PIPE_CONFIG(ADDR_SURF_P2) |
2200 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2203 				 PIPE_CONFIG(ADDR_SURF_P2) |
2204 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2206 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2207 				 PIPE_CONFIG(ADDR_SURF_P2) |
2208 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2209 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2210 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2211 				 PIPE_CONFIG(ADDR_SURF_P2) |
2212 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2213 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2215 				 PIPE_CONFIG(ADDR_SURF_P2) |
2216 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2217 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2219 				 PIPE_CONFIG(ADDR_SURF_P2) |
2220 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2223 				 PIPE_CONFIG(ADDR_SURF_P2) |
2224 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2227 				 PIPE_CONFIG(ADDR_SURF_P2) |
2228 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2229 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2230 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2231 				 PIPE_CONFIG(ADDR_SURF_P2) |
2232 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2233 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2234 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2235 				 PIPE_CONFIG(ADDR_SURF_P2) |
2236 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2237 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2238 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2239 				 PIPE_CONFIG(ADDR_SURF_P2) |
2240 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2241 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2242 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2243 				 PIPE_CONFIG(ADDR_SURF_P2) |
2244 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2245 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2246 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2247 				 PIPE_CONFIG(ADDR_SURF_P2) |
2248 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2249 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2250 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2251 				 PIPE_CONFIG(ADDR_SURF_P2) |
2252 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2253 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2254 
2255 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2256 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2257 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 				NUM_BANKS(ADDR_SURF_8_BANK));
2259 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2260 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262 				NUM_BANKS(ADDR_SURF_8_BANK));
2263 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2264 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2266 				NUM_BANKS(ADDR_SURF_8_BANK));
2267 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2269 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2270 				NUM_BANKS(ADDR_SURF_8_BANK));
2271 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2273 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2274 				NUM_BANKS(ADDR_SURF_8_BANK));
2275 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2277 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2278 				NUM_BANKS(ADDR_SURF_8_BANK));
2279 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2282 				NUM_BANKS(ADDR_SURF_8_BANK));
2283 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2284 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2285 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286 				NUM_BANKS(ADDR_SURF_16_BANK));
2287 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2288 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2289 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290 				NUM_BANKS(ADDR_SURF_16_BANK));
2291 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2292 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2293 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294 				 NUM_BANKS(ADDR_SURF_16_BANK));
2295 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2296 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2297 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2298 				 NUM_BANKS(ADDR_SURF_16_BANK));
2299 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2300 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2301 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2302 				 NUM_BANKS(ADDR_SURF_16_BANK));
2303 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2306 				 NUM_BANKS(ADDR_SURF_16_BANK));
2307 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2309 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2310 				 NUM_BANKS(ADDR_SURF_8_BANK));
2311 
2312 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2313 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2314 			    reg_offset != 23)
2315 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2316 
2317 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2318 			if (reg_offset != 7)
2319 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2320 
2321 		break;
2322 	case CHIP_FIJI:
2323 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2326 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2330 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2334 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2338 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2342 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2344 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2348 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2350 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2351 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2352 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2353 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2354 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2355 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2356 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2357 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2358 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2366 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2368 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2369 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2370 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2371 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2373 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2382 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2385 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2386 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2389 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2390 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2391 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2392 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2393 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2394 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2396 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2398 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2402 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2406 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2410 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2414 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2415 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2416 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2418 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2420 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2422 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2424 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2425 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2426 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2428 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2429 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2440 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2442 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2443 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2444 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2445 
2446 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449 				NUM_BANKS(ADDR_SURF_8_BANK));
2450 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453 				NUM_BANKS(ADDR_SURF_8_BANK));
2454 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2457 				NUM_BANKS(ADDR_SURF_8_BANK));
2458 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2461 				NUM_BANKS(ADDR_SURF_8_BANK));
2462 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2464 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465 				NUM_BANKS(ADDR_SURF_8_BANK));
2466 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469 				NUM_BANKS(ADDR_SURF_8_BANK));
2470 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2473 				NUM_BANKS(ADDR_SURF_8_BANK));
2474 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2476 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477 				NUM_BANKS(ADDR_SURF_8_BANK));
2478 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2480 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481 				NUM_BANKS(ADDR_SURF_8_BANK));
2482 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2484 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485 				 NUM_BANKS(ADDR_SURF_8_BANK));
2486 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2489 				 NUM_BANKS(ADDR_SURF_8_BANK));
2490 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2492 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2493 				 NUM_BANKS(ADDR_SURF_8_BANK));
2494 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2497 				 NUM_BANKS(ADDR_SURF_8_BANK));
2498 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2501 				 NUM_BANKS(ADDR_SURF_4_BANK));
2502 
2503 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2504 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2505 
2506 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2507 			if (reg_offset != 7)
2508 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2509 
2510 		break;
	case CHIP_TONGA:
		/*
		 * GB_TILE_MODE0..30 table.  All macro-tiled entries use the
		 * 8-pipe ADDR_SURF_P8_32x32_16x16 pipe config; the PRT
		 * fallback entries 7, 12, 17, 23 and 30 use ADDR_SURF_P4_16x16
		 * instead.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* entry 8 is the linear-aligned mode: no tile/sample split */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 bank-width/height/aspect settings.
		 * Index 7 is deliberately left uninitialized; the write loop
		 * below skips it.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile-mode table into consecutive GB_TILE_MODEn registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile table; offset 7 is never written (left unset above). */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
		/*
		 * GB_TILE_MODE0..30 table.  Every entry uses the 4-pipe
		 * ADDR_SURF_P4_16x16 pipe config (including the PRT fallback
		 * entries), unlike the 8- and 16-pipe parts in the other
		 * case arms.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		/* entry 8 is the linear-aligned mode: no tile/sample split */
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 bank-width/height/aspect settings.
		 * Index 7 is deliberately left uninitialized; the write loop
		 * below skips it.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		/* entries 8 and 9 are the only ones using BANK_WIDTH_2 here */
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program the tile-mode table into consecutive GB_TILE_MODEn registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile table; offset 7 is never written (left unset above). */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2902 	case CHIP_POLARIS10:
2903 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2906 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2910 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2914 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2918 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2922 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2923 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2924 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2925 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2926 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2927 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2928 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2930 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2931 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2932 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2933 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2934 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2935 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2936 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2937 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2940 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2941 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2944 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2946 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2948 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2949 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2951 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2952 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2954 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2956 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2957 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2961 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2962 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2964 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2966 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2969 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2970 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2971 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2973 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2974 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2976 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2978 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2982 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2984 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2986 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2990 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2994 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2995 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2996 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2997 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2998 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3000 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3001 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3002 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3004 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3005 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3006 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3008 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3009 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3010 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3012 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3013 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3014 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3016 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3018 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3020 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3021 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3023 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3024 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3025 
3026 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3028 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3029 				NUM_BANKS(ADDR_SURF_16_BANK));
3030 
3031 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3033 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3034 				NUM_BANKS(ADDR_SURF_16_BANK));
3035 
3036 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3038 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039 				NUM_BANKS(ADDR_SURF_16_BANK));
3040 
3041 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3043 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3044 				NUM_BANKS(ADDR_SURF_16_BANK));
3045 
3046 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3048 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3049 				NUM_BANKS(ADDR_SURF_16_BANK));
3050 
3051 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3054 				NUM_BANKS(ADDR_SURF_16_BANK));
3055 
3056 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3058 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3059 				NUM_BANKS(ADDR_SURF_16_BANK));
3060 
3061 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3062 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3063 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3064 				NUM_BANKS(ADDR_SURF_16_BANK));
3065 
3066 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3067 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3068 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3069 				NUM_BANKS(ADDR_SURF_16_BANK));
3070 
3071 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3072 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3073 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3074 				NUM_BANKS(ADDR_SURF_16_BANK));
3075 
3076 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3077 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3078 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3079 				NUM_BANKS(ADDR_SURF_16_BANK));
3080 
3081 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3083 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3084 				NUM_BANKS(ADDR_SURF_8_BANK));
3085 
3086 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3088 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3089 				NUM_BANKS(ADDR_SURF_4_BANK));
3090 
3091 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3092 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3093 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3094 				NUM_BANKS(ADDR_SURF_4_BANK));
3095 
3096 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3097 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3098 
3099 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3100 			if (reg_offset != 7)
3101 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3102 
3103 		break;
3104 	case CHIP_STONEY:
3105 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106 				PIPE_CONFIG(ADDR_SURF_P2) |
3107 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3108 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110 				PIPE_CONFIG(ADDR_SURF_P2) |
3111 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3112 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3114 				PIPE_CONFIG(ADDR_SURF_P2) |
3115 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3116 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3118 				PIPE_CONFIG(ADDR_SURF_P2) |
3119 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3120 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3121 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122 				PIPE_CONFIG(ADDR_SURF_P2) |
3123 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3124 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3125 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3126 				PIPE_CONFIG(ADDR_SURF_P2) |
3127 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3128 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3129 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3130 				PIPE_CONFIG(ADDR_SURF_P2) |
3131 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3132 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3133 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3134 				PIPE_CONFIG(ADDR_SURF_P2));
3135 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3136 				PIPE_CONFIG(ADDR_SURF_P2) |
3137 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3138 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3139 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3140 				 PIPE_CONFIG(ADDR_SURF_P2) |
3141 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3142 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3144 				 PIPE_CONFIG(ADDR_SURF_P2) |
3145 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3146 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3147 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3148 				 PIPE_CONFIG(ADDR_SURF_P2) |
3149 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3150 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3151 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3152 				 PIPE_CONFIG(ADDR_SURF_P2) |
3153 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3155 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3156 				 PIPE_CONFIG(ADDR_SURF_P2) |
3157 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3158 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3159 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3160 				 PIPE_CONFIG(ADDR_SURF_P2) |
3161 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3162 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3163 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3164 				 PIPE_CONFIG(ADDR_SURF_P2) |
3165 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3166 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3168 				 PIPE_CONFIG(ADDR_SURF_P2) |
3169 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3170 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3172 				 PIPE_CONFIG(ADDR_SURF_P2) |
3173 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3176 				 PIPE_CONFIG(ADDR_SURF_P2) |
3177 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3180 				 PIPE_CONFIG(ADDR_SURF_P2) |
3181 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3182 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3183 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3184 				 PIPE_CONFIG(ADDR_SURF_P2) |
3185 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3186 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3187 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3188 				 PIPE_CONFIG(ADDR_SURF_P2) |
3189 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3190 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3191 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3192 				 PIPE_CONFIG(ADDR_SURF_P2) |
3193 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3194 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3195 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3196 				 PIPE_CONFIG(ADDR_SURF_P2) |
3197 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3198 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3199 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200 				 PIPE_CONFIG(ADDR_SURF_P2) |
3201 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3202 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3204 				 PIPE_CONFIG(ADDR_SURF_P2) |
3205 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3206 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3207 
3208 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3210 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211 				NUM_BANKS(ADDR_SURF_8_BANK));
3212 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3214 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215 				NUM_BANKS(ADDR_SURF_8_BANK));
3216 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3219 				NUM_BANKS(ADDR_SURF_8_BANK));
3220 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3222 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3223 				NUM_BANKS(ADDR_SURF_8_BANK));
3224 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3227 				NUM_BANKS(ADDR_SURF_8_BANK));
3228 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3230 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3231 				NUM_BANKS(ADDR_SURF_8_BANK));
3232 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3233 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3234 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3235 				NUM_BANKS(ADDR_SURF_8_BANK));
3236 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3237 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3238 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3239 				NUM_BANKS(ADDR_SURF_16_BANK));
3240 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3241 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3242 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243 				NUM_BANKS(ADDR_SURF_16_BANK));
3244 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3245 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3246 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247 				 NUM_BANKS(ADDR_SURF_16_BANK));
3248 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3249 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3250 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3251 				 NUM_BANKS(ADDR_SURF_16_BANK));
3252 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3253 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3254 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3255 				 NUM_BANKS(ADDR_SURF_16_BANK));
3256 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3258 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259 				 NUM_BANKS(ADDR_SURF_16_BANK));
3260 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3261 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3262 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3263 				 NUM_BANKS(ADDR_SURF_8_BANK));
3264 
3265 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3266 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3267 			    reg_offset != 23)
3268 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3269 
3270 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3271 			if (reg_offset != 7)
3272 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3273 
3274 		break;
3275 	default:
3276 		dev_warn(adev->dev,
3277 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3278 			 adev->asic_type);
3279 
3280 	case CHIP_CARRIZO:
3281 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3282 				PIPE_CONFIG(ADDR_SURF_P2) |
3283 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3284 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3285 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3286 				PIPE_CONFIG(ADDR_SURF_P2) |
3287 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3288 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3290 				PIPE_CONFIG(ADDR_SURF_P2) |
3291 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3292 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3294 				PIPE_CONFIG(ADDR_SURF_P2) |
3295 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3296 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3297 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3298 				PIPE_CONFIG(ADDR_SURF_P2) |
3299 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3300 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3301 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3302 				PIPE_CONFIG(ADDR_SURF_P2) |
3303 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3304 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3305 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3306 				PIPE_CONFIG(ADDR_SURF_P2) |
3307 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3308 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3309 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3310 				PIPE_CONFIG(ADDR_SURF_P2));
3311 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3312 				PIPE_CONFIG(ADDR_SURF_P2) |
3313 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3314 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3315 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3316 				 PIPE_CONFIG(ADDR_SURF_P2) |
3317 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3318 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3320 				 PIPE_CONFIG(ADDR_SURF_P2) |
3321 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3322 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3323 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3324 				 PIPE_CONFIG(ADDR_SURF_P2) |
3325 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3326 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3327 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3328 				 PIPE_CONFIG(ADDR_SURF_P2) |
3329 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3330 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3331 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3332 				 PIPE_CONFIG(ADDR_SURF_P2) |
3333 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3334 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3335 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3336 				 PIPE_CONFIG(ADDR_SURF_P2) |
3337 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3338 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3339 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3340 				 PIPE_CONFIG(ADDR_SURF_P2) |
3341 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3342 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3343 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3344 				 PIPE_CONFIG(ADDR_SURF_P2) |
3345 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3346 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3348 				 PIPE_CONFIG(ADDR_SURF_P2) |
3349 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3352 				 PIPE_CONFIG(ADDR_SURF_P2) |
3353 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3354 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3356 				 PIPE_CONFIG(ADDR_SURF_P2) |
3357 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3358 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3359 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3360 				 PIPE_CONFIG(ADDR_SURF_P2) |
3361 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3362 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3363 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3364 				 PIPE_CONFIG(ADDR_SURF_P2) |
3365 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3366 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3367 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3368 				 PIPE_CONFIG(ADDR_SURF_P2) |
3369 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3370 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3371 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3372 				 PIPE_CONFIG(ADDR_SURF_P2) |
3373 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3374 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3375 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3376 				 PIPE_CONFIG(ADDR_SURF_P2) |
3377 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3378 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3379 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3380 				 PIPE_CONFIG(ADDR_SURF_P2) |
3381 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3382 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3383 
3384 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3386 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3387 				NUM_BANKS(ADDR_SURF_8_BANK));
3388 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3390 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3391 				NUM_BANKS(ADDR_SURF_8_BANK));
3392 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3394 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3395 				NUM_BANKS(ADDR_SURF_8_BANK));
3396 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3397 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3398 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3399 				NUM_BANKS(ADDR_SURF_8_BANK));
3400 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3401 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3402 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3403 				NUM_BANKS(ADDR_SURF_8_BANK));
3404 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3405 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3406 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3407 				NUM_BANKS(ADDR_SURF_8_BANK));
3408 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3409 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3410 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3411 				NUM_BANKS(ADDR_SURF_8_BANK));
3412 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3413 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3414 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3415 				NUM_BANKS(ADDR_SURF_16_BANK));
3416 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3417 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3418 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419 				NUM_BANKS(ADDR_SURF_16_BANK));
3420 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3421 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3422 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3423 				 NUM_BANKS(ADDR_SURF_16_BANK));
3424 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3425 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3426 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3427 				 NUM_BANKS(ADDR_SURF_16_BANK));
3428 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3429 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3430 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3431 				 NUM_BANKS(ADDR_SURF_16_BANK));
3432 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3433 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3434 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3435 				 NUM_BANKS(ADDR_SURF_16_BANK));
3436 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3437 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3438 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3439 				 NUM_BANKS(ADDR_SURF_8_BANK));
3440 
3441 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3442 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3443 			    reg_offset != 23)
3444 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3445 
3446 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3447 			if (reg_offset != 7)
3448 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3449 
3450 		break;
3451 	}
3452 }
3453 
3454 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3455 				  u32 se_num, u32 sh_num, u32 instance)
3456 {
3457 	u32 data;
3458 
3459 	if (instance == 0xffffffff)
3460 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3461 	else
3462 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3463 
3464 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3465 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3466 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3467 	} else if (se_num == 0xffffffff) {
3468 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3469 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3470 	} else if (sh_num == 0xffffffff) {
3471 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3472 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3473 	} else {
3474 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3475 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3476 	}
3477 	WREG32(mmGRBM_GFX_INDEX, data);
3478 }
3479 
/* Return a mask with the low @bit_width bits set (e.g. 4 -> 0xf). */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	u32 mask = 0;

	while (bit_width--)
		mask = (mask << 1) | 1;

	return mask;
}
3484 
3485 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3486 {
3487 	u32 data, mask;
3488 
3489 	data = RREG32(mmCC_RB_BACKEND_DISABLE);
3490 	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3491 
3492 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3493 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3494 
3495 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3496 				       adev->gfx.config.max_sh_per_se);
3497 
3498 	return (~data) & mask;
3499 }
3500 
3501 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3502 {
3503 	int i, j;
3504 	u32 data;
3505 	u32 active_rbs = 0;
3506 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3507 					adev->gfx.config.max_sh_per_se;
3508 
3509 	mutex_lock(&adev->grbm_idx_mutex);
3510 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3511 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3512 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3513 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3514 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3515 					       rb_bitmap_width_per_sh);
3516 		}
3517 	}
3518 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3519 	mutex_unlock(&adev->grbm_idx_mutex);
3520 
3521 	adev->gfx.config.backend_enable_mask = active_rbs;
3522 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3523 }
3524 
/**
 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
3533 #define DEFAULT_SH_MEM_BASES	(0x6000)
3534 #define FIRST_COMPUTE_VMID	(8)
3535 #define LAST_COMPUTE_VMID	(16)
3536 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3537 {
3538 	int i;
3539 	uint32_t sh_mem_config;
3540 	uint32_t sh_mem_bases;
3541 
3542 	/*
3543 	 * Configure apertures:
3544 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3545 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3546 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3547 	 */
3548 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3549 
3550 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3551 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3552 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3553 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3554 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3555 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3556 
3557 	mutex_lock(&adev->srbm_mutex);
3558 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3559 		vi_srbm_select(adev, 0, 0, 0, i);
3560 		/* CP and shaders */
3561 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3562 		WREG32(mmSH_MEM_APE1_BASE, 1);
3563 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3564 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3565 	}
3566 	vi_srbm_select(adev, 0, 0, 0, 0);
3567 	mutex_unlock(&adev->srbm_mutex);
3568 }
3569 
3570 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3571 {
3572 	u32 tmp;
3573 	int i;
3574 
3575 	tmp = RREG32(mmGRBM_CNTL);
3576 	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3577 	WREG32(mmGRBM_CNTL, tmp);
3578 
3579 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3580 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3581 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3582 
3583 	gfx_v8_0_tiling_mode_table_init(adev);
3584 
3585 	gfx_v8_0_setup_rb(adev);
3586 	gfx_v8_0_get_cu_info(adev);
3587 
3588 	/* XXX SH_MEM regs */
3589 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3590 	mutex_lock(&adev->srbm_mutex);
3591 	for (i = 0; i < 16; i++) {
3592 		vi_srbm_select(adev, 0, 0, 0, i);
3593 		/* CP and shaders */
3594 		if (i == 0) {
3595 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3596 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3597 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3598 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3599 			WREG32(mmSH_MEM_CONFIG, tmp);
3600 		} else {
3601 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3602 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3603 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3604 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3605 			WREG32(mmSH_MEM_CONFIG, tmp);
3606 		}
3607 
3608 		WREG32(mmSH_MEM_APE1_BASE, 1);
3609 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3610 		WREG32(mmSH_MEM_BASES, 0);
3611 	}
3612 	vi_srbm_select(adev, 0, 0, 0, 0);
3613 	mutex_unlock(&adev->srbm_mutex);
3614 
3615 	gfx_v8_0_init_compute_vmid(adev);
3616 
3617 	mutex_lock(&adev->grbm_idx_mutex);
3618 	/*
3619 	 * making sure that the following register writes will be broadcasted
3620 	 * to all the shaders
3621 	 */
3622 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3623 
3624 	WREG32(mmPA_SC_FIFO_SIZE,
3625 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3626 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3627 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3628 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3629 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3630 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3631 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3632 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3633 	mutex_unlock(&adev->grbm_idx_mutex);
3634 
3635 }
3636 
3637 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3638 {
3639 	u32 i, j, k;
3640 	u32 mask;
3641 
3642 	mutex_lock(&adev->grbm_idx_mutex);
3643 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3644 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3645 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3646 			for (k = 0; k < adev->usec_timeout; k++) {
3647 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3648 					break;
3649 				udelay(1);
3650 			}
3651 		}
3652 	}
3653 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3654 	mutex_unlock(&adev->grbm_idx_mutex);
3655 
3656 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3657 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3658 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3659 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3660 	for (k = 0; k < adev->usec_timeout; k++) {
3661 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3662 			break;
3663 		udelay(1);
3664 	}
3665 }
3666 
3667 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3668 					       bool enable)
3669 {
3670 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3671 
3672 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3673 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3674 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3675 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3676 
3677 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3678 }
3679 
3680 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3681 {
3682 	/* csib */
3683 	WREG32(mmRLC_CSIB_ADDR_HI,
3684 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3685 	WREG32(mmRLC_CSIB_ADDR_LO,
3686 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3687 	WREG32(mmRLC_CSIB_LENGTH,
3688 			adev->gfx.rlc.clear_state_size);
3689 }
3690 
/*
 * gfx_v8_0_parse_ind_reg_list - canonicalize an RLC indirect register list
 *
 * Walks @register_list_format from @ind_offset up to @list_size.  Each run
 * of words terminated by the 0xFFFFFFFF sentinel is one indirect block;
 * the offset of every block start is recorded in @ind_start_offsets
 * (bumping *@offset_count).  Within a block, entries are 3 words and the
 * third word is an index-register value: it is replaced in place by its
 * position in @unique_indices, appending new values to that table (and
 * bumping *@indices_count) as they are first seen.
 *
 * Fix vs. the previous version: both BUG_ON() capacity checks ran AFTER
 * the table write and the counter increment, so an overflowing store
 * happened before the check fired, and a table filled exactly to
 * capacity tripped the BUG even though the last write was in bounds.
 * The checks now guard the writes.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			/* check capacity before recording the block start */
			BUG_ON(*offset_count >= max_offset);
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip the two data words to reach the index word */
		ind_offset += 2;

		/* look for a matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* first occurrence: append, guarding the write */
			BUG_ON(*indices_count >= max_indices);
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
		}

		/* replace the raw value with its table position */
		register_list_format[ind_offset] = indices;
	}
}
3740 
/*
 * Build and upload the RLC save/restore register lists.
 *
 * Copies the firmware-provided register list format, rewrites its indirect
 * entries into table indices via gfx_v8_0_parse_ind_reg_list(), then
 * programs: the direct save/restore list into SRM ARAM, the rewritten
 * indirect list plus its per-block start offsets into RLC GPM scratch,
 * and the table of unique index registers into the
 * RLC_SRM_INDEX_CNTL_ADDR/DATA register pairs.
 *
 * Returns 0 on success, -ENOMEM if the temporary list copy cannot be
 * allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parse_ind_reg_list rewrites entries in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (register_list_format == NULL)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	temp = RREG32(mmRLC_SRM_CNTL);
	temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
	WREG32(mmRLC_SRM_CNTL, temp);

	/* direct list: stream into SRM ARAM using auto-increment */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* restore-list size in dwords, halved — NOTE(review): presumably
	 * register/value pairs; confirm the expected unit */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices: low bits into the ADDR register, high bits into
	 * DATA.  NOTE(review): mask 0x3FFFF (bits 0-17) vs shift 20 leaves
	 * bits 18-19 unused — confirm the intended field split */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
	}
	kfree(register_list_format);

	return 0;
}
3804 
3805 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3806 {
3807 	uint32_t data;
3808 
3809 	data = RREG32(mmRLC_SRM_CNTL);
3810 	data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3811 	WREG32(mmRLC_SRM_CNTL, data);
3812 }
3813 
3814 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3815 {
3816 	uint32_t data;
3817 
3818 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3819 			      AMD_PG_SUPPORT_GFX_SMG |
3820 			      AMD_PG_SUPPORT_GFX_DMG)) {
3821 		data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3822 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3823 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3824 		WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3825 
3826 		data = 0;
3827 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3828 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3829 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3830 		data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3831 		WREG32(mmRLC_PG_DELAY, data);
3832 
3833 		data = RREG32(mmRLC_PG_DELAY_2);
3834 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3835 		data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3836 		WREG32(mmRLC_PG_DELAY_2, data);
3837 
3838 		data = RREG32(mmRLC_AUTO_PG_CTRL);
3839 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3840 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3841 		WREG32(mmRLC_AUTO_PG_CTRL, data);
3842 	}
3843 }
3844 
3845 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3846 						bool enable)
3847 {
3848 	u32 data, orig;
3849 
3850 	orig = data = RREG32(mmRLC_PG_CNTL);
3851 
3852 	if (enable)
3853 		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3854 	else
3855 		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3856 
3857 	if (orig != data)
3858 		WREG32(mmRLC_PG_CNTL, data);
3859 }
3860 
3861 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3862 						  bool enable)
3863 {
3864 	u32 data, orig;
3865 
3866 	orig = data = RREG32(mmRLC_PG_CNTL);
3867 
3868 	if (enable)
3869 		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3870 	else
3871 		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3872 
3873 	if (orig != data)
3874 		WREG32(mmRLC_PG_CNTL, data);
3875 }
3876 
3877 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3878 {
3879 	u32 data, orig;
3880 
3881 	orig = data = RREG32(mmRLC_PG_CNTL);
3882 
3883 	if (enable)
3884 		data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3885 	else
3886 		data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3887 
3888 	if (orig != data)
3889 		WREG32(mmRLC_PG_CNTL, data);
3890 }
3891 
3892 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3893 {
3894 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3895 			      AMD_PG_SUPPORT_GFX_SMG |
3896 			      AMD_PG_SUPPORT_GFX_DMG |
3897 			      AMD_PG_SUPPORT_CP |
3898 			      AMD_PG_SUPPORT_GDS |
3899 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3900 		gfx_v8_0_init_csb(adev);
3901 		gfx_v8_0_init_save_restore_list(adev);
3902 		gfx_v8_0_enable_save_restore_machine(adev);
3903 
3904 		if ((adev->asic_type == CHIP_CARRIZO) ||
3905 		    (adev->asic_type == CHIP_STONEY)) {
3906 			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3907 			gfx_v8_0_init_power_gating(adev);
3908 			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3909 			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3910 				cz_enable_sck_slow_down_on_power_up(adev, true);
3911 				cz_enable_sck_slow_down_on_power_down(adev, true);
3912 			} else {
3913 				cz_enable_sck_slow_down_on_power_up(adev, false);
3914 				cz_enable_sck_slow_down_on_power_down(adev, false);
3915 			}
3916 			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3917 				cz_enable_cp_power_gating(adev, true);
3918 			else
3919 				cz_enable_cp_power_gating(adev, false);
3920 		} else if (adev->asic_type == CHIP_POLARIS11) {
3921 			gfx_v8_0_init_power_gating(adev);
3922 		}
3923 	}
3924 }
3925 
3926 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3927 {
3928 	u32 tmp = RREG32(mmRLC_CNTL);
3929 
3930 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3931 	WREG32(mmRLC_CNTL, tmp);
3932 
3933 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3934 
3935 	gfx_v8_0_wait_for_rlc_serdes(adev);
3936 }
3937 
3938 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3939 {
3940 	u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3941 
3942 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3943 	WREG32(mmGRBM_SOFT_RESET, tmp);
3944 	udelay(50);
3945 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3946 	WREG32(mmGRBM_SOFT_RESET, tmp);
3947 	udelay(50);
3948 }
3949 
3950 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3951 {
3952 	u32 tmp = RREG32(mmRLC_CNTL);
3953 
3954 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3955 	WREG32(mmRLC_CNTL, tmp);
3956 
3957 	/* carrizo do enable cp interrupt after cp inited */
3958 	if (!(adev->flags & AMD_IS_APU))
3959 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3960 
3961 	udelay(50);
3962 }
3963 
3964 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3965 {
3966 	const struct rlc_firmware_header_v2_0 *hdr;
3967 	const __le32 *fw_data;
3968 	unsigned i, fw_size;
3969 
3970 	if (!adev->gfx.rlc_fw)
3971 		return -EINVAL;
3972 
3973 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3974 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3975 
3976 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3977 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3978 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3979 
3980 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3981 	for (i = 0; i < fw_size; i++)
3982 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3983 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3984 
3985 	return 0;
3986 }
3987 
3988 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3989 {
3990 	int r;
3991 
3992 	gfx_v8_0_rlc_stop(adev);
3993 
3994 	/* disable CG */
3995 	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3996 	if (adev->asic_type == CHIP_POLARIS11 ||
3997 		adev->asic_type == CHIP_POLARIS10)
3998 		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3999 
4000 	/* disable PG */
4001 	WREG32(mmRLC_PG_CNTL, 0);
4002 
4003 	gfx_v8_0_rlc_reset(adev);
4004 
4005 	gfx_v8_0_init_pg(adev);
4006 
4007 	if (!adev->pp_enabled) {
4008 		if (!adev->firmware.smu_load) {
4009 			/* legacy rlc firmware loading */
4010 			r = gfx_v8_0_rlc_load_microcode(adev);
4011 			if (r)
4012 				return r;
4013 		} else {
4014 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4015 							AMDGPU_UCODE_ID_RLC_G);
4016 			if (r)
4017 				return -EINVAL;
4018 		}
4019 	}
4020 
4021 	gfx_v8_0_rlc_start(adev);
4022 
4023 	return 0;
4024 }
4025 
4026 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4027 {
4028 	int i;
4029 	u32 tmp = RREG32(mmCP_ME_CNTL);
4030 
4031 	if (enable) {
4032 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4033 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4034 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4035 	} else {
4036 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4037 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4038 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4039 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4040 			adev->gfx.gfx_ring[i].ready = false;
4041 	}
4042 	WREG32(mmCP_ME_CNTL, tmp);
4043 	udelay(50);
4044 }
4045 
4046 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4047 {
4048 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
4049 	const struct gfx_firmware_header_v1_0 *ce_hdr;
4050 	const struct gfx_firmware_header_v1_0 *me_hdr;
4051 	const __le32 *fw_data;
4052 	unsigned i, fw_size;
4053 
4054 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4055 		return -EINVAL;
4056 
4057 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4058 		adev->gfx.pfp_fw->data;
4059 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4060 		adev->gfx.ce_fw->data;
4061 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
4062 		adev->gfx.me_fw->data;
4063 
4064 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4065 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4066 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4067 
4068 	gfx_v8_0_cp_gfx_enable(adev, false);
4069 
4070 	/* PFP */
4071 	fw_data = (const __le32 *)
4072 		(adev->gfx.pfp_fw->data +
4073 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4074 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4075 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
4076 	for (i = 0; i < fw_size; i++)
4077 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4078 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4079 
4080 	/* CE */
4081 	fw_data = (const __le32 *)
4082 		(adev->gfx.ce_fw->data +
4083 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4084 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4085 	WREG32(mmCP_CE_UCODE_ADDR, 0);
4086 	for (i = 0; i < fw_size; i++)
4087 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4088 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4089 
4090 	/* ME */
4091 	fw_data = (const __le32 *)
4092 		(adev->gfx.me_fw->data +
4093 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4094 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4095 	WREG32(mmCP_ME_RAM_WADDR, 0);
4096 	for (i = 0; i < fw_size; i++)
4097 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4098 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4099 
4100 	return 0;
4101 }
4102 
4103 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4104 {
4105 	u32 count = 0;
4106 	const struct cs_section_def *sect = NULL;
4107 	const struct cs_extent_def *ext = NULL;
4108 
4109 	/* begin clear state */
4110 	count += 2;
4111 	/* context control state */
4112 	count += 3;
4113 
4114 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4115 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4116 			if (sect->id == SECT_CONTEXT)
4117 				count += 2 + ext->reg_count;
4118 			else
4119 				return 0;
4120 		}
4121 	}
4122 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4123 	count += 4;
4124 	/* end clear state */
4125 	count += 2;
4126 	/* clear state */
4127 	count += 2;
4128 
4129 	return count;
4130 }
4131 
/*
 * gfx_v8_0_cp_gfx_start - initialize the CP and emit the clear-state stream
 *
 * Programs basic CP config registers, un-halts the gfx CP, then submits a
 * PM4 packet stream on gfx ring 0: preamble begin, context control, the
 * SECT_CONTEXT register extents from vi_cs_data, per-ASIC raster config
 * values, preamble end, CLEAR_STATE and the CE partition bases.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords for the SET_BASE packet appended below */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4223 
/*
 * gfx_v8_0_cp_gfx_resume - bring up gfx ring 0
 *
 * Programs the CP ring buffer (size, pointers, rptr writeback address,
 * base), configures the gfx doorbell on ASICs that have one, then starts
 * the ring via gfx_v8_0_cp_gfx_start() and runs a ring test.
 *
 * Returns 0 on success or the ring-test error, in which case the ring is
 * marked not ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* let the pointer reset settle, then drop RB_RPTR_WR_ENA */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base address, 256-byte aligned */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* restrict the doorbell aperture on Tonga */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
4306 
4307 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4308 {
4309 	int i;
4310 
4311 	if (enable) {
4312 		WREG32(mmCP_MEC_CNTL, 0);
4313 	} else {
4314 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4315 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4316 			adev->gfx.compute_ring[i].ready = false;
4317 	}
4318 	udelay(50);
4319 }
4320 
4321 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4322 {
4323 	const struct gfx_firmware_header_v1_0 *mec_hdr;
4324 	const __le32 *fw_data;
4325 	unsigned i, fw_size;
4326 
4327 	if (!adev->gfx.mec_fw)
4328 		return -EINVAL;
4329 
4330 	gfx_v8_0_cp_compute_enable(adev, false);
4331 
4332 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4333 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4334 
4335 	fw_data = (const __le32 *)
4336 		(adev->gfx.mec_fw->data +
4337 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4338 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4339 
4340 	/* MEC1 */
4341 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4342 	for (i = 0; i < fw_size; i++)
4343 		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4344 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4345 
4346 	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4347 	if (adev->gfx.mec2_fw) {
4348 		const struct gfx_firmware_header_v1_0 *mec2_hdr;
4349 
4350 		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4351 		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4352 
4353 		fw_data = (const __le32 *)
4354 			(adev->gfx.mec2_fw->data +
4355 			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4356 		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4357 
4358 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4359 		for (i = 0; i < fw_size; i++)
4360 			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4361 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4362 	}
4363 
4364 	return 0;
4365 }
4366 
/*
 * vi_mqd - per-queue MQD (memory queue descriptor) layout for VI compute.
 *
 * This mirrors the hardware/firmware-defined 512-dword descriptor that the
 * CP reads to (re)initialize a compute hardware queue; the first 256 dwords
 * are queue state (each field comment gives its 1-based dword ordinal, so
 * layout and order must not be changed) and the trailing 256 dwords are
 * reserved for ucode use.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
4627 
4628 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4629 {
4630 	int i, r;
4631 
4632 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4633 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4634 
4635 		if (ring->mqd_obj) {
4636 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4637 			if (unlikely(r != 0))
4638 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4639 
4640 			amdgpu_bo_unpin(ring->mqd_obj);
4641 			amdgpu_bo_unreserve(ring->mqd_obj);
4642 
4643 			amdgpu_bo_unref(&ring->mqd_obj);
4644 			ring->mqd_obj = NULL;
4645 		}
4646 	}
4647 }
4648 
/*
 * gfx_v8_0_cp_compute_resume - bring up the compute (MEC) micro engines
 *
 * Programs the per-pipe EOP buffers, then allocates/maps one MQD (memory
 * queue descriptor) per compute ring, initializes it, mirrors it into the
 * HQD registers, activates each queue and finally ring-tests every
 * compute ring.  The register writes below are order-sensitive; they
 * follow the HQD programming sequence expected by the CP.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 live on ME 1, pipes 4-7 on ME 2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		/* EOP base registers take a 256-byte-aligned address */
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* lazily create the MQD BO in GTT on first resume */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all SEs/CUs for static thread management */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP base programmed above into the MQD */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active; poll until the HQD drains */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* these ASICs need the MEC doorbell aperture programmed */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	/* globally enable doorbell acceptance in the CP */
	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* smoke-test each compute ring; a failed ring is marked not ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
4907 
4908 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4909 {
4910 	int r;
4911 
4912 	if (!(adev->flags & AMD_IS_APU))
4913 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4914 
4915 	if (!adev->pp_enabled) {
4916 		if (!adev->firmware.smu_load) {
4917 			/* legacy firmware loading */
4918 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
4919 			if (r)
4920 				return r;
4921 
4922 			r = gfx_v8_0_cp_compute_load_microcode(adev);
4923 			if (r)
4924 				return r;
4925 		} else {
4926 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4927 							AMDGPU_UCODE_ID_CP_CE);
4928 			if (r)
4929 				return -EINVAL;
4930 
4931 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4932 							AMDGPU_UCODE_ID_CP_PFP);
4933 			if (r)
4934 				return -EINVAL;
4935 
4936 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4937 							AMDGPU_UCODE_ID_CP_ME);
4938 			if (r)
4939 				return -EINVAL;
4940 
4941 			if (adev->asic_type == CHIP_TOPAZ) {
4942 				r = gfx_v8_0_cp_compute_load_microcode(adev);
4943 				if (r)
4944 					return r;
4945 			} else {
4946 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4947 										 AMDGPU_UCODE_ID_CP_MEC1);
4948 				if (r)
4949 					return -EINVAL;
4950 			}
4951 		}
4952 	}
4953 
4954 	r = gfx_v8_0_cp_gfx_resume(adev);
4955 	if (r)
4956 		return r;
4957 
4958 	r = gfx_v8_0_cp_compute_resume(adev);
4959 	if (r)
4960 		return r;
4961 
4962 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4963 
4964 	return 0;
4965 }
4966 
/* Enable or disable both CP micro engines (gfx first, then compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4972 
/*
 * gfx_v8_0_hw_init - IP-block hw_init callback for GFX v8
 *
 * Applies golden register settings, initializes the GPU core, then
 * brings up the RLC followed by the command processor.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4992 
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini callback for GFX v8
 *
 * Releases the privileged register/instruction interrupts, stops the CP
 * and RLC, frees the compute MQDs and finally ungates GFX power gating.
 * Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	/* leave the block ungated so the next init starts from a known state */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5008 
/* Suspend is implemented as a full hardware teardown of the GFX block. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = handle;

	return gfx_v8_0_hw_fini(adev);
}
5015 
/* Resume simply re-runs the full hardware init sequence. */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = handle;

	return gfx_v8_0_hw_init(adev);
}
5022 
5023 static bool gfx_v8_0_is_idle(void *handle)
5024 {
5025 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5026 
5027 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5028 		return false;
5029 	else
5030 		return true;
5031 }
5032 
5033 static int gfx_v8_0_wait_for_idle(void *handle)
5034 {
5035 	unsigned i;
5036 	u32 tmp;
5037 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5038 
5039 	for (i = 0; i < adev->usec_timeout; i++) {
5040 		/* read MC_STATUS */
5041 		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
5042 
5043 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
5044 			return 0;
5045 		udelay(1);
5046 	}
5047 	return -ETIMEDOUT;
5048 }
5049 
/*
 * gfx_v8_0_soft_reset - soft-reset the GFX block if it appears hung
 *
 * Inspects GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS to decide which reset
 * bits (CP, GFX, RLC on the GRBM side; GRBM on the SRBM side) need to be
 * pulsed, stops the RLC and both CP engines, then toggles the reset bits
 * with GMCON stalled around the pulse.  The sequence and delays below
 * are order-sensitive.  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS: any busy fixed-function block implies CP+GFX reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	/* a busy CP additionally needs the GRBM reset via SRBM */
	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);

		/* stall GMC traffic while the reset bits are pulsed */
		if (grbm_soft_reset || srbm_soft_reset) {
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 1);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 1);
			WREG32(mmGMCON_DEBUG, tmp);

			udelay(50);
		}

		if (grbm_soft_reset) {
			/* assert, wait, then de-assert the GRBM reset bits;
			 * the read-backs flush the writes */
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			/* same assert/de-assert dance on the SRBM side */
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		/* release the GMC stall */
		if (grbm_soft_reset || srbm_soft_reset) {
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 0);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 0);
			WREG32(mmGMCON_DEBUG, tmp);
		}

		/* Wait a little for things to settle down */
		udelay(50);
	}
	return 0;
}
5152 
5153 /**
5154  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5155  *
5156  * @adev: amdgpu_device pointer
5157  *
5158  * Fetches a GPU clock counter snapshot.
5159  * Returns the 64 bit clock counter snapshot.
5160  */
5161 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5162 {
5163 	uint64_t clock;
5164 
5165 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5166 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5167 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5168 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5169 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5170 	return clock;
5171 }
5172 
5173 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5174 					  uint32_t vmid,
5175 					  uint32_t gds_base, uint32_t gds_size,
5176 					  uint32_t gws_base, uint32_t gws_size,
5177 					  uint32_t oa_base, uint32_t oa_size)
5178 {
5179 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5180 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5181 
5182 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5183 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5184 
5185 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
5186 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
5187 
5188 	/* GDS Base */
5189 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5190 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5191 				WRITE_DATA_DST_SEL(0)));
5192 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5193 	amdgpu_ring_write(ring, 0);
5194 	amdgpu_ring_write(ring, gds_base);
5195 
5196 	/* GDS Size */
5197 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5198 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5199 				WRITE_DATA_DST_SEL(0)));
5200 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5201 	amdgpu_ring_write(ring, 0);
5202 	amdgpu_ring_write(ring, gds_size);
5203 
5204 	/* GWS */
5205 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5206 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5207 				WRITE_DATA_DST_SEL(0)));
5208 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5209 	amdgpu_ring_write(ring, 0);
5210 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5211 
5212 	/* OA */
5213 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5214 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5215 				WRITE_DATA_DST_SEL(0)));
5216 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5217 	amdgpu_ring_write(ring, 0);
5218 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5219 }
5220 
/* GFX helper callbacks exposed to the rest of the driver via adev->gfx.funcs. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
};
5225 
5226 static int gfx_v8_0_early_init(void *handle)
5227 {
5228 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5229 
5230 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5231 	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5232 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5233 	gfx_v8_0_set_ring_funcs(adev);
5234 	gfx_v8_0_set_irq_funcs(adev);
5235 	gfx_v8_0_set_gds_init(adev);
5236 	gfx_v8_0_set_rlc_funcs(adev);
5237 
5238 	return 0;
5239 }
5240 
5241 static int gfx_v8_0_late_init(void *handle)
5242 {
5243 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5244 	int r;
5245 
5246 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5247 	if (r)
5248 		return r;
5249 
5250 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5251 	if (r)
5252 		return r;
5253 
5254 	/* requires IBs so do in late init after IB pool is initialized */
5255 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5256 	if (r)
5257 		return r;
5258 
5259 	amdgpu_set_powergating_state(adev,
5260 			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5261 
5262 	return 0;
5263 }
5264 
5265 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5266 						       bool enable)
5267 {
5268 	uint32_t data, temp;
5269 
5270 	if (adev->asic_type == CHIP_POLARIS11)
5271 		/* Send msg to SMU via Powerplay */
5272 		amdgpu_set_powergating_state(adev,
5273 					     AMD_IP_BLOCK_TYPE_SMC,
5274 					     enable ?
5275 					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5276 
5277 	temp = data = RREG32(mmRLC_PG_CNTL);
5278 	/* Enable static MGPG */
5279 	if (enable)
5280 		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5281 	else
5282 		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5283 
5284 	if (temp != data)
5285 		WREG32(mmRLC_PG_CNTL, data);
5286 }
5287 
5288 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5289 							bool enable)
5290 {
5291 	uint32_t data, temp;
5292 
5293 	temp = data = RREG32(mmRLC_PG_CNTL);
5294 	/* Enable dynamic MGPG */
5295 	if (enable)
5296 		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5297 	else
5298 		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5299 
5300 	if (temp != data)
5301 		WREG32(mmRLC_PG_CNTL, data);
5302 }
5303 
5304 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5305 		bool enable)
5306 {
5307 	uint32_t data, temp;
5308 
5309 	temp = data = RREG32(mmRLC_PG_CNTL);
5310 	/* Enable quick PG */
5311 	if (enable)
5312 		data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5313 	else
5314 		data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5315 
5316 	if (temp != data)
5317 		WREG32(mmRLC_PG_CNTL, data);
5318 }
5319 
5320 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5321 					  bool enable)
5322 {
5323 	u32 data, orig;
5324 
5325 	orig = data = RREG32(mmRLC_PG_CNTL);
5326 
5327 	if (enable)
5328 		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5329 	else
5330 		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5331 
5332 	if (orig != data)
5333 		WREG32(mmRLC_PG_CNTL, data);
5334 }
5335 
5336 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5337 						bool enable)
5338 {
5339 	u32 data, orig;
5340 
5341 	orig = data = RREG32(mmRLC_PG_CNTL);
5342 
5343 	if (enable)
5344 		data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5345 	else
5346 		data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5347 
5348 	if (orig != data)
5349 		WREG32(mmRLC_PG_CNTL, data);
5350 
5351 	/* Read any GFX register to wake up GFX. */
5352 	if (!enable)
5353 		data = RREG32(mmDB_RENDER_CONTROL);
5354 }
5355 
5356 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5357 					  bool enable)
5358 {
5359 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5360 		cz_enable_gfx_cg_power_gating(adev, true);
5361 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5362 			cz_enable_gfx_pipeline_power_gating(adev, true);
5363 	} else {
5364 		cz_enable_gfx_cg_power_gating(adev, false);
5365 		cz_enable_gfx_pipeline_power_gating(adev, false);
5366 	}
5367 }
5368 
5369 static int gfx_v8_0_set_powergating_state(void *handle,
5370 					  enum amd_powergating_state state)
5371 {
5372 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5373 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5374 
5375 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5376 		return 0;
5377 
5378 	switch (adev->asic_type) {
5379 	case CHIP_CARRIZO:
5380 	case CHIP_STONEY:
5381 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5382 			cz_update_gfx_cg_power_gating(adev, enable);
5383 
5384 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5385 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5386 		else
5387 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5388 
5389 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5390 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5391 		else
5392 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5393 		break;
5394 	case CHIP_POLARIS11:
5395 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5396 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5397 		else
5398 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5399 
5400 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5401 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5402 		else
5403 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5404 
5405 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5406 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5407 		else
5408 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5409 		break;
5410 	default:
5411 		break;
5412 	}
5413 
5414 	return 0;
5415 }
5416 
/*
 * gfx_v8_0_send_serdes_cmd - issue a BPM command on the RLC serdes bus
 *
 * @adev:     amdgpu_device pointer
 * @reg_addr: serdes register address to target
 * @cmd:      BPM command/data value to write
 *
 * Broadcasts to all SEs/SHs and all CU/non-CU masters, then builds the
 * RLC_SERDES_WR_CTRL word.  Note the Stoney path deliberately leaves the
 * BPM_DATA and REG_ADDR fields out of the clear mask before OR-ing in
 * the new values — presumably a hardware quirk; confirm against the
 * register spec before changing.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast: select all shader engines / shader arrays */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
			data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* merge in command, target register and the reserved BPM address */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5457 
/* RLC safe-mode request messages written into RLC_GPR_REG2 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0

/* RLC_GPR_REG2 layout: bit 0 = request handshake, bits 4:1 = message */
#define RLC_GPR_REG2__REQ_MASK           0x00000001
#define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
5464 
/*
 * cz_enter_rlc_safe_mode - request RLC safe mode (Carrizo family)
 *
 * No-op when the RLC F32 core is not running, or when neither the
 * relevant CG nor PG features are enabled.  Otherwise posts an
 * ENTER_SAFE_MODE message through RLC_GPR_REG2, waits for the GFX
 * clock/power status to report on, then waits for the RLC to clear the
 * request bit (handshake ack).
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* post the request: REQ bit plus the message in bits 4:1 */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait until both GFX clock and power report active */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack by clearing the REQ bit */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5500 
/*
 * cz_exit_rlc_safe_mode - release RLC safe mode (Carrizo family)
 *
 * Mirror of cz_enter_rlc_safe_mode(): posts an EXIT_SAFE_MODE message
 * and waits for the RLC to clear the request bit.  NOTE(review): the
 * final ack poll runs even when no request was posted; the REQ bit is
 * expected to already read 0 in that case — confirm intended.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* post the exit request: REQ bit plus the message in bits 4:1 */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to ack by clearing the REQ bit */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
			break;
		udelay(1);
	}
}
5526 
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode (Iceland family)
 *
 * Same handshake as the CZ variant but driven through RLC_SAFE_MODE
 * (CMD bit + message field) and only gated on the CG feature flags.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* post the request: CMD bit plus message 1 (enter) */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait until both GFX clock and power report active */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack by clearing the CMD bit */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5560 
/*
 * iceland_exit_rlc_safe_mode - release RLC safe mode (Iceland family)
 *
 * Only issues the exit command if we actually entered safe mode
 * (in_safe_mode bookkeeping).  The trailing poll waits for the RLC to
 * clear the CMD bit regardless, which reads as already clear when no
 * command was written.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* message field cleared to 0 == exit request */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to ack by clearing the CMD bit */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
			break;
		udelay(1);
	}
}
5585 
/* No-op safe-mode entry for ASICs that need no RLC handshake; only
 * tracks the bookkeeping flag.
 */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5590 
/* No-op safe-mode exit counterpart; clears the bookkeeping flag. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5595 
/* Safe-mode hooks for Carrizo/Stoney (RLC_GPR_REG2-based handshake). */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};
5600 
/* Safe-mode hooks for Topaz/Iceland (RLC_SAFE_MODE-based handshake). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5605 
/* Fallback hooks for ASICs that require no RLC safe-mode handshake. */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5610 
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 * @adev: amdgpu device pointer
 * @enable: true to enable medium-grain clock gating, false to disable
 *
 * Performs the documented enable/disable register sequence under RLC
 * safe mode.  The numbered comments mirror the required programming
 * order; do not reorder the steps.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				/* 1 - RLC memory Light sleep */
				temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmRLC_MEM_SLP_CNTL, data);
			}

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				/* 2 - CP memory Light sleep */
				temp = data = RREG32(mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmCP_MEM_SLP_CNTL, data);
			}
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override set; dGPUs clear it too. */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* LS override cleared only when both MGLS and CGTS_LS
			 * are supported.
			 */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5723 
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse-grain clock gating, false to disable
 *
 * Runs the enable/disable programming sequence under RLC safe mode.
 * The step ordering is part of the hardware contract; do not reorder.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5814 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5815 					    bool enable)
5816 {
5817 	if (enable) {
5818 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5819 		 * ===  MGCG + MGLS + TS(CG/LS) ===
5820 		 */
5821 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5822 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5823 	} else {
5824 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5825 		 * ===  CGCG + CGLS ===
5826 		 */
5827 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5828 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5829 	}
5830 	return 0;
5831 }
5832 
5833 static int gfx_v8_0_set_clockgating_state(void *handle,
5834 					  enum amd_clockgating_state state)
5835 {
5836 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5837 
5838 	switch (adev->asic_type) {
5839 	case CHIP_FIJI:
5840 	case CHIP_CARRIZO:
5841 	case CHIP_STONEY:
5842 		gfx_v8_0_update_gfx_clock_gating(adev,
5843 						 state == AMD_CG_STATE_GATE ? true : false);
5844 		break;
5845 	default:
5846 		break;
5847 	}
5848 	return 0;
5849 }
5850 
5851 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5852 {
5853 	u32 rptr;
5854 
5855 	rptr = ring->adev->wb.wb[ring->rptr_offs];
5856 
5857 	return rptr;
5858 }
5859 
5860 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5861 {
5862 	struct amdgpu_device *adev = ring->adev;
5863 	u32 wptr;
5864 
5865 	if (ring->use_doorbell)
5866 		/* XXX check if swapping is necessary on BE */
5867 		wptr = ring->adev->wb.wb[ring->wptr_offs];
5868 	else
5869 		wptr = RREG32(mmCP_RB0_WPTR);
5870 
5871 	return wptr;
5872 }
5873 
5874 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5875 {
5876 	struct amdgpu_device *adev = ring->adev;
5877 
5878 	if (ring->use_doorbell) {
5879 		/* XXX check if swapping is necessary on BE */
5880 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
5881 		WDOORBELL32(ring->doorbell_index, ring->wptr);
5882 	} else {
5883 		WREG32(mmCP_RB0_WPTR, ring->wptr);
5884 		(void)RREG32(mmCP_RB0_WPTR);
5885 	}
5886 }
5887 
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the per-client done bit matches.  Compute
 * rings select the CP2..CP5 (MEC1) or CP6..CP9 (MEC2) bit by pipe;
 * the GFX ring uses CP0 and waits on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
5919 
/*
 * gfx_v8_0_ring_emit_hdp_invalidate - emit an HDP cache invalidate
 *
 * Writes 1 to HDP_DEBUG0 via a WRITE_DATA packet, which invalidates
 * the HDP read cache.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
5931 
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the GFX ring
 * @ring: ring to emit on
 * @ib: indirect buffer to schedule
 * @vm_id: VMID the IB executes under (packed into bits 24+ of control)
 * @ctx_switch: true when this is the first IB of a new ring frame
 *
 * Constant-engine IBs use INDIRECT_BUFFER_CONST; all others use the
 * plain INDIRECT_BUFFER packet.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  /* request byte swapping of the fetched IB */
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5960 
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring
 * @ring: ring to emit on
 * @ib: indirect buffer to schedule
 * @vm_id: VMID the IB executes under (packed into bits 24+ of control)
 * @ctx_switch: unused on compute rings (no context switching)
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  /* request byte swapping of the fetched IB */
					  (2 << 0) |
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5976 
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the GFX ring
 * @ring: ring to emit on
 * @addr: GPU address of the fence value
 * @seq: sequence number to write when the fence signals
 * @flags: AMDGPU_FENCE_FLAG_64BIT for a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT to raise an interrupt
 *
 * Uses EVENT_WRITE_EOP, which flushes caches at end-of-pipe, writes
 * the sequence value and optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
5997 
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for the ring's own fence
 *
 * Emits a WAIT_REG_MEM on the ring's fence memory location so later
 * packets only run once all previously submitted work has signalled.
 * GFX rings wait on the PFP and additionally flip SWITCH_BUFFER twice
 * to keep the CE from racing ahead.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* sync CE with ME to prevent CE fetching CEIB before context switch done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
6022 
/*
 * gfx_v8_0_ring_emit_vm_flush - emit a VM page table update + TLB flush
 * @ring: ring to emit on
 * @vm_id: VM context to update (0-15)
 * @pd_addr: new page directory base address
 *
 * Writes the page directory base for the given VM context (contexts
 * 0-7 and 8-15 live in separate register banks), requests a TLB
 * invalidate for that VMID and polls until it completes.  On GFX rings
 * the PFP is then resynced with the ME.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	/* page directory base is stored as a page frame number */
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
6073 
6074 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
6075 {
6076 	return ring->adev->wb.wb[ring->rptr_offs];
6077 }
6078 
6079 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6080 {
6081 	return ring->adev->wb.wb[ring->wptr_offs];
6082 }
6083 
/* Publish the compute ring write pointer: the writeback slot must be
 * updated before the doorbell write notifies the CP.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
6092 
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 * @ring: ring to emit on
 * @addr: GPU address of the fence value
 * @seq: sequence number to write when the fence signals
 * @flags: AMDGPU_FENCE_FLAG_64BIT for a 64-bit write,
 *         AMDGPU_FENCE_FLAG_INT to raise an interrupt
 *
 * Compute rings use RELEASE_MEM instead of EVENT_WRITE_EOP; the packet
 * layout differs from the GFX variant (data-select dword comes before
 * the address).
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6113 
6114 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6115 						 enum amdgpu_interrupt_state state)
6116 {
6117 	u32 cp_int_cntl;
6118 
6119 	switch (state) {
6120 	case AMDGPU_IRQ_STATE_DISABLE:
6121 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6122 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6123 					    TIME_STAMP_INT_ENABLE, 0);
6124 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6125 		break;
6126 	case AMDGPU_IRQ_STATE_ENABLE:
6127 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6128 		cp_int_cntl =
6129 			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6130 				      TIME_STAMP_INT_ENABLE, 1);
6131 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6132 		break;
6133 	default:
6134 		break;
6135 	}
6136 }
6137 
/*
 * gfx_v8_0_set_compute_eop_interrupt_state - toggle a MEC pipe EOP irq
 * @adev: amdgpu device pointer
 * @me: micro engine (only MEC1, me == 1, is handled here)
 * @pipe: pipe within the ME (only pipe 0 is handled here)
 * @state: enable or disable
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
6181 
6182 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6183 					     struct amdgpu_irq_src *source,
6184 					     unsigned type,
6185 					     enum amdgpu_interrupt_state state)
6186 {
6187 	u32 cp_int_cntl;
6188 
6189 	switch (state) {
6190 	case AMDGPU_IRQ_STATE_DISABLE:
6191 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6192 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6193 					    PRIV_REG_INT_ENABLE, 0);
6194 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6195 		break;
6196 	case AMDGPU_IRQ_STATE_ENABLE:
6197 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6198 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6199 					    PRIV_REG_INT_ENABLE, 1);
6200 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6201 		break;
6202 	default:
6203 		break;
6204 	}
6205 
6206 	return 0;
6207 }
6208 
6209 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6210 					      struct amdgpu_irq_src *source,
6211 					      unsigned type,
6212 					      enum amdgpu_interrupt_state state)
6213 {
6214 	u32 cp_int_cntl;
6215 
6216 	switch (state) {
6217 	case AMDGPU_IRQ_STATE_DISABLE:
6218 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6219 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6220 					    PRIV_INSTR_INT_ENABLE, 0);
6221 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6222 		break;
6223 	case AMDGPU_IRQ_STATE_ENABLE:
6224 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6225 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6226 					    PRIV_INSTR_INT_ENABLE, 1);
6227 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6228 		break;
6229 	default:
6230 		break;
6231 	}
6232 
6233 	return 0;
6234 }
6235 
/*
 * gfx_v8_0_set_eop_interrupt_state - dispatch EOP irq enable/disable
 * @adev: amdgpu device pointer
 * @src: irq source (unused)
 * @type: which ring's EOP interrupt (GFX or a specific MEC/pipe)
 * @state: enable or disable
 *
 * Routes the request to the GFX or the matching compute (me, pipe)
 * handler.  Returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6274 
/*
 * gfx_v8_0_eop_irq - end-of-pipe interrupt handler
 *
 * Decodes the (me, pipe, queue) triple from the IV ring_id field:
 * bits [3:2] = me, bits [1:0] = pipe, bits [6:4] = queue, then runs
 * fence processing on the matching ring (GFX ring 0 for me == 0).
 * Returns 0.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6306 
/* Privileged register access fault: log it and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6315 
/* Privileged instruction fault: log it and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6324 
/* IP-block entry points for the GFX v8 engine. */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6341 
/* Ring callbacks for the GFX ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6359 
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6377 
6378 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6379 {
6380 	int i;
6381 
6382 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6383 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6384 
6385 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6386 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6387 }
6388 
/* IRQ source callbacks: end-of-pipe fence interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

/* IRQ source callbacks: privileged register access faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

/* IRQ source callbacks: privileged instruction faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6403 
/* Register the GFX irq sources and their type counts with the core. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
6415 
/* Pick the RLC safe-mode implementation for this ASIC. */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
		break;
	case CHIP_STONEY:
	case CHIP_CARRIZO:
		adev->gfx.rlc.funcs = &cz_rlc_funcs;
		break;
	default:
		/* other ASICs need no safe-mode handshake */
		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
		break;
	}
}
6431 
/* Initialize GDS/GWS/OA sizes and partition them between the kernel
 * GFX path and user command submission, based on the GDS size the
 * hardware reports.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC GDS info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
6459 
6460 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6461 						 u32 bitmap)
6462 {
6463 	u32 data;
6464 
6465 	if (!bitmap)
6466 		return;
6467 
6468 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6469 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6470 
6471 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6472 }
6473 
6474 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6475 {
6476 	u32 data, mask;
6477 
6478 	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6479 	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6480 
6481 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6482 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6483 
6484 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6485 
6486 	return (~data) & mask;
6487 }
6488 
/*
 * gfx_v8_0_get_cu_info - gather compute unit topology
 *
 * Walks every shader engine / shader array, records the active CU
 * bitmap per (SE, SH), counts the total active CUs and builds the
 * always-on CU mask (up to the first 2 active CUs per SH).  Holds
 * grbm_idx_mutex while GRBM SE/SH selection is non-broadcast.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	/* parse the module-level CU disable overrides (up to 4 SEs x 2 SHs) */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* point GRBM register accesses at this SE/SH */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first 2 per SH become always-on */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast to all SE/SH */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
6531