xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision 98838d95075a5295f3478ceba18bcccf472e30f4)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33 
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36 
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39 
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42 
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47 
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50 
51 #include "smu/smu_7_1_3_d.h"
52 
/* GFX8 exposes one graphics ring and eight compute rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Per-ASIC golden values programmed into mmGB_ADDR_CONFIG. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Helpers to compose GB_TILE_MODEn / GB_MACROTILE_MODEn register values
 * from their individual bit fields.
 */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Local field masks for mmRLC_CGTT_MGCG_OVERRIDE (not provided by the
 * generated register headers included above).
 */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set or clear a BPM register over the serdes link. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* NOTE(review): presumably the entry count of the RLC "format direct"
 * register list consumed by the RLC firmware — confirm against RLC spec.
 */
#define RLC_FormatDirectRegListLength        14
93 
/* Firmware images requested by this IP block, one set per supported ASIC
 * (CE, PFP, ME, MEC, optional MEC2, and RLC microcode).
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140 
/* GDS register offsets per VMID, indexed by VMID 0-15.
 * Each row is {GDS base, GDS size, GWS, OA} for that VMID.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
160 
/* Golden register tables for Tonga. Entries are flat triplets,
 * {register offset, AND mask of bits to modify, value}, consumed by
 * amdgpu_program_register_sequence() in gfx_v8_0_init_golden_registers().
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Common (all-revision) golden settings for Tonga. */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/* Clock-gating (MGCG/CGCG) init sequence for Tonga. The repeated
 * mmGRBM_GFX_INDEX = 0xe0000000 writes broadcast the following writes
 * to all shader engines/instances.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
270 
/* Golden register tables for Polaris11/Polaris10.
 * Entries are {register offset, AND mask, value} triplets.
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Common golden settings for Polaris11. */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Common golden settings for Polaris10. */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
334 
/* Golden register tables for Fiji.
 * Entries are {register offset, AND mask, value} triplets.
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Clock-gating (MGCG/CGCG) init sequence for Fiji. */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
402 
/* Golden register tables for Iceland (Topaz).
 * Entries are {register offset, AND mask, value} triplets.
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/* Clock-gating (MGCG/CGCG) init sequence for Iceland. Note only six
 * CU blocks (CU0-CU5) are programmed here, versus eight on Tonga/Carrizo.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
502 
/* Golden register tables for Carrizo (cz).
 * Entries are {register offset, AND mask, value} triplets.
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/* Clock-gating (MGCG/CGCG) init sequence for Carrizo. */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
609 
/* Golden register tables for Stoney.
 * Entries are {register offset, AND mask, value} triplets.
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

/* Clock-gating (MGCG/CGCG) init sequence for Stoney — much shorter than
 * the other ASICs' tables.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
645 
/* Forward declarations for functions defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
652 
/**
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device pointer
 *
 * Applies the static register tables defined above for the detected ASIC,
 * in order: clock-gating (mgcg_cgcg) init where present, then revision
 * specific settings, then common settings. Unknown ASIC types are a no-op.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		/* No mgcg_cgcg table for Polaris11/10 — only golden settings. */
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		/* NOTE(review): magic ACLK_CNTL value written via SMC —
		 * confirm meaning against the SMU 7.1.3 register spec.
		 */
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific workaround: certain Polaris10 rev 0xc7 boards
		 * (matched by PCI subsystem vendor/device IDs) get two extra
		 * i2c transactions — presumably a VBIOS/board fix, see the
		 * atombios i2c helper for transaction semantics.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
740 
741 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
742 {
743 	int i;
744 
745 	adev->gfx.scratch.num_reg = 7;
746 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
747 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
748 		adev->gfx.scratch.free[i] = true;
749 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
750 	}
751 }
752 
753 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
754 {
755 	struct amdgpu_device *adev = ring->adev;
756 	uint32_t scratch;
757 	uint32_t tmp = 0;
758 	unsigned i;
759 	int r;
760 
761 	r = amdgpu_gfx_scratch_get(adev, &scratch);
762 	if (r) {
763 		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
764 		return r;
765 	}
766 	WREG32(scratch, 0xCAFEDEAD);
767 	r = amdgpu_ring_alloc(ring, 3);
768 	if (r) {
769 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
770 			  ring->idx, r);
771 		amdgpu_gfx_scratch_free(adev, scratch);
772 		return r;
773 	}
774 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
775 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
776 	amdgpu_ring_write(ring, 0xDEADBEEF);
777 	amdgpu_ring_commit(ring);
778 
779 	for (i = 0; i < adev->usec_timeout; i++) {
780 		tmp = RREG32(scratch);
781 		if (tmp == 0xDEADBEEF)
782 			break;
783 		DRM_UDELAY(1);
784 	}
785 	if (i < adev->usec_timeout) {
786 		DRM_INFO("ring test on %d succeeded in %d usecs\n",
787 			 ring->idx, i);
788 	} else {
789 		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
790 			  ring->idx, scratch, tmp);
791 		r = -EINVAL;
792 	}
793 	amdgpu_gfx_scratch_free(adev, scratch);
794 	return r;
795 }
796 
797 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
798 {
799 	struct amdgpu_device *adev = ring->adev;
800 	struct amdgpu_ib ib;
801 	struct fence *f = NULL;
802 	uint32_t scratch;
803 	uint32_t tmp = 0;
804 	long r;
805 
806 	r = amdgpu_gfx_scratch_get(adev, &scratch);
807 	if (r) {
808 		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
809 		return r;
810 	}
811 	WREG32(scratch, 0xCAFEDEAD);
812 	memset(&ib, 0, sizeof(ib));
813 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
814 	if (r) {
815 		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
816 		goto err1;
817 	}
818 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
819 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
820 	ib.ptr[2] = 0xDEADBEEF;
821 	ib.length_dw = 3;
822 
823 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
824 	if (r)
825 		goto err2;
826 
827 	r = fence_wait_timeout(f, false, timeout);
828 	if (r == 0) {
829 		DRM_ERROR("amdgpu: IB test timed out.\n");
830 		r = -ETIMEDOUT;
831 		goto err2;
832 	} else if (r < 0) {
833 		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
834 		goto err2;
835 	}
836 	tmp = RREG32(scratch);
837 	if (tmp == 0xDEADBEEF) {
838 		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
839 		r = 0;
840 	} else {
841 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
842 			  scratch, tmp);
843 		r = -EINVAL;
844 	}
845 err2:
846 	amdgpu_ib_free(adev, &ib, NULL);
847 	fence_put(f);
848 err1:
849 	amdgpu_gfx_scratch_free(adev, scratch);
850 	return r;
851 }
852 
853 
854 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
855 	release_firmware(adev->gfx.pfp_fw);
856 	adev->gfx.pfp_fw = NULL;
857 	release_firmware(adev->gfx.me_fw);
858 	adev->gfx.me_fw = NULL;
859 	release_firmware(adev->gfx.ce_fw);
860 	adev->gfx.ce_fw = NULL;
861 	release_firmware(adev->gfx.rlc_fw);
862 	adev->gfx.rlc_fw = NULL;
863 	release_firmware(adev->gfx.mec_fw);
864 	adev->gfx.mec_fw = NULL;
865 	if ((adev->asic_type != CHIP_STONEY) &&
866 	    (adev->asic_type != CHIP_TOPAZ))
867 		release_firmware(adev->gfx.mec2_fw);
868 	adev->gfx.mec2_fw = NULL;
869 
870 	kfree(adev->gfx.rlc.register_list_format);
871 }
872 
873 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
874 {
875 	const char *chip_name;
876 	char fw_name[30];
877 	int err;
878 	struct amdgpu_firmware_info *info = NULL;
879 	const struct common_firmware_header *header = NULL;
880 	const struct gfx_firmware_header_v1_0 *cp_hdr;
881 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
882 	unsigned int *tmp = NULL, i;
883 
884 	DRM_DEBUG("\n");
885 
886 	switch (adev->asic_type) {
887 	case CHIP_TOPAZ:
888 		chip_name = "topaz";
889 		break;
890 	case CHIP_TONGA:
891 		chip_name = "tonga";
892 		break;
893 	case CHIP_CARRIZO:
894 		chip_name = "carrizo";
895 		break;
896 	case CHIP_FIJI:
897 		chip_name = "fiji";
898 		break;
899 	case CHIP_POLARIS11:
900 		chip_name = "polaris11";
901 		break;
902 	case CHIP_POLARIS10:
903 		chip_name = "polaris10";
904 		break;
905 	case CHIP_STONEY:
906 		chip_name = "stoney";
907 		break;
908 	default:
909 		BUG();
910 	}
911 
912 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
913 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
914 	if (err)
915 		goto out;
916 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
917 	if (err)
918 		goto out;
919 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
920 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
921 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
922 
923 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
924 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
925 	if (err)
926 		goto out;
927 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
928 	if (err)
929 		goto out;
930 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
931 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
932 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
933 
934 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
935 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
936 	if (err)
937 		goto out;
938 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
939 	if (err)
940 		goto out;
941 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
942 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
943 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
944 
945 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
946 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
947 	if (err)
948 		goto out;
949 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
950 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
951 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
952 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
953 
954 	adev->gfx.rlc.save_and_restore_offset =
955 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
956 	adev->gfx.rlc.clear_state_descriptor_offset =
957 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
958 	adev->gfx.rlc.avail_scratch_ram_locations =
959 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
960 	adev->gfx.rlc.reg_restore_list_size =
961 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
962 	adev->gfx.rlc.reg_list_format_start =
963 			le32_to_cpu(rlc_hdr->reg_list_format_start);
964 	adev->gfx.rlc.reg_list_format_separate_start =
965 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
966 	adev->gfx.rlc.starting_offsets_start =
967 			le32_to_cpu(rlc_hdr->starting_offsets_start);
968 	adev->gfx.rlc.reg_list_format_size_bytes =
969 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
970 	adev->gfx.rlc.reg_list_size_bytes =
971 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
972 
973 	adev->gfx.rlc.register_list_format =
974 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
975 					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
976 
977 	if (!adev->gfx.rlc.register_list_format) {
978 		err = -ENOMEM;
979 		goto out;
980 	}
981 
982 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
983 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
984 	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
985 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
986 
987 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
988 
989 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
990 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
991 	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
992 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
993 
994 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
995 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
996 	if (err)
997 		goto out;
998 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
999 	if (err)
1000 		goto out;
1001 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1002 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1003 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1004 
1005 	if ((adev->asic_type != CHIP_STONEY) &&
1006 	    (adev->asic_type != CHIP_TOPAZ)) {
1007 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1008 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1009 		if (!err) {
1010 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1011 			if (err)
1012 				goto out;
1013 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1014 				adev->gfx.mec2_fw->data;
1015 			adev->gfx.mec2_fw_version =
1016 				le32_to_cpu(cp_hdr->header.ucode_version);
1017 			adev->gfx.mec2_feature_version =
1018 				le32_to_cpu(cp_hdr->ucode_feature_version);
1019 		} else {
1020 			err = 0;
1021 			adev->gfx.mec2_fw = NULL;
1022 		}
1023 	}
1024 
1025 	if (adev->firmware.smu_load) {
1026 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1027 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1028 		info->fw = adev->gfx.pfp_fw;
1029 		header = (const struct common_firmware_header *)info->fw->data;
1030 		adev->firmware.fw_size +=
1031 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1032 
1033 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1034 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1035 		info->fw = adev->gfx.me_fw;
1036 		header = (const struct common_firmware_header *)info->fw->data;
1037 		adev->firmware.fw_size +=
1038 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1039 
1040 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1041 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1042 		info->fw = adev->gfx.ce_fw;
1043 		header = (const struct common_firmware_header *)info->fw->data;
1044 		adev->firmware.fw_size +=
1045 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1046 
1047 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1048 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1049 		info->fw = adev->gfx.rlc_fw;
1050 		header = (const struct common_firmware_header *)info->fw->data;
1051 		adev->firmware.fw_size +=
1052 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053 
1054 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1055 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1056 		info->fw = adev->gfx.mec_fw;
1057 		header = (const struct common_firmware_header *)info->fw->data;
1058 		adev->firmware.fw_size +=
1059 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1060 
1061 		if (adev->gfx.mec2_fw) {
1062 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1063 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1064 			info->fw = adev->gfx.mec2_fw;
1065 			header = (const struct common_firmware_header *)info->fw->data;
1066 			adev->firmware.fw_size +=
1067 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1068 		}
1069 
1070 	}
1071 
1072 out:
1073 	if (err) {
1074 		dev_err(adev->dev,
1075 			"gfx8: Failed to load firmware \"%s\"\n",
1076 			fw_name);
1077 		release_firmware(adev->gfx.pfp_fw);
1078 		adev->gfx.pfp_fw = NULL;
1079 		release_firmware(adev->gfx.me_fw);
1080 		adev->gfx.me_fw = NULL;
1081 		release_firmware(adev->gfx.ce_fw);
1082 		adev->gfx.ce_fw = NULL;
1083 		release_firmware(adev->gfx.rlc_fw);
1084 		adev->gfx.rlc_fw = NULL;
1085 		release_firmware(adev->gfx.mec_fw);
1086 		adev->gfx.mec_fw = NULL;
1087 		release_firmware(adev->gfx.mec2_fw);
1088 		adev->gfx.mec2_fw = NULL;
1089 	}
1090 	return err;
1091 }
1092 
1093 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1094 				    volatile u32 *buffer)
1095 {
1096 	u32 count = 0, i;
1097 	const struct cs_section_def *sect = NULL;
1098 	const struct cs_extent_def *ext = NULL;
1099 
1100 	if (adev->gfx.rlc.cs_data == NULL)
1101 		return;
1102 	if (buffer == NULL)
1103 		return;
1104 
1105 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1106 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1107 
1108 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1109 	buffer[count++] = cpu_to_le32(0x80000000);
1110 	buffer[count++] = cpu_to_le32(0x80000000);
1111 
1112 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1113 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1114 			if (sect->id == SECT_CONTEXT) {
1115 				buffer[count++] =
1116 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1117 				buffer[count++] = cpu_to_le32(ext->reg_index -
1118 						PACKET3_SET_CONTEXT_REG_START);
1119 				for (i = 0; i < ext->reg_count; i++)
1120 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1121 			} else {
1122 				return;
1123 			}
1124 		}
1125 	}
1126 
1127 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1128 	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1129 			PACKET3_SET_CONTEXT_REG_START);
1130 	switch (adev->asic_type) {
1131 	case CHIP_TONGA:
1132 	case CHIP_POLARIS10:
1133 		buffer[count++] = cpu_to_le32(0x16000012);
1134 		buffer[count++] = cpu_to_le32(0x0000002A);
1135 		break;
1136 	case CHIP_POLARIS11:
1137 		buffer[count++] = cpu_to_le32(0x16000012);
1138 		buffer[count++] = cpu_to_le32(0x00000000);
1139 		break;
1140 	case CHIP_FIJI:
1141 		buffer[count++] = cpu_to_le32(0x3a00161a);
1142 		buffer[count++] = cpu_to_le32(0x0000002e);
1143 		break;
1144 	case CHIP_TOPAZ:
1145 	case CHIP_CARRIZO:
1146 		buffer[count++] = cpu_to_le32(0x00000002);
1147 		buffer[count++] = cpu_to_le32(0x00000000);
1148 		break;
1149 	case CHIP_STONEY:
1150 		buffer[count++] = cpu_to_le32(0x00000000);
1151 		buffer[count++] = cpu_to_le32(0x00000000);
1152 		break;
1153 	default:
1154 		buffer[count++] = cpu_to_le32(0x00000000);
1155 		buffer[count++] = cpu_to_le32(0x00000000);
1156 		break;
1157 	}
1158 
1159 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1160 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1161 
1162 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1163 	buffer[count++] = cpu_to_le32(0);
1164 }
1165 
1166 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1167 {
1168 	const __le32 *fw_data;
1169 	volatile u32 *dst_ptr;
1170 	int me, i, max_me = 4;
1171 	u32 bo_offset = 0;
1172 	u32 table_offset, table_size;
1173 
1174 	if (adev->asic_type == CHIP_CARRIZO)
1175 		max_me = 5;
1176 
1177 	/* write the cp table buffer */
1178 	dst_ptr = adev->gfx.rlc.cp_table_ptr;
1179 	for (me = 0; me < max_me; me++) {
1180 		if (me == 0) {
1181 			const struct gfx_firmware_header_v1_0 *hdr =
1182 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1183 			fw_data = (const __le32 *)
1184 				(adev->gfx.ce_fw->data +
1185 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1186 			table_offset = le32_to_cpu(hdr->jt_offset);
1187 			table_size = le32_to_cpu(hdr->jt_size);
1188 		} else if (me == 1) {
1189 			const struct gfx_firmware_header_v1_0 *hdr =
1190 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1191 			fw_data = (const __le32 *)
1192 				(adev->gfx.pfp_fw->data +
1193 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1194 			table_offset = le32_to_cpu(hdr->jt_offset);
1195 			table_size = le32_to_cpu(hdr->jt_size);
1196 		} else if (me == 2) {
1197 			const struct gfx_firmware_header_v1_0 *hdr =
1198 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1199 			fw_data = (const __le32 *)
1200 				(adev->gfx.me_fw->data +
1201 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1202 			table_offset = le32_to_cpu(hdr->jt_offset);
1203 			table_size = le32_to_cpu(hdr->jt_size);
1204 		} else if (me == 3) {
1205 			const struct gfx_firmware_header_v1_0 *hdr =
1206 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1207 			fw_data = (const __le32 *)
1208 				(adev->gfx.mec_fw->data +
1209 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1210 			table_offset = le32_to_cpu(hdr->jt_offset);
1211 			table_size = le32_to_cpu(hdr->jt_size);
1212 		} else  if (me == 4) {
1213 			const struct gfx_firmware_header_v1_0 *hdr =
1214 				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1215 			fw_data = (const __le32 *)
1216 				(adev->gfx.mec2_fw->data +
1217 				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1218 			table_offset = le32_to_cpu(hdr->jt_offset);
1219 			table_size = le32_to_cpu(hdr->jt_size);
1220 		}
1221 
1222 		for (i = 0; i < table_size; i ++) {
1223 			dst_ptr[bo_offset + i] =
1224 				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1225 		}
1226 
1227 		bo_offset += table_size;
1228 	}
1229 }
1230 
1231 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1232 {
1233 	int r;
1234 
1235 	/* clear state block */
1236 	if (adev->gfx.rlc.clear_state_obj) {
1237 		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1238 		if (unlikely(r != 0))
1239 			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1240 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1241 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1242 		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1243 		adev->gfx.rlc.clear_state_obj = NULL;
1244 	}
1245 
1246 	/* jump table block */
1247 	if (adev->gfx.rlc.cp_table_obj) {
1248 		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1249 		if (unlikely(r != 0))
1250 			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1251 		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1252 		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1253 		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1254 		adev->gfx.rlc.cp_table_obj = NULL;
1255 	}
1256 }
1257 
/*
 * gfx_v8_0_rlc_init - allocate and populate the RLC buffer objects.
 *
 * Creates, pins and fills the clear state BO from the VI cs_data
 * tables, and on Carrizo/Stoney additionally creates the CP jump
 * table BO and copies the CP jump tables into it.
 *
 * Returns 0 on success, negative error code on failure.  Clear-state
 * failures clean up via gfx_v8_0_rlc_fini(); cp-table failures return
 * directly and leave teardown to the caller's error path.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			/* CSB lives in VRAM but must stay CPU-accessible for the copy below */
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* pin permanently; the RLC reads this buffer by GPU address */
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	/* only Carrizo/Stoney need the CP jump table + GDS backup buffer */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}
1353 
1354 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1355 {
1356 	int r;
1357 
1358 	if (adev->gfx.mec.hpd_eop_obj) {
1359 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1360 		if (unlikely(r != 0))
1361 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1362 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1363 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1364 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1365 		adev->gfx.mec.hpd_eop_obj = NULL;
1366 	}
1367 }
1368 
#define MEC_HPD_SIZE 2048 /* per-queue HPD EOP buffer size in bytes (see gfx_v8_0_mec_init) */
1370 
1371 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1372 {
1373 	int r;
1374 	u32 *hpd;
1375 
1376 	/*
1377 	 * we assign only 1 pipe because all other pipes will
1378 	 * be handled by KFD
1379 	 */
1380 	adev->gfx.mec.num_mec = 1;
1381 	adev->gfx.mec.num_pipe = 1;
1382 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1383 
1384 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
1385 		r = amdgpu_bo_create(adev,
1386 				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1387 				     PAGE_SIZE, true,
1388 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1389 				     &adev->gfx.mec.hpd_eop_obj);
1390 		if (r) {
1391 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1392 			return r;
1393 		}
1394 	}
1395 
1396 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1397 	if (unlikely(r != 0)) {
1398 		gfx_v8_0_mec_fini(adev);
1399 		return r;
1400 	}
1401 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1402 			  &adev->gfx.mec.hpd_eop_gpu_addr);
1403 	if (r) {
1404 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1405 		gfx_v8_0_mec_fini(adev);
1406 		return r;
1407 	}
1408 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1409 	if (r) {
1410 		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1411 		gfx_v8_0_mec_fini(adev);
1412 		return r;
1413 	}
1414 
1415 	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1416 
1417 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1418 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1419 
1420 	return 0;
1421 }
1422 
/*
 * Raw GFX8 compute shader dwords used by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize VGPRs (it is copied into an IB and dispatched there).
 * NOTE(review): appears to be a sequence of v_mov-style writes covering a
 * VGPR range, terminated by a barrier/endpgm pair — confirm against the
 * GFX8 ISA if modifying.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1459 
/*
 * Raw GFX8 compute shader dwords used by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize SGPRs; copied into the IB at sgpr_offset and dispatched
 * twice (once per sgpr*_init_regs table).
 * NOTE(review): appears to be s_mov-style writes over an SGPR range ending
 * in barrier/endpgm — confirm against the GFX8 ISA if modifying.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1484 
/*
 * Register/value pairs written via SET_SH_REG before dispatching the
 * VGPR init shader (consumed two-at-a-time by the i += 2 loop in
 * gfx_v8_0_do_edc_gpr_workarounds()).
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1504 
/*
 * Register/value pairs for the first SGPR init dispatch.  Differs from
 * sgpr2_init_regs only in the SE0 thread-mgmt mask (0x0f here vs 0xf0)
 * — presumably each dispatch targets a different half of the CUs; the
 * two tables together cover all of them.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1524 
/*
 * Register/value pairs for the second SGPR init dispatch (SE0 thread
 * mask 0xf0, the complement of sgpr1_init_regs' 0x0f); all other
 * entries match sgpr1_init_regs.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1544 
/*
 * EDC (error detection and correction) SEC/DED counter registers,
 * grouped by hardware block (CPC/CPF/CPG, DC, GDS, SPI, SQC/SQ,
 * TCC/TCP, TD).  Presumably iterated and read/cleared after the EDC
 * init dispatches — see gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1573 
/*
 * gfx_v8_0_do_edc_gpr_workarounds() - prime the GPR banks for EDC.
 *
 * Carrizo only: builds a single indirect buffer containing three compute
 * dispatches (one VGPR init pass and two SGPR init passes, using the
 * pre-canned init shaders and register lists), submits it on the first
 * compute ring and waits for completion.  Afterwards GB_EDC_MODE is
 * programmed with DED_MODE/PROP_FED and every register in
 * sec_ded_counter_registers is read back to clear the error counters.
 *
 * Returns 0 on success or when the workaround does not apply (non-CZ
 * ASIC, or the compute ring is not ready); a negative error code on IB
 * allocation, submission or fence-wait failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and clear it while the init shaders run;
	 * it is reprogrammed once the dispatches have completed below.
	 */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* Command-stream size per pass: 3 dwords for each SET_SH_REG
	 * reg/value pair, + 4 dwords for the shader address write,
	 * + 5 for DISPATCH_DIRECT, + 2 for the EVENT_WRITE flush,
	 * times 4 bytes per dword.
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	/* the shaders live in the same IB, 256-byte aligned, after the packets */
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI
	 * (both SGPR passes reuse the same SGPR init shader)
	 */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* restore GB_EDC_MODE with DED_MODE/PROP_FED enabled */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	/* NOTE(review): the "| 1" sets bit 0 on top of clearing the
	 * DIS_EDC field -- looks intentional per the hw workaround, but
	 * worth confirming against the register spec.
	 */
	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);

	return r;
}
1736 
/*
 * gfx_v8_0_gpu_early_init() - fill adev->gfx.config for this ASIC.
 *
 * Sets the per-ASIC shader-engine / CU / cache limits and scan-converter
 * FIFO sizes, derives the memory row size (from the DIMM address-mapping
 * fuse registers on APUs, from MC_ARB_RAMCFG on dGPUs) and patches the
 * ROW_SIZE field of the golden GB_ADDR_CONFIG value that is stored in
 * adev->gfx.config.gb_addr_config.
 *
 * Returns 0 on success, or the error from amdgpu_atombios_get_gfx_info()
 * on Polaris parts (where the topology comes from the VBIOS instead of
 * hard-coded tables).
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
		/* topology limits come from the VBIOS on Polaris */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		/* topology limits come from the VBIOS on Polaris */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* the CU count per SH depends on the Carrizo SKU,
		 * identified by the PCI revision id
		 */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count per SH depends on the Stoney SKU (revision id) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read here but never used below */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* row size in KB = 4 bytes * 2^(8 + NOOFCOLS) columns / 1024,
		 * capped at 4KB
		 */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1993 
/*
 * gfx_v8_0_sw_init() - software-side init for the GFX v8 IP block.
 *
 * Registers the EOP and privileged reg/inst interrupt sources, loads
 * the gfx microcode, creates the RLC and MEC backing objects, sets up
 * the gfx and compute rings, reserves the GDS/GWS/OA partitions and
 * finally runs gfx_v8_0_gpu_early_init() to fill adev->gfx.config.
 *
 * @handle: amdgpu_device pointer (IP-block callback convention)
 *
 * Returns 0 on success, negative error code on the first failure
 * (earlier allocations are torn down by sw_fini on the error path).
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event (interrupt source id 181) */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg (interrupt source id 184) */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst (interrupt source id 185) */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		/* all rings go on MEC1; 8 queues per pipe */
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2110 
2111 static int gfx_v8_0_sw_fini(void *handle)
2112 {
2113 	int i;
2114 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2115 
2116 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2117 	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2118 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2119 
2120 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2121 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2122 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2123 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2124 
2125 	gfx_v8_0_mec_fini(adev);
2126 	gfx_v8_0_rlc_fini(adev);
2127 	gfx_v8_0_free_microcode(adev);
2128 
2129 	return 0;
2130 }
2131 
2132 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2133 {
2134 	uint32_t *modearray, *mod2array;
2135 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2136 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2137 	u32 reg_offset;
2138 
2139 	modearray = adev->gfx.config.tile_mode_array;
2140 	mod2array = adev->gfx.config.macrotile_mode_array;
2141 
2142 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2143 		modearray[reg_offset] = 0;
2144 
2145 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2146 		mod2array[reg_offset] = 0;
2147 
2148 	switch (adev->asic_type) {
2149 	case CHIP_TOPAZ:
2150 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151 				PIPE_CONFIG(ADDR_SURF_P2) |
2152 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2153 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2154 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2155 				PIPE_CONFIG(ADDR_SURF_P2) |
2156 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2157 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2158 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2159 				PIPE_CONFIG(ADDR_SURF_P2) |
2160 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2161 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2162 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2163 				PIPE_CONFIG(ADDR_SURF_P2) |
2164 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2165 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2166 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2167 				PIPE_CONFIG(ADDR_SURF_P2) |
2168 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2169 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2170 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2171 				PIPE_CONFIG(ADDR_SURF_P2) |
2172 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2173 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2174 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175 				PIPE_CONFIG(ADDR_SURF_P2) |
2176 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2177 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2178 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2179 				PIPE_CONFIG(ADDR_SURF_P2));
2180 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2181 				PIPE_CONFIG(ADDR_SURF_P2) |
2182 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2183 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2185 				 PIPE_CONFIG(ADDR_SURF_P2) |
2186 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2187 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2188 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2189 				 PIPE_CONFIG(ADDR_SURF_P2) |
2190 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2191 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2192 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2193 				 PIPE_CONFIG(ADDR_SURF_P2) |
2194 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2196 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2197 				 PIPE_CONFIG(ADDR_SURF_P2) |
2198 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2199 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2200 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2201 				 PIPE_CONFIG(ADDR_SURF_P2) |
2202 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2203 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2204 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2205 				 PIPE_CONFIG(ADDR_SURF_P2) |
2206 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2207 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2208 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2209 				 PIPE_CONFIG(ADDR_SURF_P2) |
2210 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2211 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2212 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2213 				 PIPE_CONFIG(ADDR_SURF_P2) |
2214 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2215 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2216 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2217 				 PIPE_CONFIG(ADDR_SURF_P2) |
2218 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2219 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2220 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2221 				 PIPE_CONFIG(ADDR_SURF_P2) |
2222 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2223 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2224 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2225 				 PIPE_CONFIG(ADDR_SURF_P2) |
2226 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2227 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2228 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2229 				 PIPE_CONFIG(ADDR_SURF_P2) |
2230 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2231 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2232 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2233 				 PIPE_CONFIG(ADDR_SURF_P2) |
2234 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2235 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2236 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2237 				 PIPE_CONFIG(ADDR_SURF_P2) |
2238 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2239 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2240 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2241 				 PIPE_CONFIG(ADDR_SURF_P2) |
2242 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2243 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2244 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245 				 PIPE_CONFIG(ADDR_SURF_P2) |
2246 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2247 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2248 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2249 				 PIPE_CONFIG(ADDR_SURF_P2) |
2250 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2251 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2252 
2253 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2254 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2255 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2256 				NUM_BANKS(ADDR_SURF_8_BANK));
2257 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2258 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2260 				NUM_BANKS(ADDR_SURF_8_BANK));
2261 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2262 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2263 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264 				NUM_BANKS(ADDR_SURF_8_BANK));
2265 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2267 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268 				NUM_BANKS(ADDR_SURF_8_BANK));
2269 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2270 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2271 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2272 				NUM_BANKS(ADDR_SURF_8_BANK));
2273 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276 				NUM_BANKS(ADDR_SURF_8_BANK));
2277 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2278 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2279 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2280 				NUM_BANKS(ADDR_SURF_8_BANK));
2281 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2282 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2283 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2284 				NUM_BANKS(ADDR_SURF_16_BANK));
2285 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2286 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2287 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288 				NUM_BANKS(ADDR_SURF_16_BANK));
2289 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2290 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2291 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2292 				 NUM_BANKS(ADDR_SURF_16_BANK));
2293 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2294 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2295 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2296 				 NUM_BANKS(ADDR_SURF_16_BANK));
2297 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2299 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2300 				 NUM_BANKS(ADDR_SURF_16_BANK));
2301 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2302 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2303 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2304 				 NUM_BANKS(ADDR_SURF_16_BANK));
2305 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2306 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2307 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2308 				 NUM_BANKS(ADDR_SURF_8_BANK));
2309 
2310 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2311 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2312 			    reg_offset != 23)
2313 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2314 
2315 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2316 			if (reg_offset != 7)
2317 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2318 
2319 		break;
2320 	case CHIP_FIJI:
2321 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2324 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2325 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2326 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2328 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2329 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2332 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2333 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2336 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2337 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2338 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2340 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2341 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2342 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2344 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2345 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2346 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2348 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2349 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2350 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2351 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2352 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2353 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2354 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2355 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2356 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2358 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2362 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2364 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2366 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2367 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2368 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2369 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2370 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2371 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2372 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2374 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2375 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2376 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2378 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2379 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2380 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2382 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2383 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2384 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2386 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2387 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2388 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2389 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2390 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2391 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2392 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2394 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2395 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2396 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2398 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2399 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2400 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2402 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2403 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2404 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2406 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2407 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2408 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2410 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2411 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2412 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2413 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2414 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2415 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2416 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2419 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2420 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2422 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2423 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2424 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2426 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2427 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2428 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2430 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2437 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2439 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2443 
2444 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2447 				NUM_BANKS(ADDR_SURF_8_BANK));
2448 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2451 				NUM_BANKS(ADDR_SURF_8_BANK));
2452 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2454 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2455 				NUM_BANKS(ADDR_SURF_8_BANK));
2456 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2458 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2459 				NUM_BANKS(ADDR_SURF_8_BANK));
2460 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2462 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463 				NUM_BANKS(ADDR_SURF_8_BANK));
2464 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467 				NUM_BANKS(ADDR_SURF_8_BANK));
2468 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471 				NUM_BANKS(ADDR_SURF_8_BANK));
2472 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2474 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2475 				NUM_BANKS(ADDR_SURF_8_BANK));
2476 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479 				NUM_BANKS(ADDR_SURF_8_BANK));
2480 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2482 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483 				 NUM_BANKS(ADDR_SURF_8_BANK));
2484 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487 				 NUM_BANKS(ADDR_SURF_8_BANK));
2488 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2490 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2491 				 NUM_BANKS(ADDR_SURF_8_BANK));
2492 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495 				 NUM_BANKS(ADDR_SURF_8_BANK));
2496 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499 				 NUM_BANKS(ADDR_SURF_4_BANK));
2500 
2501 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2502 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2503 
2504 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2505 			if (reg_offset != 7)
2506 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2507 
2508 		break;
	case CHIP_TONGA:
		/*
		 * Tonga tiling tables: 8-pipe config (ADDR_SURF_P8_32x32_16x16)
		 * for most entries, falling back to P4_16x16 for the last PRT
		 * entry of each micro-tiling group.  modearray[0..7] are the
		 * depth micro-tiled modes with tile splits from 64B up to 2KB,
		 * modearray[8] is linear-aligned, and the remaining entries
		 * cover display, thin, thick and rotated micro tiling.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile modes: bank width/height, macro tile aspect and
		 * bank count per GB_MACROTILE_MODEn register.  mod2array[7] is
		 * deliberately never initialized and the write loop below
		 * skips it -- presumably a reserved entry; the same pattern is
		 * used for every ASIC in this switch (TODO confirm against the
		 * hardware docs).
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Push the tile mode table into GB_TILE_MODE0..n. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Push the macrotile table, skipping the reserved index 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
		/*
		 * Polaris11 tiling tables: 4-pipe config (ADDR_SURF_P4_16x16)
		 * throughout.  Same layout as the other ASIC cases:
		 * modearray[0..7] are depth micro-tiled with tile splits from
		 * 64B to 2KB, modearray[8] is linear-aligned, and the rest
		 * cover display, thin, thick and rotated micro tiling.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macrotile modes: bank width/height, macro tile aspect and
		 * bank count per GB_MACROTILE_MODEn register.  mod2array[7] is
		 * deliberately never initialized and the write loop below
		 * skips it -- presumably a reserved entry, consistent with the
		 * other ASIC cases in this switch.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Push the tile mode table into GB_TILE_MODE0..n. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Push the macrotile table, skipping the reserved index 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2900 	case CHIP_POLARIS10:
2901 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2904 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2905 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2908 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2909 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2912 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2913 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2916 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2917 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2920 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2921 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2924 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2926 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2928 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2929 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2931 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2932 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2933 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2934 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2935 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2936 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2937 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2940 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2944 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2945 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2946 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2947 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2949 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2951 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2952 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2953 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2954 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2956 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2960 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2962 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2964 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2967 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2968 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2969 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2970 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2971 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2972 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2975 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2976 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2978 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2979 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2980 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2982 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2983 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2984 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2986 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2987 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2988 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2990 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2991 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2992 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2993 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2994 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2995 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2996 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2997 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2998 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2999 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3000 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3002 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3003 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3004 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3006 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3007 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3008 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3010 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3014 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3016 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3018 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3019 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3020 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3021 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3022 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3023 
3024 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3026 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027 				NUM_BANKS(ADDR_SURF_16_BANK));
3028 
3029 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3031 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032 				NUM_BANKS(ADDR_SURF_16_BANK));
3033 
3034 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037 				NUM_BANKS(ADDR_SURF_16_BANK));
3038 
3039 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3041 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3042 				NUM_BANKS(ADDR_SURF_16_BANK));
3043 
3044 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3046 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3047 				NUM_BANKS(ADDR_SURF_16_BANK));
3048 
3049 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052 				NUM_BANKS(ADDR_SURF_16_BANK));
3053 
3054 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3057 				NUM_BANKS(ADDR_SURF_16_BANK));
3058 
3059 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3061 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3062 				NUM_BANKS(ADDR_SURF_16_BANK));
3063 
3064 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3065 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3066 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3067 				NUM_BANKS(ADDR_SURF_16_BANK));
3068 
3069 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3071 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3072 				NUM_BANKS(ADDR_SURF_16_BANK));
3073 
3074 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077 				NUM_BANKS(ADDR_SURF_16_BANK));
3078 
3079 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3080 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3081 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3082 				NUM_BANKS(ADDR_SURF_8_BANK));
3083 
3084 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3087 				NUM_BANKS(ADDR_SURF_4_BANK));
3088 
3089 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3091 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3092 				NUM_BANKS(ADDR_SURF_4_BANK));
3093 
3094 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3095 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3096 
3097 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3098 			if (reg_offset != 7)
3099 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3100 
3101 		break;
3102 	case CHIP_STONEY:
3103 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3104 				PIPE_CONFIG(ADDR_SURF_P2) |
3105 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3106 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3107 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108 				PIPE_CONFIG(ADDR_SURF_P2) |
3109 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3110 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3111 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3112 				PIPE_CONFIG(ADDR_SURF_P2) |
3113 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3114 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3115 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116 				PIPE_CONFIG(ADDR_SURF_P2) |
3117 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3118 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3119 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120 				PIPE_CONFIG(ADDR_SURF_P2) |
3121 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3122 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3123 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3124 				PIPE_CONFIG(ADDR_SURF_P2) |
3125 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3126 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3127 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128 				PIPE_CONFIG(ADDR_SURF_P2) |
3129 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3130 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3131 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3132 				PIPE_CONFIG(ADDR_SURF_P2));
3133 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3134 				PIPE_CONFIG(ADDR_SURF_P2) |
3135 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3136 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3137 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3138 				 PIPE_CONFIG(ADDR_SURF_P2) |
3139 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3140 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3141 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3142 				 PIPE_CONFIG(ADDR_SURF_P2) |
3143 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3144 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3145 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3146 				 PIPE_CONFIG(ADDR_SURF_P2) |
3147 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3148 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3149 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3150 				 PIPE_CONFIG(ADDR_SURF_P2) |
3151 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3152 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3153 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3154 				 PIPE_CONFIG(ADDR_SURF_P2) |
3155 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3156 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3157 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3158 				 PIPE_CONFIG(ADDR_SURF_P2) |
3159 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3160 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3161 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3162 				 PIPE_CONFIG(ADDR_SURF_P2) |
3163 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3164 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3165 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3166 				 PIPE_CONFIG(ADDR_SURF_P2) |
3167 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3168 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3169 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3170 				 PIPE_CONFIG(ADDR_SURF_P2) |
3171 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3172 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3173 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3174 				 PIPE_CONFIG(ADDR_SURF_P2) |
3175 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3176 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3177 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3178 				 PIPE_CONFIG(ADDR_SURF_P2) |
3179 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3180 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3181 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3182 				 PIPE_CONFIG(ADDR_SURF_P2) |
3183 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3184 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3185 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3186 				 PIPE_CONFIG(ADDR_SURF_P2) |
3187 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3188 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3189 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3190 				 PIPE_CONFIG(ADDR_SURF_P2) |
3191 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3192 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3193 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3194 				 PIPE_CONFIG(ADDR_SURF_P2) |
3195 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3196 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3197 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3198 				 PIPE_CONFIG(ADDR_SURF_P2) |
3199 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3200 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3201 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3202 				 PIPE_CONFIG(ADDR_SURF_P2) |
3203 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3204 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3205 
3206 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3207 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3208 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3209 				NUM_BANKS(ADDR_SURF_8_BANK));
3210 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3211 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3212 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3213 				NUM_BANKS(ADDR_SURF_8_BANK));
3214 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3215 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3216 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3217 				NUM_BANKS(ADDR_SURF_8_BANK));
3218 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3219 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3220 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3221 				NUM_BANKS(ADDR_SURF_8_BANK));
3222 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3223 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3224 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3225 				NUM_BANKS(ADDR_SURF_8_BANK));
3226 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3227 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3228 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3229 				NUM_BANKS(ADDR_SURF_8_BANK));
3230 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3231 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3232 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3233 				NUM_BANKS(ADDR_SURF_8_BANK));
3234 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3235 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3236 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3237 				NUM_BANKS(ADDR_SURF_16_BANK));
3238 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3239 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3240 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241 				NUM_BANKS(ADDR_SURF_16_BANK));
3242 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3243 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3244 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3245 				 NUM_BANKS(ADDR_SURF_16_BANK));
3246 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3247 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3248 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3249 				 NUM_BANKS(ADDR_SURF_16_BANK));
3250 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3251 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3252 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253 				 NUM_BANKS(ADDR_SURF_16_BANK));
3254 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3255 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3256 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3257 				 NUM_BANKS(ADDR_SURF_16_BANK));
3258 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3260 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3261 				 NUM_BANKS(ADDR_SURF_8_BANK));
3262 
3263 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3264 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3265 			    reg_offset != 23)
3266 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3267 
3268 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3269 			if (reg_offset != 7)
3270 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3271 
3272 		break;
3273 	default:
3274 		dev_warn(adev->dev,
3275 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3276 			 adev->asic_type);
3277 
3278 	case CHIP_CARRIZO:
3279 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3280 				PIPE_CONFIG(ADDR_SURF_P2) |
3281 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3282 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3283 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3284 				PIPE_CONFIG(ADDR_SURF_P2) |
3285 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3286 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3287 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3288 				PIPE_CONFIG(ADDR_SURF_P2) |
3289 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3290 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3291 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292 				PIPE_CONFIG(ADDR_SURF_P2) |
3293 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3294 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3295 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3296 				PIPE_CONFIG(ADDR_SURF_P2) |
3297 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3298 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3299 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3300 				PIPE_CONFIG(ADDR_SURF_P2) |
3301 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3302 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3303 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3304 				PIPE_CONFIG(ADDR_SURF_P2) |
3305 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3306 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3307 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3308 				PIPE_CONFIG(ADDR_SURF_P2));
3309 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3310 				PIPE_CONFIG(ADDR_SURF_P2) |
3311 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3312 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3313 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3314 				 PIPE_CONFIG(ADDR_SURF_P2) |
3315 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3316 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3317 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3318 				 PIPE_CONFIG(ADDR_SURF_P2) |
3319 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3320 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3321 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3322 				 PIPE_CONFIG(ADDR_SURF_P2) |
3323 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3324 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3325 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3326 				 PIPE_CONFIG(ADDR_SURF_P2) |
3327 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3328 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3329 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3330 				 PIPE_CONFIG(ADDR_SURF_P2) |
3331 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3332 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3333 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3334 				 PIPE_CONFIG(ADDR_SURF_P2) |
3335 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3336 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3337 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3338 				 PIPE_CONFIG(ADDR_SURF_P2) |
3339 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3340 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3341 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3342 				 PIPE_CONFIG(ADDR_SURF_P2) |
3343 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3344 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3345 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3346 				 PIPE_CONFIG(ADDR_SURF_P2) |
3347 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3348 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3349 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3350 				 PIPE_CONFIG(ADDR_SURF_P2) |
3351 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3352 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3353 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3354 				 PIPE_CONFIG(ADDR_SURF_P2) |
3355 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3356 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3357 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3358 				 PIPE_CONFIG(ADDR_SURF_P2) |
3359 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3360 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3361 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3362 				 PIPE_CONFIG(ADDR_SURF_P2) |
3363 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3364 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3365 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3366 				 PIPE_CONFIG(ADDR_SURF_P2) |
3367 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3368 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3369 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3370 				 PIPE_CONFIG(ADDR_SURF_P2) |
3371 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3372 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3373 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3374 				 PIPE_CONFIG(ADDR_SURF_P2) |
3375 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3376 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3377 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3378 				 PIPE_CONFIG(ADDR_SURF_P2) |
3379 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3380 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3381 
3382 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3384 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3385 				NUM_BANKS(ADDR_SURF_8_BANK));
3386 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3387 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3388 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3389 				NUM_BANKS(ADDR_SURF_8_BANK));
3390 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3391 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3392 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3393 				NUM_BANKS(ADDR_SURF_8_BANK));
3394 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3395 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3396 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3397 				NUM_BANKS(ADDR_SURF_8_BANK));
3398 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3399 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3400 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3401 				NUM_BANKS(ADDR_SURF_8_BANK));
3402 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3403 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3404 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3405 				NUM_BANKS(ADDR_SURF_8_BANK));
3406 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3407 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3408 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3409 				NUM_BANKS(ADDR_SURF_8_BANK));
3410 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3411 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3412 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3413 				NUM_BANKS(ADDR_SURF_16_BANK));
3414 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3415 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3416 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417 				NUM_BANKS(ADDR_SURF_16_BANK));
3418 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3419 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3420 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3421 				 NUM_BANKS(ADDR_SURF_16_BANK));
3422 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3423 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3424 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3425 				 NUM_BANKS(ADDR_SURF_16_BANK));
3426 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3427 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3428 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3429 				 NUM_BANKS(ADDR_SURF_16_BANK));
3430 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3432 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3433 				 NUM_BANKS(ADDR_SURF_16_BANK));
3434 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3435 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3436 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3437 				 NUM_BANKS(ADDR_SURF_8_BANK));
3438 
3439 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3440 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3441 			    reg_offset != 23)
3442 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3443 
3444 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3445 			if (reg_offset != 7)
3446 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3447 
3448 		break;
3449 	}
3450 }
3451 
/* Route subsequent indexed register accesses to a specific shader
 * engine (se_num), shader array (sh_num) and instance by programming
 * GRBM_GFX_INDEX.  Passing 0xffffffff for any argument selects the
 * corresponding *_BROADCAST_WRITES mode instead of a single index.
 * NOTE(review): callers in this file serialize access with
 * grbm_idx_mutex — confirm before calling from a new path.
 */
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, data);
}
3474 
3475 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3476 {
3477 	return (u32)((1ULL << bit_width) - 1);
3478 }
3479 
/* Return a bitmap of the active render backends (RBs) for the SE/SH
 * currently selected via GRBM_GFX_INDEX.  A set bit means the RB is
 * enabled; disabled (harvested or user-disabled) RBs read as 0.
 */
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	/* combine fuse-level and user/driver-level disable bits */
	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
		RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	/* width of one SH's slice of the backend bitmap */
	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	/* the registers hold *disable* bits, so invert to get active RBs */
	return (~data) & mask;
}
3494 
3495 static void
3496 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3497 {
3498 	switch (adev->asic_type) {
3499 	case CHIP_FIJI:
3500 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3501 			  RB_XSEL2(1) | PKR_MAP(2) |
3502 			  PKR_XSEL(1) | PKR_YSEL(1) |
3503 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3504 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3505 			   SE_PAIR_YSEL(2);
3506 		break;
3507 	case CHIP_TONGA:
3508 	case CHIP_POLARIS10:
3509 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3510 			  SE_XSEL(1) | SE_YSEL(1);
3511 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3512 			   SE_PAIR_YSEL(2);
3513 		break;
3514 	case CHIP_TOPAZ:
3515 	case CHIP_CARRIZO:
3516 		*rconf |= RB_MAP_PKR0(2);
3517 		*rconf1 |= 0x0;
3518 		break;
3519 	case CHIP_POLARIS11:
3520 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3521 			  SE_XSEL(1) | SE_YSEL(1);
3522 		*rconf1 |= 0x0;
3523 		break;
3524 	case CHIP_STONEY:
3525 		*rconf |= 0x0;
3526 		*rconf1 |= 0x0;
3527 		break;
3528 	default:
3529 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3530 		break;
3531 	}
3532 }
3533 
/* Program per-SE PA_SC_RASTER_CONFIG/_1 on parts with harvested
 * (disabled) render backends, remapping the RB/PKR/SE routing fields so
 * that work only targets backends present in @rb_mask.
 *
 * @raster_config / @raster_config_1: ideal (unharvested) values
 * @rb_mask: global bitmap of enabled RBs
 * @num_rb: total RB count the ideal config assumes
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* slice the global RB mask into one contiguous chunk per SE */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* only these topologies are handled by the remapping below */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if a whole SE pair is dead, repoint SE_PAIR_MAP at the live pair */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx = first SE of the pair this SE belongs to */
		int idx = (se / 2) * 2;

		/* one SE of the pair is dead: route SE_MAP to the live one */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* same idea one level down: remap between packers */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* and finally between the two RBs inside each packer */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3642 
/* Discover which render backends are active on every SE/SH, cache the
 * result in adev->gfx.config, and program the raster configuration —
 * either the per-ASIC defaults, or the harvested remapping when some
 * RBs are disabled.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* pack each SH's slice into one global bitmap */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* restore broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* no RBs detected or all present: broadcast the default config;
	 * otherwise write per-SE configs that avoid the harvested RBs.
	 */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	mutex_unlock(&adev->grbm_idx_mutex);
}
3684 
/**
 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM_* aperture registers for the compute VMIDs
 * (FIRST_COMPUTE_VMID .. LAST_COMPUTE_VMID - 1).
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* HSA64 address mode, unaligned access allowed, MTYPE_CC default
	 * memory type, private aperture routed through the ATC.
	 */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base > limit — presumably leaves APE1 unused; verify */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* back to VMID 0 */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3729 
/*
 * One-time GPU block initialization for GFX v8: programs GRBM timeout,
 * address-config registers, tiling/RB setup, per-VMID SH_MEM defaults and
 * the PA_SC FIFO sizes.  Called during hw init before the CP/RLC are started.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* mirror the same gb_addr_config value into GB/HDP/DMIF */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	/* program SH_MEM_* for all 16 VMIDs; srbm_mutex serializes SRBM banking */
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel/CP): uncached default and APE1 mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* user VMIDs: non-coherent cached mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		/* base > limit disables the APE1 aperture */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
3792 
/*
 * Busy-wait until the RLC serdes masters report idle: first the per-CU
 * master on every SE/SH, then the non-CU masters (SE/GC/TC0/TC1).
 * Each poll is bounded by adev->usec_timeout microseconds; timeouts are
 * silently ignored (best-effort wait).
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			/* target one SE/SH at a time for the CU-master poll */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode before dropping the index mutex */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3822 
/*
 * Read-modify-write CP_INT_CNTL_RING0 to enable/disable the context-busy,
 * context-empty, compute-busy and GFX-idle interrupt sources together.
 */
static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}
3835 
/*
 * Point the RLC at the clear-state indirect buffer (CSIB): 64-bit GPU
 * address split across HI/LO (LO masked to dword alignment) plus length.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3846 
3847 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3848 				int ind_offset,
3849 				int list_size,
3850 				int *unique_indices,
3851 				int *indices_count,
3852 				int max_indices,
3853 				int *ind_start_offsets,
3854 				int *offset_count,
3855 				int max_offset)
3856 {
3857 	int indices;
3858 	bool new_entry = true;
3859 
3860 	for (; ind_offset < list_size; ind_offset++) {
3861 
3862 		if (new_entry) {
3863 			new_entry = false;
3864 			ind_start_offsets[*offset_count] = ind_offset;
3865 			*offset_count = *offset_count + 1;
3866 			BUG_ON(*offset_count >= max_offset);
3867 		}
3868 
3869 		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3870 			new_entry = true;
3871 			continue;
3872 		}
3873 
3874 		ind_offset += 2;
3875 
3876 		/* look for the matching indice */
3877 		for (indices = 0;
3878 			indices < *indices_count;
3879 			indices++) {
3880 			if (unique_indices[indices] ==
3881 				register_list_format[ind_offset])
3882 				break;
3883 		}
3884 
3885 		if (indices >= *indices_count) {
3886 			unique_indices[*indices_count] =
3887 				register_list_format[ind_offset];
3888 			indices = *indices_count;
3889 			*indices_count = *indices_count + 1;
3890 			BUG_ON(*indices_count >= max_indices);
3891 		}
3892 
3893 		register_list_format[ind_offset] = indices;
3894 	}
3895 }
3896 
/*
 * Program the RLC save/restore machine: upload the register-restore list
 * into ARAM, the (compacted) indirect format list and its metadata into
 * GPM scratch, and the unique index values into the SRM index-control
 * register pairs.  Works on a kmalloc'd copy of the firmware-provided
 * format list because parsing rewrites it in place.
 *
 * Returns 0 on success or -ENOMEM if the scratch copy cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (register_list_format == NULL)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	/* compact the indirect list in place and collect indices/offsets */
	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	/* ARAM auto-increments, so write addr once then stream data */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* restore-list size is reported in units of 2 dwords */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	/* ADDR_i takes the low 18 bits, DATA_i the bits above 20 —
	 * presumably matching the register field layout; confirm vs spec */
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
	}
	kfree(register_list_format);

	return 0;
}
3958 
/* Turn on the RLC save/restore machine (SRM_ENABLE bit). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
3963 
/*
 * Program power-gating timing parameters (poll count, RLC PG delays,
 * serdes command delay, GFX-idle threshold).  Only applies when any of
 * the static/dynamic GFX PG flags are set; otherwise a no-op.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

		/* all four PG delay fields use the same 0x10 value */
		data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
		WREG32(mmRLC_PG_DELAY, data);

		WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
		WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
	}
}
3983 
/* Enable/disable SMU clock slowdown during power-up (CZ/ST PG control). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
3989 
/* Enable/disable SMU clock slowdown during power-down (CZ/ST PG control). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
3995 
/*
 * Enable/disable CP power gating.  Note the field is CP_PG_DISABLE, so
 * enable==true writes 1 to the *disable* bit — this matches how the
 * caller in gfx_v8_0_init_pg uses it; confirm polarity against the
 * register spec before changing.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
}
4000 
/*
 * Top-level power-gating init: sets up the clear-state buffer and the RLC
 * save/restore machine whenever any PG feature is enabled, then applies
 * ASIC-specific extras (jump table, always-on CU mask and SCK/CP gating
 * for Carrizo/Stoney; PG delays for Polaris11).
 */
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		gfx_v8_0_init_csb(adev);
		/* NOTE(review): return value of init_save_restore_list
		 * (-ENOMEM possible) is ignored here */
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);

		if ((adev->asic_type == CHIP_CARRIZO) ||
		    (adev->asic_type == CHIP_STONEY)) {
			/* jump table address is programmed in units of 256 bytes */
			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
			gfx_v8_0_init_power_gating(adev);
			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
				cz_enable_sck_slow_down_on_power_up(adev, true);
				cz_enable_sck_slow_down_on_power_down(adev, true);
			} else {
				cz_enable_sck_slow_down_on_power_up(adev, false);
				cz_enable_sck_slow_down_on_power_down(adev, false);
			}
			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
				cz_enable_cp_power_gating(adev, true);
			else
				cz_enable_cp_power_gating(adev, false);
		} else if (adev->asic_type == CHIP_POLARIS11) {
			gfx_v8_0_init_power_gating(adev);
		}
	}
}
4034 
/*
 * Halt the RLC: clear the F32 enable bit, mask the GUI-idle interrupts
 * and wait for the serdes masters to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4042 
/* Pulse the GRBM soft-reset bit for the RLC, with 50us settle delays. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4051 
/*
 * Start the RLC F32 core.  On dGPUs the GUI-idle interrupts are enabled
 * here; on APUs (e.g. Carrizo) that happens later, after CP init.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4062 
/*
 * Legacy (non-SMU) RLC microcode upload: stream the firmware dwords into
 * RLC_GPM_UCODE_DATA, then write the fw version into the ADDR register
 * (the final ADDR write follows the established upload protocol for this
 * block — same pattern as the CP uploads below).
 *
 * Returns 0 on success, -EINVAL if no RLC firmware was loaded.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* payload starts at the header-declared offset; size is in dwords */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
4086 
/*
 * Full RLC bring-up sequence: stop, disable CGCG/CGLS clock gating,
 * disable PG, soft-reset, re-init PG state, load microcode (directly or
 * via the SMU, depending on configuration) and start the RLC.
 *
 * Returns 0 on success or a negative error code if microcode loading
 * fails or the SMU reports the firmware load unfinished.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10) {
		/* Polaris also has a 3D CGCG/CGLS control; clear its
		 * low two enable bits */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU-driven load: just verify it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4130 
/*
 * Release or halt the three graphics CP micro-engines (ME, PFP, CE) via
 * CP_ME_CNTL.  When halting, also mark every gfx ring not-ready so
 * submissions are rejected until the CP is restarted.
 */
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}
4150 
/*
 * Legacy upload of the three graphics CP firmwares (PFP, CE, ME): halt
 * the CP, then for each engine reset its ucode address register, stream
 * the firmware dwords and finally write the fw version to the address
 * register.  Caller restarts the CP afterwards.
 *
 * Returns 0 on success, -EINVAL if any of the three firmwares is missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* engines must be halted while their ucode is rewritten */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME (uses the RAM_WADDR/RAM_DATA register pair) */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
4207 
4208 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4209 {
4210 	u32 count = 0;
4211 	const struct cs_section_def *sect = NULL;
4212 	const struct cs_extent_def *ext = NULL;
4213 
4214 	/* begin clear state */
4215 	count += 2;
4216 	/* context control state */
4217 	count += 3;
4218 
4219 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4220 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4221 			if (sect->id == SECT_CONTEXT)
4222 				count += 2 + ext->reg_count;
4223 			else
4224 				return 0;
4225 		}
4226 	}
4227 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4228 	count += 4;
4229 	/* end clear state */
4230 	count += 2;
4231 	/* clear state */
4232 	count += 2;
4233 
4234 	return count;
4235 }
4236 
/*
 * Initialize the graphics CP and submit the clear-state sequence on gfx
 * ring 0: context-control, the vi_cs_data context registers, the
 * ASIC-specific PA_SC_RASTER_CONFIG pair, CLEAR_STATE and the CE
 * partition bases.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords beyond the computed CSB size for ring padding */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* Topaz config depends on whether one or two RBs are present */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4328 
/*
 * Bring up gfx ring 0: program the ring-buffer registers (size, pointers,
 * rptr writeback address, base), configure doorbells where supported,
 * run the clear-state start sequence, then ring-test.
 *
 * Returns the ring-test result; on failure the ring is left not-ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	/* RB_BUFSZ is log2 of the size in units of 8 dwords */
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	mdelay(1);
	/* clear RPTR_WR_ENA again after the pointers are set */
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			/* Tonga additionally needs the doorbell range set */
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4409 
/*
 * Release or halt both compute micro-engines (MEC1/MEC2) via CP_MEC_CNTL.
 * When halting, mark every compute ring not-ready.
 */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
	}
	udelay(50);
}
4423 
/*
 * Legacy upload of the compute (MEC) firmware: halt the compute engines,
 * stream MEC1 ucode, and — only when a separate MEC2 image was loaded —
 * stream MEC2 ucode as well.
 *
 * Returns 0 on success, -EINVAL if the MEC1 firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4469 
4470 struct vi_mqd {
4471 	uint32_t header;  /* ordinal0 */
4472 	uint32_t compute_dispatch_initiator;  /* ordinal1 */
4473 	uint32_t compute_dim_x;  /* ordinal2 */
4474 	uint32_t compute_dim_y;  /* ordinal3 */
4475 	uint32_t compute_dim_z;  /* ordinal4 */
4476 	uint32_t compute_start_x;  /* ordinal5 */
4477 	uint32_t compute_start_y;  /* ordinal6 */
4478 	uint32_t compute_start_z;  /* ordinal7 */
4479 	uint32_t compute_num_thread_x;  /* ordinal8 */
4480 	uint32_t compute_num_thread_y;  /* ordinal9 */
4481 	uint32_t compute_num_thread_z;  /* ordinal10 */
4482 	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4483 	uint32_t compute_perfcount_enable;  /* ordinal12 */
4484 	uint32_t compute_pgm_lo;  /* ordinal13 */
4485 	uint32_t compute_pgm_hi;  /* ordinal14 */
4486 	uint32_t compute_tba_lo;  /* ordinal15 */
4487 	uint32_t compute_tba_hi;  /* ordinal16 */
4488 	uint32_t compute_tma_lo;  /* ordinal17 */
4489 	uint32_t compute_tma_hi;  /* ordinal18 */
4490 	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4491 	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4492 	uint32_t compute_vmid;  /* ordinal21 */
4493 	uint32_t compute_resource_limits;  /* ordinal22 */
4494 	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4495 	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4496 	uint32_t compute_tmpring_size;  /* ordinal25 */
4497 	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4498 	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4499 	uint32_t compute_restart_x;  /* ordinal28 */
4500 	uint32_t compute_restart_y;  /* ordinal29 */
4501 	uint32_t compute_restart_z;  /* ordinal30 */
4502 	uint32_t compute_thread_trace_enable;  /* ordinal31 */
4503 	uint32_t compute_misc_reserved;  /* ordinal32 */
4504 	uint32_t compute_dispatch_id;  /* ordinal33 */
4505 	uint32_t compute_threadgroup_id;  /* ordinal34 */
4506 	uint32_t compute_relaunch;  /* ordinal35 */
4507 	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4508 	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4509 	uint32_t compute_wave_restore_control;  /* ordinal38 */
4510 	uint32_t reserved9;  /* ordinal39 */
4511 	uint32_t reserved10;  /* ordinal40 */
4512 	uint32_t reserved11;  /* ordinal41 */
4513 	uint32_t reserved12;  /* ordinal42 */
4514 	uint32_t reserved13;  /* ordinal43 */
4515 	uint32_t reserved14;  /* ordinal44 */
4516 	uint32_t reserved15;  /* ordinal45 */
4517 	uint32_t reserved16;  /* ordinal46 */
4518 	uint32_t reserved17;  /* ordinal47 */
4519 	uint32_t reserved18;  /* ordinal48 */
4520 	uint32_t reserved19;  /* ordinal49 */
4521 	uint32_t reserved20;  /* ordinal50 */
4522 	uint32_t reserved21;  /* ordinal51 */
4523 	uint32_t reserved22;  /* ordinal52 */
4524 	uint32_t reserved23;  /* ordinal53 */
4525 	uint32_t reserved24;  /* ordinal54 */
4526 	uint32_t reserved25;  /* ordinal55 */
4527 	uint32_t reserved26;  /* ordinal56 */
4528 	uint32_t reserved27;  /* ordinal57 */
4529 	uint32_t reserved28;  /* ordinal58 */
4530 	uint32_t reserved29;  /* ordinal59 */
4531 	uint32_t reserved30;  /* ordinal60 */
4532 	uint32_t reserved31;  /* ordinal61 */
4533 	uint32_t reserved32;  /* ordinal62 */
4534 	uint32_t reserved33;  /* ordinal63 */
4535 	uint32_t reserved34;  /* ordinal64 */
4536 	uint32_t compute_user_data_0;  /* ordinal65 */
4537 	uint32_t compute_user_data_1;  /* ordinal66 */
4538 	uint32_t compute_user_data_2;  /* ordinal67 */
4539 	uint32_t compute_user_data_3;  /* ordinal68 */
4540 	uint32_t compute_user_data_4;  /* ordinal69 */
4541 	uint32_t compute_user_data_5;  /* ordinal70 */
4542 	uint32_t compute_user_data_6;  /* ordinal71 */
4543 	uint32_t compute_user_data_7;  /* ordinal72 */
4544 	uint32_t compute_user_data_8;  /* ordinal73 */
4545 	uint32_t compute_user_data_9;  /* ordinal74 */
4546 	uint32_t compute_user_data_10;  /* ordinal75 */
4547 	uint32_t compute_user_data_11;  /* ordinal76 */
4548 	uint32_t compute_user_data_12;  /* ordinal77 */
4549 	uint32_t compute_user_data_13;  /* ordinal78 */
4550 	uint32_t compute_user_data_14;  /* ordinal79 */
4551 	uint32_t compute_user_data_15;  /* ordinal80 */
4552 	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4553 	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4554 	uint32_t reserved35;  /* ordinal83 */
4555 	uint32_t reserved36;  /* ordinal84 */
4556 	uint32_t reserved37;  /* ordinal85 */
4557 	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4558 	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4559 	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4560 	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4561 	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4562 	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4563 	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4564 	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4565 	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4566 	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4567 	uint32_t reserved38;  /* ordinal96 */
4568 	uint32_t reserved39;  /* ordinal97 */
4569 	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4570 	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4571 	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4572 	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4573 	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4574 	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4575 	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4576 	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4577 	uint32_t reserved40;  /* ordinal106 */
4578 	uint32_t reserved41;  /* ordinal107 */
4579 	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4580 	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4581 	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4582 	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4583 	uint32_t reserved42;  /* ordinal112 */
4584 	uint32_t reserved43;  /* ordinal113 */
4585 	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4586 	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4587 	uint32_t cp_packet_id_lo;  /* ordinal116 */
4588 	uint32_t cp_packet_id_hi;  /* ordinal117 */
4589 	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4590 	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4591 	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4592 	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4593 	uint32_t gds_save_mask_lo;  /* ordinal122 */
4594 	uint32_t gds_save_mask_hi;  /* ordinal123 */
4595 	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4596 	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4597 	uint32_t reserved44;  /* ordinal126 */
4598 	uint32_t reserved45;  /* ordinal127 */
4599 	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4600 	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4601 	uint32_t cp_hqd_active;  /* ordinal130 */
4602 	uint32_t cp_hqd_vmid;  /* ordinal131 */
4603 	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4604 	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4605 	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4606 	uint32_t cp_hqd_quantum;  /* ordinal135 */
4607 	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4608 	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4609 	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4610 	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4611 	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4612 	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4613 	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4614 	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4615 	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4616 	uint32_t cp_hqd_pq_control;  /* ordinal145 */
4617 	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4618 	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4619 	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4620 	uint32_t cp_hqd_ib_control;  /* ordinal149 */
4621 	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4622 	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4623 	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4624 	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4625 	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4626 	uint32_t cp_hqd_msg_type;  /* ordinal155 */
4627 	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4628 	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4629 	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4630 	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4631 	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4632 	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4633 	uint32_t cp_mqd_control;  /* ordinal162 */
4634 	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4635 	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4636 	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4637 	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4638 	uint32_t cp_hqd_eop_control;  /* ordinal167 */
4639 	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4640 	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4641 	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4642 	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4643 	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4644 	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4645 	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4646 	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4647 	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4648 	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4649 	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4650 	uint32_t cp_hqd_error;  /* ordinal179 */
4651 	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4652 	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4653 	uint32_t reserved46;  /* ordinal182 */
4654 	uint32_t reserved47;  /* ordinal183 */
4655 	uint32_t reserved48;  /* ordinal184 */
4656 	uint32_t reserved49;  /* ordinal185 */
4657 	uint32_t reserved50;  /* ordinal186 */
4658 	uint32_t reserved51;  /* ordinal187 */
4659 	uint32_t reserved52;  /* ordinal188 */
4660 	uint32_t reserved53;  /* ordinal189 */
4661 	uint32_t reserved54;  /* ordinal190 */
4662 	uint32_t reserved55;  /* ordinal191 */
4663 	uint32_t iqtimer_pkt_header;  /* ordinal192 */
4664 	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4665 	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4666 	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4667 	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4668 	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4669 	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4670 	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4671 	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4672 	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4673 	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4674 	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4675 	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4676 	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4677 	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4678 	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4679 	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4680 	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4681 	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4682 	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4683 	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4684 	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4685 	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4686 	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4687 	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4688 	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4689 	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4690 	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4691 	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4692 	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4693 	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4694 	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4695 	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4696 	uint32_t reserved56;  /* ordinal225 */
4697 	uint32_t reserved57;  /* ordinal226 */
4698 	uint32_t reserved58;  /* ordinal227 */
4699 	uint32_t set_resources_header;  /* ordinal228 */
4700 	uint32_t set_resources_dw1;  /* ordinal229 */
4701 	uint32_t set_resources_dw2;  /* ordinal230 */
4702 	uint32_t set_resources_dw3;  /* ordinal231 */
4703 	uint32_t set_resources_dw4;  /* ordinal232 */
4704 	uint32_t set_resources_dw5;  /* ordinal233 */
4705 	uint32_t set_resources_dw6;  /* ordinal234 */
4706 	uint32_t set_resources_dw7;  /* ordinal235 */
4707 	uint32_t reserved59;  /* ordinal236 */
4708 	uint32_t reserved60;  /* ordinal237 */
4709 	uint32_t reserved61;  /* ordinal238 */
4710 	uint32_t reserved62;  /* ordinal239 */
4711 	uint32_t reserved63;  /* ordinal240 */
4712 	uint32_t reserved64;  /* ordinal241 */
4713 	uint32_t reserved65;  /* ordinal242 */
4714 	uint32_t reserved66;  /* ordinal243 */
4715 	uint32_t reserved67;  /* ordinal244 */
4716 	uint32_t reserved68;  /* ordinal245 */
4717 	uint32_t reserved69;  /* ordinal246 */
4718 	uint32_t reserved70;  /* ordinal247 */
4719 	uint32_t reserved71;  /* ordinal248 */
4720 	uint32_t reserved72;  /* ordinal249 */
4721 	uint32_t reserved73;  /* ordinal250 */
4722 	uint32_t reserved74;  /* ordinal251 */
4723 	uint32_t reserved75;  /* ordinal252 */
4724 	uint32_t reserved76;  /* ordinal253 */
4725 	uint32_t reserved77;  /* ordinal254 */
4726 	uint32_t reserved78;  /* ordinal255 */
4727 
4728 	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4729 };
4730 
4731 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4732 {
4733 	int i, r;
4734 
4735 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4736 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4737 
4738 		if (ring->mqd_obj) {
4739 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4740 			if (unlikely(r != 0))
4741 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4742 
4743 			amdgpu_bo_unpin(ring->mqd_obj);
4744 			amdgpu_bo_unreserve(ring->mqd_obj);
4745 
4746 			amdgpu_bo_unref(&ring->mqd_obj);
4747 			ring->mqd_obj = NULL;
4748 		}
4749 	}
4750 }
4751 
/*
 * gfx_v8_0_cp_compute_resume - initialize and start the compute queues
 *
 * Programs the EOP buffer registers of every MEC pipe, then for each
 * compute ring builds an MQD (memory queue descriptor) in a GTT buffer
 * object, mirrors its contents into the CP_HQD_* registers under SRBM
 * selection, and activates the queue. Finally enables the compute CP
 * and runs a ring test on every compute ring.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 live on MEC1 (me=1), the rest on MEC2 (me=2) */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* the EOP base registers take the address in 256-byte units */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* MQD bo is created once and reused across resumes */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on every shader engine */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* all CP_HQD_* accesses below target this ring's queue */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* mirror the EOP address programmed in the pipe-init loop */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait for the ACTIVE bit to clear */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			/* enable the GENERIC2 interrupt on ME1 pipe 3 */
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		/* globally enable doorbell reception by the CP */
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* ring-test every compute ring; mark failures as not ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
5010 
5011 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5012 {
5013 	int r;
5014 
5015 	if (!(adev->flags & AMD_IS_APU))
5016 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5017 
5018 	if (!adev->pp_enabled) {
5019 		if (!adev->firmware.smu_load) {
5020 			/* legacy firmware loading */
5021 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
5022 			if (r)
5023 				return r;
5024 
5025 			r = gfx_v8_0_cp_compute_load_microcode(adev);
5026 			if (r)
5027 				return r;
5028 		} else {
5029 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5030 							AMDGPU_UCODE_ID_CP_CE);
5031 			if (r)
5032 				return -EINVAL;
5033 
5034 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5035 							AMDGPU_UCODE_ID_CP_PFP);
5036 			if (r)
5037 				return -EINVAL;
5038 
5039 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5040 							AMDGPU_UCODE_ID_CP_ME);
5041 			if (r)
5042 				return -EINVAL;
5043 
5044 			if (adev->asic_type == CHIP_TOPAZ) {
5045 				r = gfx_v8_0_cp_compute_load_microcode(adev);
5046 				if (r)
5047 					return r;
5048 			} else {
5049 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5050 										 AMDGPU_UCODE_ID_CP_MEC1);
5051 				if (r)
5052 					return -EINVAL;
5053 			}
5054 		}
5055 	}
5056 
5057 	r = gfx_v8_0_cp_gfx_resume(adev);
5058 	if (r)
5059 		return r;
5060 
5061 	r = gfx_v8_0_cp_compute_resume(adev);
5062 	if (r)
5063 		return r;
5064 
5065 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5066 
5067 	return 0;
5068 }
5069 
/* Enable or disable both CP engines (GFX and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5075 
/*
 * hw_init IP callback: program golden registers, init the GPU core,
 * then bring up the RLC followed by the command processors.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5092 
/*
 * hw_fini IP callback: quiesce the GFX block. Drops the privileged
 * fault interrupts, halts the CP engines before stopping the RLC,
 * frees the compute MQDs and ungates GFX power gating.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	/* make sure the block is ungated while it is down */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5108 
/* suspend is simply a full hw teardown */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
5115 
/* resume is simply a full hw re-init */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
5122 
5123 static bool gfx_v8_0_is_idle(void *handle)
5124 {
5125 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5126 
5127 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5128 		return false;
5129 	else
5130 		return true;
5131 }
5132 
5133 static int gfx_v8_0_wait_for_idle(void *handle)
5134 {
5135 	unsigned i;
5136 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5137 
5138 	for (i = 0; i < adev->usec_timeout; i++) {
5139 		if (gfx_v8_0_is_idle(handle))
5140 			return 0;
5141 
5142 		udelay(1);
5143 	}
5144 	return -ETIMEDOUT;
5145 }
5146 
/*
 * gfx_v8_0_check_soft_reset - decide whether the GFX block needs a soft reset
 *
 * Examines the busy bits of GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS
 * and accumulates the matching GRBM/SRBM soft-reset bits. The computed
 * masks are cached in adev->gfx.{grbm,srbm}_soft_reset so the
 * pre/soft/post reset callbacks can consume them later.
 *
 * Returns true when any reset bit was set, false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	/* any busy GFX engine implies a CP + GFX + GRBM reset */
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* a busy CP fetcher/compute/gfx unit resets all three CP blocks */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5208 
/*
 * Deactivate a compute queue's HQD: if it is still active, write a
 * dequeue request and busy-wait (up to adev->usec_timeout us) for the
 * ACTIVE bit to clear.
 *
 * NOTE(review): this leaves the SRBM selected on the ring's
 * me/pipe/queue — callers appear to rely on restoring it themselves.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		/* request type 2 — presumably a drain request; confirm
		 * against the CP register spec */
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}
5228 
5229 static int gfx_v8_0_pre_soft_reset(void *handle)
5230 {
5231 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5232 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5233 
5234 	if ((!adev->gfx.grbm_soft_reset) &&
5235 	    (!adev->gfx.srbm_soft_reset))
5236 		return 0;
5237 
5238 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5239 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5240 
5241 	/* stop the rlc */
5242 	gfx_v8_0_rlc_stop(adev);
5243 
5244 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5245 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5246 		/* Disable GFX parsing/prefetching */
5247 		gfx_v8_0_cp_gfx_enable(adev, false);
5248 
5249 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5250 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5251 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5252 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5253 		int i;
5254 
5255 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5256 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5257 
5258 			gfx_v8_0_inactive_hqd(adev, ring);
5259 		}
5260 		/* Disable MEC parsing/prefetching */
5261 		gfx_v8_0_cp_compute_enable(adev, false);
5262 	}
5263 
5264        return 0;
5265 }
5266 
/*
 * soft_reset IP callback: apply the GRBM/SRBM reset masks computed by
 * gfx_v8_0_check_soft_reset(). GMCON is told to stall and clear GFX
 * traffic around the reset; each reset mask is asserted, held ~50us
 * and deasserted again (with read-backs to post the writes).
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		/* stall and clear GFX traffic in the memory controller */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		/* read back to post the write */
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		/* read back to post the write */
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		/* release the GMCON stall/clear set above */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5328 
/*
 * Reset a compute queue's HQD state after a soft reset: clear any
 * pending dequeue request and zero both ring pointers, under SRBM
 * selection of the ring's me/pipe/queue.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}
5338 
5339 static int gfx_v8_0_post_soft_reset(void *handle)
5340 {
5341 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5342 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5343 
5344 	if ((!adev->gfx.grbm_soft_reset) &&
5345 	    (!adev->gfx.srbm_soft_reset))
5346 		return 0;
5347 
5348 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5349 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5350 
5351 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5352 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5353 		gfx_v8_0_cp_gfx_resume(adev);
5354 
5355 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5356 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5357 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5358 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5359 		int i;
5360 
5361 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5362 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5363 
5364 			gfx_v8_0_init_hqd(adev, ring);
5365 		}
5366 		gfx_v8_0_cp_compute_resume(adev);
5367 	}
5368 	gfx_v8_0_rlc_start(adev);
5369 
5370 	return 0;
5371 }
5372 
5373 /**
5374  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5375  *
5376  * @adev: amdgpu_device pointer
5377  *
5378  * Fetches a GPU clock counter snapshot.
5379  * Returns the 64 bit clock counter snapshot.
5380  */
5381 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5382 {
5383 	uint64_t clock;
5384 
5385 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5386 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5387 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5388 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5389 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5390 	return clock;
5391 }
5392 
/*
 * gfx_v8_0_ring_emit_gds_switch - emit the GDS allocation for a VMID
 *
 * Emits WRITE_DATA packets that program the GDS memory base/size, the
 * GWS base/size and the OA mask registers belonging to @vmid. All
 * byte-based inputs are first converted to the hardware allocation
 * granularity via the AMDGPU_*_SHIFT constants.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size in the upper field, base in the lower */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask covering oa_base..oa_base+oa_size-1 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5440 
/* GFX helper callbacks exposed to the rest of the driver via adev->gfx.funcs */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
};
5445 
/*
 * early_init IP callback: record the fixed ring counts and install the
 * gfx/ring/irq/gds/rlc function tables for the VI GFX block.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5460 
/*
 * late_init IP callback: enable the privileged register/instruction
 * fault interrupts, run the EDC GPR workarounds (which need the IB
 * pool, hence "late"), then allow GFX powergating.
 * Returns 0 or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5484 
/*
 * Enable/disable static per-CU (medium grain) GFX power gating.
 * On Polaris11 the request is additionally forwarded to the SMU
 * through the powerplay powergating interface.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if (adev->asic_type == CHIP_POLARIS11)
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	/* the RLC control bit is toggled on every ASIC, not just Polaris11 */
	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5497 
/* Enable/disable dynamic per-CU (medium grain) GFX power gating. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5503 
/* Enable/disable quick medium-grain power gating (Polaris11). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5509 
/* Enable/disable coarse-grain GFX power gating (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5515 
/* Enable/disable GFX pipeline power gating (Carrizo/Stoney). */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5525 
5526 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5527 					  bool enable)
5528 {
5529 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5530 		cz_enable_gfx_cg_power_gating(adev, true);
5531 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5532 			cz_enable_gfx_pipeline_power_gating(adev, true);
5533 	} else {
5534 		cz_enable_gfx_cg_power_gating(adev, false);
5535 		cz_enable_gfx_pipeline_power_gating(adev, false);
5536 	}
5537 }
5538 
5539 static int gfx_v8_0_set_powergating_state(void *handle,
5540 					  enum amd_powergating_state state)
5541 {
5542 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5543 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5544 
5545 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5546 		return 0;
5547 
5548 	switch (adev->asic_type) {
5549 	case CHIP_CARRIZO:
5550 	case CHIP_STONEY:
5551 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5552 			cz_update_gfx_cg_power_gating(adev, enable);
5553 
5554 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5555 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5556 		else
5557 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5558 
5559 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5560 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5561 		else
5562 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5563 		break;
5564 	case CHIP_POLARIS11:
5565 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5566 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5567 		else
5568 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5569 
5570 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5571 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5572 		else
5573 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5574 
5575 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5576 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5577 		else
5578 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5579 		break;
5580 	default:
5581 		break;
5582 	}
5583 
5584 	return 0;
5585 }
5586 
/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM command over the RLC serdes
 * @adev: amdgpu device pointer
 * @reg_addr: BPM register the command targets (e.g. BPM_REG_MGCG_OVERRIDE)
 * @cmd: BPM command value (e.g. SET_BPM_SERDES_CMD / CLE_BPM_SERDES_CMD)
 *
 * Selects all SEs/SHs and all CU/non-CU serdes masters, then programs
 * RLC_SERDES_WR_CTRL with the command, target register address and a
 * broadcast (0xff) BPM address.  Stoney clears a smaller set of control
 * bits than the other ASICs.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast: reach every serdes instance on every SE/SH */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	/* Stoney does not clear BPM_DATA/REG_ADDR before OR-ing them in. */
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5627 
5628 #define MSG_ENTER_RLC_SAFE_MODE     1
5629 #define MSG_EXIT_RLC_SAFE_MODE      0
5630 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5631 #define RLC_GPR_REG2__REQ__SHIFT 0
5632 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5633 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5634 
/*
 * cz_enter_rlc_safe_mode - put the RLC into safe mode (Carrizo/Stoney path)
 * @adev: amdgpu device pointer
 *
 * No-op if the RLC F32 core is not running.  When any GFX clock- or
 * power-gating feature is active, sends the ENTER message through
 * RLC_GPR_REG2, waits for GFX clocks and power to report "on" in
 * RLC_GPM_STAT, then waits for the RLC to ack by clearing REQ.
 *
 * NOTE(review): @data is read from mmRLC_CNTL but then has RLC_GPR_REG2
 * fields OR'ed in and is written to mmRLC_GPR_REG2, so stale RLC_CNTL
 * bits are carried into that write — confirm this is intentional.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait for GFX clocks and power to both report "on" */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge the request (REQ cleared) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5670 
/*
 * cz_exit_rlc_safe_mode - release RLC safe mode (Carrizo/Stoney path)
 * @adev: amdgpu device pointer
 *
 * No-op if the RLC F32 core is not running.  Sends the EXIT message
 * through RLC_GPR_REG2 when any gating feature is active.
 *
 * NOTE(review): like the enter path, @data originates from mmRLC_CNTL
 * before being written to mmRLC_GPR_REG2; also the REQ-ack poll below
 * runs even when no request was written — verify both are intentional.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to acknowledge (REQ cleared) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
			break;
		udelay(1);
	}
}
5696 
/*
 * iceland_enter_rlc_safe_mode - put the RLC into safe mode (RLC_SAFE_MODE path)
 * @adev: amdgpu device pointer
 *
 * No-op if the RLC F32 core is not running.  When CGCG/MGCG is active,
 * writes the enter message (1) with CMD set to RLC_SAFE_MODE, waits for
 * GFX clocks and power to report "on", then waits for CMD to clear.
 *
 * NOTE(review): @data is read from mmRLC_CNTL but written to
 * mmRLC_SAFE_MODE with the CMD/MESSAGE fields OR'ed in, carrying
 * RLC_CNTL bits along — confirm this is intentional.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clocks and power to both report "on" */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to consume the command (CMD cleared) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5730 
/*
 * iceland_exit_rlc_safe_mode - release RLC safe mode (RLC_SAFE_MODE path)
 * @adev: amdgpu device pointer
 *
 * No-op if the RLC F32 core is not running.  Writes CMD with a zero
 * MESSAGE (exit) only if safe mode was previously entered, then polls
 * until the RLC clears CMD.  The poll runs unconditionally, matching
 * the cz exit path.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to consume the command (CMD cleared) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5755 
/* No handshake needed on this ASIC; just record that safe mode is "on". */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5760 
/* No handshake needed on this ASIC; just record that safe mode is "off". */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5765 
/* RLC safe-mode hooks for Carrizo/Stoney (message via RLC_GPR_REG2). */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};
5770 
/* RLC safe-mode hooks using the RLC_SAFE_MODE register interface. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5775 
/* RLC safe-mode hooks for ASICs that need no handshake at all. */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5780 
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 * @adev: amdgpu device pointer
 * @enable: true to enable medium-grain clock gating (when supported)
 *
 * Runs the numbered enable/disable sequence below under RLC safe mode:
 * programs the RLC/CP memory light-sleep bits, the MGCG override
 * register and the CGTS (tree-shade) controls, flushing each step
 * through the serdes BPM command interface.  Statement order follows
 * the hardware programming sequence and must not be rearranged.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set; dGPUs clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5884 
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable coarse-grain clock gating (when supported)
 *
 * Runs the CGCG/CGLS enable or disable sequence under RLC safe mode,
 * clearing/setting the MGCG-override CGCG/CGLS bits and issuing the
 * matching serdes BPM commands.  Statement order follows the hardware
 * programming sequence and must not be rearranged.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5975 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5976 					    bool enable)
5977 {
5978 	if (enable) {
5979 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5980 		 * ===  MGCG + MGLS + TS(CG/LS) ===
5981 		 */
5982 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5983 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5984 	} else {
5985 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5986 		 * ===  CGCG + CGLS ===
5987 		 */
5988 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5989 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5990 	}
5991 	return 0;
5992 }
5993 
5994 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5995 					  enum amd_clockgating_state state)
5996 {
5997 	uint32_t msg_id, pp_state;
5998 	void *pp_handle = adev->powerplay.pp_handle;
5999 
6000 	if (state == AMD_CG_STATE_UNGATE)
6001 		pp_state = 0;
6002 	else
6003 		pp_state = PP_STATE_CG | PP_STATE_LS;
6004 
6005 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6006 			PP_BLOCK_GFX_CG,
6007 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6008 			pp_state);
6009 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6010 
6011 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6012 			PP_BLOCK_GFX_MG,
6013 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6014 			pp_state);
6015 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6016 
6017 	return 0;
6018 }
6019 
6020 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6021 					  enum amd_clockgating_state state)
6022 {
6023 	uint32_t msg_id, pp_state;
6024 	void *pp_handle = adev->powerplay.pp_handle;
6025 
6026 	if (state == AMD_CG_STATE_UNGATE)
6027 		pp_state = 0;
6028 	else
6029 		pp_state = PP_STATE_CG | PP_STATE_LS;
6030 
6031 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6032 			PP_BLOCK_GFX_CG,
6033 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6034 			pp_state);
6035 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6036 
6037 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6038 			PP_BLOCK_GFX_3D,
6039 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6040 			pp_state);
6041 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6042 
6043 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6044 			PP_BLOCK_GFX_MG,
6045 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6046 			pp_state);
6047 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6048 
6049 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6050 			PP_BLOCK_GFX_RLC,
6051 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6052 			pp_state);
6053 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6054 
6055 	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6056 			PP_BLOCK_GFX_CP,
6057 			PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6058 			pp_state);
6059 	amd_set_clockgating_by_smu(pp_handle, msg_id);
6060 
6061 	return 0;
6062 }
6063 
6064 static int gfx_v8_0_set_clockgating_state(void *handle,
6065 					  enum amd_clockgating_state state)
6066 {
6067 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6068 
6069 	switch (adev->asic_type) {
6070 	case CHIP_FIJI:
6071 	case CHIP_CARRIZO:
6072 	case CHIP_STONEY:
6073 		gfx_v8_0_update_gfx_clock_gating(adev,
6074 						 state == AMD_CG_STATE_GATE ? true : false);
6075 		break;
6076 	case CHIP_TONGA:
6077 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6078 		break;
6079 	case CHIP_POLARIS10:
6080 	case CHIP_POLARIS11:
6081 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6082 		break;
6083 	default:
6084 		break;
6085 	}
6086 	return 0;
6087 }
6088 
6089 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6090 {
6091 	return ring->adev->wb.wb[ring->rptr_offs];
6092 }
6093 
6094 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6095 {
6096 	struct amdgpu_device *adev = ring->adev;
6097 
6098 	if (ring->use_doorbell)
6099 		/* XXX check if swapping is necessary on BE */
6100 		return ring->adev->wb.wb[ring->wptr_offs];
6101 	else
6102 		return RREG32(mmCP_RB0_WPTR);
6103 }
6104 
6105 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6106 {
6107 	struct amdgpu_device *adev = ring->adev;
6108 
6109 	if (ring->use_doorbell) {
6110 		/* XXX check if swapping is necessary on BE */
6111 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
6112 		WDOORBELL32(ring->doorbell_index, ring->wptr);
6113 	} else {
6114 		WREG32(mmCP_RB0_WPTR, ring->wptr);
6115 		(void)RREG32(mmCP_RB0_WPTR);
6116 	}
6117 }
6118 
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 * @ring: amdgpu ring pointer
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the per-client reference bit matches.  The
 * reference bit depends on which ME/pipe owns the ring; gfx rings use
 * the CP0 bit and wait on the PFP engine.  Unknown compute MEs emit
 * nothing.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		/* pick the done bit for this MEC/pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6150 
/*
 * gfx_v8_0_ring_emit_hdp_invalidate - emit the HDP invalidate sequence
 * @ring: amdgpu ring pointer
 *
 * Emits a WRITE_DATA packet storing 1 into HDP_DEBUG0, which the driver
 * uses as the HDP cache invalidate trigger on this ASIC family.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6162 
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the gfx ring
 * @ring: amdgpu ring pointer
 * @ib: indirect buffer to schedule
 * @vm_id: VM id the IB executes under (bits 24+ of the control dword)
 * @ctx_switch: unused here
 *
 * CE IBs use the INDIRECT_BUFFER_CONST packet, DE IBs plain
 * INDIRECT_BUFFER.  Four dwords total: header, address low/high,
 * control (length | vm_id).
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6185 
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring
 * @ring: amdgpu ring pointer
 * @ib: indirect buffer to schedule
 * @vm_id: VM id the IB executes under (bits 24+ of the control dword)
 * @ctx_switch: unused here
 *
 * Four dwords: INDIRECT_BUFFER header, address low/high, control
 * (VALID | length | vm_id).
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6201 
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 * @ring: amdgpu ring pointer
 * @addr: GPU address to write the fence value to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT / AMDGPU_FENCE_FLAG_INT
 *
 * Emits an EVENT_WRITE_EOP that flushes/writes back the TC and TCL1
 * caches, writes @seq (32 or 64 bit per @flags) to @addr and optionally
 * raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6222 
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for the ring's latest fence
 * @ring: amdgpu ring pointer
 *
 * Emits a WAIT_REG_MEM on the fence memory location until it equals the
 * newest synced sequence number, stalling the PFP on gfx rings or the
 * ME on compute rings.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6239 
/*
 * gfx_v8_0_ring_emit_vm_flush - flush the TLB for a VM context via the ring
 * @ring: amdgpu ring pointer
 * @vm_id: VM context id (0-15) to flush
 * @pd_addr: new page directory base address for that context
 *
 * Writes the page-directory base register for @vm_id (contexts 0-7 and
 * 8-15 use different register banks), requests a TLB invalidate for the
 * context, waits for the request register to read back 0 and, on gfx
 * rings, re-syncs the PFP to the ME afterwards.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
	if (usepfp)
		amdgpu_ring_insert_nop(ring, 128);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6292 
6293 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6294 {
6295 	return ring->adev->wb.wb[ring->wptr_offs];
6296 }
6297 
6298 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6299 {
6300 	struct amdgpu_device *adev = ring->adev;
6301 
6302 	/* XXX check if swapping is necessary on BE */
6303 	adev->wb.wb[ring->wptr_offs] = ring->wptr;
6304 	WDOORBELL32(ring->doorbell_index, ring->wptr);
6305 }
6306 
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 * @ring: amdgpu ring pointer
 * @addr: GPU address to write the fence value to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_64BIT / AMDGPU_FENCE_FLAG_INT
 *
 * Compute rings use the RELEASE_MEM packet (instead of EVENT_WRITE_EOP)
 * to flush/write back TC and TCL1 caches, write @seq to @addr and
 * optionally raise an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6327 
/* Emit a SWITCH_BUFFER packet (two dwords: header + payload 0). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6333 
/*
 * gfx_v8_ring_emit_cntxcntl - emit a CONTEXT_CONTROL packet
 * @ring: amdgpu ring pointer
 * @flags: AMDGPU_HAVE_CTX_SWITCH / AMDGPU_PREAMBLE_IB_PRESENT* flags
 *
 * Builds the dw2 bitmask selecting which state blocks the CP loads on
 * a context switch; CE RAM is loaded whenever a preamble IB is present
 * (or present for the first time without a context switch).
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
6362 
static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
{
	/* gfx_v8_0_ring_emit_ib_gfx writes 4 dwords:
	 * header, addr lo, addr hi, control.
	 */
	return 4;
}
6368 
/* Worst-case dword count emitted per gfx-ring frame (excluding IBs). */
static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3; /* gfx_v8_ring_emit_cntxcntl */
}
6381 
static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
{
	/* gfx_v8_0_ring_emit_ib_compute writes 4 dwords:
	 * header, addr lo, addr hi, control.
	 */
	return 4;
}
6387 
/* Worst-case dword count emitted per compute-ring frame (excluding IBs). */
static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
}
6398 
6399 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6400 						 enum amdgpu_interrupt_state state)
6401 {
6402 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6403 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6404 }
6405 
6406 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6407 						     int me, int pipe,
6408 						     enum amdgpu_interrupt_state state)
6409 {
6410 	/*
6411 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6412 	 * handles the setting of interrupts for this specific pipe. All other
6413 	 * pipes' interrupts are set by amdkfd.
6414 	 */
6415 
6416 	if (me == 1) {
6417 		switch (pipe) {
6418 		case 0:
6419 			break;
6420 		default:
6421 			DRM_DEBUG("invalid pipe %d\n", pipe);
6422 			return;
6423 		}
6424 	} else {
6425 		DRM_DEBUG("invalid me %d\n", me);
6426 		return;
6427 	}
6428 
6429 	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6430 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6431 }
6432 
6433 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6434 					     struct amdgpu_irq_src *source,
6435 					     unsigned type,
6436 					     enum amdgpu_interrupt_state state)
6437 {
6438 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6439 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6440 
6441 	return 0;
6442 }
6443 
6444 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6445 					      struct amdgpu_irq_src *source,
6446 					      unsigned type,
6447 					      enum amdgpu_interrupt_state state)
6448 {
6449 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6450 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6451 
6452 	return 0;
6453 }
6454 
/* Dispatch an EOP interrupt enable/disable request to the matching ring:
 * the gfx ring or one of the MEC1/MEC2 compute pipes. Unknown types are
 * silently ignored; always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6493 
6494 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6495 			    struct amdgpu_irq_src *source,
6496 			    struct amdgpu_iv_entry *entry)
6497 {
6498 	int i;
6499 	u8 me_id, pipe_id, queue_id;
6500 	struct amdgpu_ring *ring;
6501 
6502 	DRM_DEBUG("IH: CP EOP\n");
6503 	me_id = (entry->ring_id & 0x0c) >> 2;
6504 	pipe_id = (entry->ring_id & 0x03) >> 0;
6505 	queue_id = (entry->ring_id & 0x70) >> 4;
6506 
6507 	switch (me_id) {
6508 	case 0:
6509 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6510 		break;
6511 	case 1:
6512 	case 2:
6513 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6514 			ring = &adev->gfx.compute_ring[i];
6515 			/* Per-queue interrupt is supported for MEC starting from VI.
6516 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6517 			  */
6518 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6519 				amdgpu_fence_process(ring);
6520 		}
6521 		break;
6522 	}
6523 	return 0;
6524 }
6525 
/* CP reported an illegal register access from a command stream: log it and
 * queue a GPU reset via the adev->reset_work worker. Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6534 
/* CP reported an illegal instruction in a command stream: log it and queue a
 * GPU reset via the adev->reset_work worker. Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6543 
/* IP-block level callbacks for the GFX v8 block: lifecycle (init/fini,
 * suspend/resume), idle/reset handling and clock/power gating control.
 */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6563 
/* Ring callbacks for the graphics (GFX) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx,
	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx,
};
6585 
/* Ring callbacks for the compute (MEC) rings; no switch-buffer or
 * context-control emission compared to the gfx ring variant.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute,
	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute,
};
6605 
6606 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6607 {
6608 	int i;
6609 
6610 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6611 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6612 
6613 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6614 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6615 }
6616 
/* End-of-pipe interrupt source: per-ring fence processing. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
6621 
/* Privileged register access fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
6626 
/* Privileged instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6631 
/* Register the GFX interrupt sources (EOP per ring/pipe, plus privileged
 * register and privileged instruction faults) with their callbacks.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
6643 
6644 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6645 {
6646 	switch (adev->asic_type) {
6647 	case CHIP_TOPAZ:
6648 		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6649 		break;
6650 	case CHIP_STONEY:
6651 	case CHIP_CARRIZO:
6652 		adev->gfx.rlc.funcs = &cz_rlc_funcs;
6653 		break;
6654 	default:
6655 		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6656 		break;
6657 	}
6658 }
6659 
6660 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6661 {
6662 	/* init asci gds info */
6663 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6664 	adev->gds.gws.total_size = 64;
6665 	adev->gds.oa.total_size = 16;
6666 
6667 	if (adev->gds.mem.total_size == 64 * 1024) {
6668 		adev->gds.mem.gfx_partition_size = 4096;
6669 		adev->gds.mem.cs_partition_size = 4096;
6670 
6671 		adev->gds.gws.gfx_partition_size = 4;
6672 		adev->gds.gws.cs_partition_size = 4;
6673 
6674 		adev->gds.oa.gfx_partition_size = 4;
6675 		adev->gds.oa.cs_partition_size = 1;
6676 	} else {
6677 		adev->gds.mem.gfx_partition_size = 1024;
6678 		adev->gds.mem.cs_partition_size = 1024;
6679 
6680 		adev->gds.gws.gfx_partition_size = 16;
6681 		adev->gds.gws.cs_partition_size = 16;
6682 
6683 		adev->gds.oa.gfx_partition_size = 4;
6684 		adev->gds.oa.cs_partition_size = 4;
6685 	}
6686 }
6687 
6688 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6689 						 u32 bitmap)
6690 {
6691 	u32 data;
6692 
6693 	if (!bitmap)
6694 		return;
6695 
6696 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6697 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6698 
6699 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6700 }
6701 
6702 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6703 {
6704 	u32 data, mask;
6705 
6706 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6707 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6708 
6709 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6710 
6711 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6712 }
6713 
6714 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6715 {
6716 	int i, j, k, counter, active_cu_number = 0;
6717 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6718 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6719 	unsigned disable_masks[4 * 2];
6720 
6721 	memset(cu_info, 0, sizeof(*cu_info));
6722 
6723 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6724 
6725 	mutex_lock(&adev->grbm_idx_mutex);
6726 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6727 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6728 			mask = 1;
6729 			ao_bitmap = 0;
6730 			counter = 0;
6731 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6732 			if (i < 4 && j < 2)
6733 				gfx_v8_0_set_user_cu_inactive_bitmap(
6734 					adev, disable_masks[i * 2 + j]);
6735 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6736 			cu_info->bitmap[i][j] = bitmap;
6737 
6738 			for (k = 0; k < 16; k ++) {
6739 				if (bitmap & mask) {
6740 					if (counter < 2)
6741 						ao_bitmap |= mask;
6742 					counter ++;
6743 				}
6744 				mask <<= 1;
6745 			}
6746 			active_cu_number += counter;
6747 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6748 		}
6749 	}
6750 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6751 	mutex_unlock(&adev->grbm_idx_mutex);
6752 
6753 	cu_info->number = active_cu_number;
6754 	cu_info->ao_cu_mask = ao_cu_mask;
6755 }
6756