xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision e5c86679d5e864947a52fb31e45a425dea3e7fa9)
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

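/* Helpers for composing GB_TILE_MODEn / GB_MACROTILE_MODEn register values:
 * each macro shifts a field into the bit position defined in
 * gfx_8_0_sh_mask.h.
 */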
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

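/* Declare the firmware binaries each supported ASIC needs so that userspace
 * tooling (e.g. initramfs generators) knows to bundle them.
 */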
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

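/* Golden register tables are {register, AND mask, OR value} triples:
 * amdgpu_program_register_sequence() reads each register, clears the bits
 * in the AND mask, ORs in the value, then writes it back.
 */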
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);

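/* Apply the per-ASIC golden register settings defined above.  For Polaris10
 * this also issues board-specific I2C transactions on a few known OEM SKUs.
 */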
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

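/* The CP scratch registers are used by the ring and IB tests below to
 * verify that command submission works end to end.
 */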
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

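/* Basic ring test: write 0xCAFEDEAD to a scratch register, emit a
 * SET_UCONFIG_REG packet through the ring that overwrites it with
 * 0xDEADBEEF, then poll until the write lands or the timeout expires.
 */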
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

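/* IB test: same scratch-register handshake as the ring test, but the write
 * is issued from an indirect buffer and completion is observed through the
 * returned fence.
 */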
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

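/* Fetch and validate the PFP, ME, CE, RLC and MEC(2) microcode images for
 * the current ASIC, parse the RLC register lists out of the RLC header, and,
 * when the SMU loads the firmware (adev->firmware.smu_load), record each
 * image in the ucode table and account for its size.
 */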
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);

	/* The chained IB ucode hasn't been formally released yet, so keep the
	 * feature disabled for now.
	 * TODO: once the ucode is ready, use the ucode version to decide
	 * whether chained IBs are supported.
	 */
	adev->virt.chained_ib_support = false;

	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
					adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.smu_load) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		/* we also need to account for the jump table (JT) */
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

		if (amdgpu_sriov_vf(adev)) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
			info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
			info->fw = adev->gfx.mec_fw;
			adev->firmware.fw_size +=
				ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
		}

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}

	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

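/* Build the clear-state buffer (CSB) as a stream of PM4 packets: a preamble,
 * the SECT_CONTEXT register extents from the RLC cs_data, the raster
 * configuration, and a trailing CLEAR_STATE packet.
 */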
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

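/* Copy the per-ME jump tables out of the CE, PFP, ME, MEC (and, on Carrizo,
 * MEC2) firmware images into the RLC's CP table buffer.
 */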
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

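/* Allocate, pin and fill the RLC buffers: the clear-state BO in VRAM, plus
 * the CP jump table BO on Carrizo and Stoney.
 */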
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

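/* Set up the KIQ (kernel interface queue) ring.  It runs on MEC2 pipe 0 when
 * MEC2 firmware is available, otherwise on MEC1 pipe 1, and is addressed
 * through its own doorbell.
 */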
1374 static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1375 				  struct amdgpu_ring *ring,
1376 				  struct amdgpu_irq_src *irq)
1377 {
1378 	int r = 0;
1379 
1380 	if (amdgpu_sriov_vf(adev)) {
1381 		r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
1382 		if (r)
1383 			return r;
1384 	}
1385 
1386 	ring->adev = NULL;
1387 	ring->ring_obj = NULL;
1388 	ring->use_doorbell = true;
1389 	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1390 	if (adev->gfx.mec2_fw) {
1391 		ring->me = 2;
1392 		ring->pipe = 0;
1393 	} else {
1394 		ring->me = 1;
1395 		ring->pipe = 1;
1396 	}
1397 
1398 	irq->data = ring;
1399 	ring->queue = 0;
1400 	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1401 	r = amdgpu_ring_init(adev, ring, 1024,
1402 			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1403 	if (r)
1404 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1405 
1406 	return r;
1407 }
1408 
1409 static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
1410 				   struct amdgpu_irq_src *irq)
1411 {
1412 	if (amdgpu_sriov_vf(ring->adev))
1413 		amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
1414 
1415 	amdgpu_ring_fini(ring);
1416 	irq->data = NULL;
1417 }
1418 
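/*
 * Per-queue EOP buffer size in bytes; gfx_v8_0_mec_init() below
 * allocates one MEC_HPD_SIZE slot per compute queue.
 */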
1419 #define MEC_HPD_SIZE 2048
1420 
1421 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1422 {
1423 	int r;
1424 	u32 *hpd;
1425 
1426 	/*
1427 	 * we assign only 1 pipe because all other pipes will
1428 	 * be handled by KFD
1429 	 */
1430 	adev->gfx.mec.num_mec = 1;
1431 	adev->gfx.mec.num_pipe = 1;
1432 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1433 
1434 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
1435 		r = amdgpu_bo_create(adev,
1436 				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1437 				     PAGE_SIZE, true,
1438 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1439 				     &adev->gfx.mec.hpd_eop_obj);
1440 		if (r) {
1441 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1442 			return r;
1443 		}
1444 	}
1445 
1446 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1447 	if (unlikely(r != 0)) {
1448 		gfx_v8_0_mec_fini(adev);
1449 		return r;
1450 	}
1451 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1452 			  &adev->gfx.mec.hpd_eop_gpu_addr);
1453 	if (r) {
1454 		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
1455 		gfx_v8_0_mec_fini(adev);
1456 		return r;
1457 	}
1458 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1459 	if (r) {
1460 		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1461 		gfx_v8_0_mec_fini(adev);
1462 		return r;
1463 	}
1464 
1465 	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1466 
1467 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1468 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1469 
1470 	return 0;
1471 }
1472 
1473 static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
1474 {
1475 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1476 
1477 	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
1478 	kiq->eop_obj = NULL;
1479 }
1480 
1481 static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
1482 {
1483 	int r;
1484 	u32 *hpd;
1485 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1486 
1487 	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
1488 				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
1489 				    &kiq->eop_gpu_addr, (void **)&hpd);
1490 	if (r) {
1491 		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
1492 		return r;
1493 	}
1494 
1495 	memset(hpd, 0, MEC_HPD_SIZE);
1496 
1497 	amdgpu_bo_kunmap(kiq->eop_obj);
1498 
1499 	return 0;
1500 }
1501 
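/*
 * Hand-assembled GCN machine code for the EDC GPR workaround.
 * Assumed decoding (not documented in this file): each 0x7e.. dword
 * is a v_mov_b32 that initializes a VGPR from an SGPR, 0xbf8a0000 is
 * s_barrier and 0xbf810000 is s_endpgm.
 */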
1502 static const u32 vgpr_init_compute_shader[] =
1503 {
1504 	0x7e000209, 0x7e020208,
1505 	0x7e040207, 0x7e060206,
1506 	0x7e080205, 0x7e0a0204,
1507 	0x7e0c0203, 0x7e0e0202,
1508 	0x7e100201, 0x7e120200,
1509 	0x7e140209, 0x7e160208,
1510 	0x7e180207, 0x7e1a0206,
1511 	0x7e1c0205, 0x7e1e0204,
1512 	0x7e200203, 0x7e220202,
1513 	0x7e240201, 0x7e260200,
1514 	0x7e280209, 0x7e2a0208,
1515 	0x7e2c0207, 0x7e2e0206,
1516 	0x7e300205, 0x7e320204,
1517 	0x7e340203, 0x7e360202,
1518 	0x7e380201, 0x7e3a0200,
1519 	0x7e3c0209, 0x7e3e0208,
1520 	0x7e400207, 0x7e420206,
1521 	0x7e440205, 0x7e460204,
1522 	0x7e480203, 0x7e4a0202,
1523 	0x7e4c0201, 0x7e4e0200,
1524 	0x7e500209, 0x7e520208,
1525 	0x7e540207, 0x7e560206,
1526 	0x7e580205, 0x7e5a0204,
1527 	0x7e5c0203, 0x7e5e0202,
1528 	0x7e600201, 0x7e620200,
1529 	0x7e640209, 0x7e660208,
1530 	0x7e680207, 0x7e6a0206,
1531 	0x7e6c0205, 0x7e6e0204,
1532 	0x7e700203, 0x7e720202,
1533 	0x7e740201, 0x7e760200,
1534 	0x7e780209, 0x7e7a0208,
1535 	0x7e7c0207, 0x7e7e0206,
1536 	0xbf8a0000, 0xbf810000,
1537 };
1538 
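/*
 * Companion shader that initializes the SGPRs with scalar moves
 * (0xbe.. SOP dwords, assumed decoding), again ending in
 * s_barrier/s_endpgm.
 */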
1539 static const u32 sgpr_init_compute_shader[] =
1540 {
1541 	0xbe8a0100, 0xbe8c0102,
1542 	0xbe8e0104, 0xbe900106,
1543 	0xbe920108, 0xbe940100,
1544 	0xbe960102, 0xbe980104,
1545 	0xbe9a0106, 0xbe9c0108,
1546 	0xbe9e0100, 0xbea00102,
1547 	0xbea20104, 0xbea40106,
1548 	0xbea60108, 0xbea80100,
1549 	0xbeaa0102, 0xbeac0104,
1550 	0xbeae0106, 0xbeb00108,
1551 	0xbeb20100, 0xbeb40102,
1552 	0xbeb60104, 0xbeb80106,
1553 	0xbeba0108, 0xbebc0100,
1554 	0xbebe0102, 0xbec00104,
1555 	0xbec20106, 0xbec40108,
1556 	0xbec60100, 0xbec80102,
1557 	0xbee60004, 0xbee70005,
1558 	0xbeea0006, 0xbeeb0007,
1559 	0xbee80008, 0xbee90009,
1560 	0xbefc0000, 0xbf8a0000,
1561 	0xbf810000, 0x00000000,
1562 };
1563 
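/*
 * Register/value pairs, consumed two at a time by
 * gfx_v8_0_do_edc_gpr_workarounds(), which emits one SET_SH_REG write
 * per pair before each dispatch.
 */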
1564 static const u32 vgpr_init_regs[] =
1565 {
1566 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1567 	mmCOMPUTE_RESOURCE_LIMITS, 0,
1568 	mmCOMPUTE_NUM_THREAD_X, 256*4,
1569 	mmCOMPUTE_NUM_THREAD_Y, 1,
1570 	mmCOMPUTE_NUM_THREAD_Z, 1,
1571 	mmCOMPUTE_PGM_RSRC2, 20,
1572 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1573 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1574 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1575 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1576 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1577 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1578 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1579 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1580 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1581 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1582 };
1583 
1584 static const u32 sgpr1_init_regs[] =
1585 {
1586 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1587 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1588 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1589 	mmCOMPUTE_NUM_THREAD_Y, 1,
1590 	mmCOMPUTE_NUM_THREAD_Z, 1,
1591 	mmCOMPUTE_PGM_RSRC2, 20,
1592 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1593 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1594 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1595 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1596 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1597 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1598 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1599 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1600 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1601 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1602 };
1603 
1604 static const u32 sgpr2_init_regs[] =
1605 {
1606 	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1607 	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1608 	mmCOMPUTE_NUM_THREAD_X, 256*5,
1609 	mmCOMPUTE_NUM_THREAD_Y, 1,
1610 	mmCOMPUTE_NUM_THREAD_Z, 1,
1611 	mmCOMPUTE_PGM_RSRC2, 20,
1612 	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1613 	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1614 	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1615 	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1616 	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1617 	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1618 	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1619 	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1620 	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1621 	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1622 };
1623 
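/*
 * EDC SEC (single error corrected) / DED (double error detected)
 * counter registers; the workaround reads them back at the end to
 * clear the counts.
 */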
1624 static const u32 sec_ded_counter_registers[] =
1625 {
1626 	mmCPC_EDC_ATC_CNT,
1627 	mmCPC_EDC_SCRATCH_CNT,
1628 	mmCPC_EDC_UCODE_CNT,
1629 	mmCPF_EDC_ATC_CNT,
1630 	mmCPF_EDC_ROQ_CNT,
1631 	mmCPF_EDC_TAG_CNT,
1632 	mmCPG_EDC_ATC_CNT,
1633 	mmCPG_EDC_DMA_CNT,
1634 	mmCPG_EDC_TAG_CNT,
1635 	mmDC_EDC_CSINVOC_CNT,
1636 	mmDC_EDC_RESTORE_CNT,
1637 	mmDC_EDC_STATE_CNT,
1638 	mmGDS_EDC_CNT,
1639 	mmGDS_EDC_GRBM_CNT,
1640 	mmGDS_EDC_OA_DED,
1641 	mmSPI_EDC_CNT,
1642 	mmSQC_ATC_EDC_GATCL1_CNT,
1643 	mmSQC_EDC_CNT,
1644 	mmSQ_EDC_DED_CNT,
1645 	mmSQ_EDC_INFO,
1646 	mmSQ_EDC_SEC_CNT,
1647 	mmTCC_EDC_CNT,
1648 	mmTCP_ATC_EDC_GATCL1_CNT,
1649 	mmTCP_EDC_CNT,
1650 	mmTD_EDC_CNT
1651 };
1652 
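/*
 * Summary of the function below: dispatch small compute shaders that
 * write every VGPR and SGPR so the EDC logic starts from known values,
 * re-enable EDC, then read the SEC/DED counters to clear them.  The
 * underlying hardware erratum is not documented here.
 */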
1653 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1654 {
1655 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1656 	struct amdgpu_ib ib;
1657 	struct dma_fence *f = NULL;
1658 	int r, i;
1659 	u32 tmp;
1660 	unsigned total_size, vgpr_offset, sgpr_offset;
1661 	u64 gpu_addr;
1662 
1663 	/* only supported on CZ */
1664 	if (adev->asic_type != CHIP_CARRIZO)
1665 		return 0;
1666 
1667 	/* bail if the compute ring is not ready */
1668 	if (!ring->ready)
1669 		return 0;
1670 
1671 	tmp = RREG32(mmGB_EDC_MODE);
1672 	WREG32(mmGB_EDC_MODE, 0);
1673 
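	/*
	 * Packet budget per dispatch: 3 dwords for each SET_SH_REG
	 * register/value pair, 4 for the COMPUTE_PGM_LO/HI write, 5 for
	 * DISPATCH_DIRECT and 2 for the EVENT_WRITE, converted to bytes.
	 */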
1674 	total_size =
1675 		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1676 	total_size +=
1677 		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1678 	total_size +=
1679 		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1680 	total_size = ALIGN(total_size, 256);
1681 	vgpr_offset = total_size;
1682 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1683 	sgpr_offset = total_size;
1684 	total_size += sizeof(sgpr_init_compute_shader);
1685 
1686 	/* allocate an indirect buffer to put the commands in */
1687 	memset(&ib, 0, sizeof(ib));
1688 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1689 	if (r) {
1690 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1691 		return r;
1692 	}
1693 
1694 	/* load the compute shaders */
1695 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1696 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1697 
1698 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1699 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1700 
1701 	/* init the ib length to 0 */
1702 	ib.length_dw = 0;
1703 
1704 	/* VGPR */
1705 	/* write the register state for the compute dispatch */
1706 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1707 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1708 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1709 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1710 	}
1711 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
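	/* both registers hold the shader address in 256-byte units, hence the >> 8 */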
1712 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1713 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1714 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1715 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1716 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1717 
1718 	/* write dispatch packet */
1719 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1720 	ib.ptr[ib.length_dw++] = 8; /* x */
1721 	ib.ptr[ib.length_dw++] = 1; /* y */
1722 	ib.ptr[ib.length_dw++] = 1; /* z */
1723 	ib.ptr[ib.length_dw++] =
1724 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1725 
1726 	/* write CS partial flush packet */
1727 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1728 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1729 
1730 	/* SGPR1 */
1731 	/* write the register state for the compute dispatch */
1732 	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1733 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1734 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1735 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1736 	}
1737 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1738 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1739 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1740 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1741 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1742 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1743 
1744 	/* write dispatch packet */
1745 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1746 	ib.ptr[ib.length_dw++] = 8; /* x */
1747 	ib.ptr[ib.length_dw++] = 1; /* y */
1748 	ib.ptr[ib.length_dw++] = 1; /* z */
1749 	ib.ptr[ib.length_dw++] =
1750 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1751 
1752 	/* write CS partial flush packet */
1753 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1754 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1755 
1756 	/* SGPR2 */
1757 	/* write the register state for the compute dispatch */
1758 	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1759 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1760 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1761 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1762 	}
1763 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1764 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1765 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1766 	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1767 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1768 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1769 
1770 	/* write dispatch packet */
1771 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1772 	ib.ptr[ib.length_dw++] = 8; /* x */
1773 	ib.ptr[ib.length_dw++] = 1; /* y */
1774 	ib.ptr[ib.length_dw++] = 1; /* z */
1775 	ib.ptr[ib.length_dw++] =
1776 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1777 
1778 	/* write CS partial flush packet */
1779 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1780 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1781 
1782 	/* schedule the ib on the ring */
1783 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1784 	if (r) {
1785 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1786 		goto fail;
1787 	}
1788 
1789 	/* wait for the GPU to finish processing the IB */
1790 	r = dma_fence_wait(f, false);
1791 	if (r) {
1792 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1793 		goto fail;
1794 	}
1795 
1796 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1797 	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1798 	WREG32(mmGB_EDC_MODE, tmp);
1799 
1800 	tmp = RREG32(mmCC_GC_EDC_CONFIG);
1801 	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1802 	WREG32(mmCC_GC_EDC_CONFIG, tmp);
1803 
1804 
1805 	/* read back registers to clear the counters */
1806 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1807 		RREG32(sec_ded_counter_registers[i]);
1808 
1809 fail:
1810 	amdgpu_ib_free(adev, &ib, NULL);
1811 	dma_fence_put(f);
1812 
1813 	return r;
1814 }
1815 
1816 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1817 {
1818 	u32 gb_addr_config;
1819 	u32 mc_shared_chmap, mc_arb_ramcfg;
1820 	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1821 	u32 tmp;
1822 	int ret;
1823 
1824 	switch (adev->asic_type) {
1825 	case CHIP_TOPAZ:
1826 		adev->gfx.config.max_shader_engines = 1;
1827 		adev->gfx.config.max_tile_pipes = 2;
1828 		adev->gfx.config.max_cu_per_sh = 6;
1829 		adev->gfx.config.max_sh_per_se = 1;
1830 		adev->gfx.config.max_backends_per_se = 2;
1831 		adev->gfx.config.max_texture_channel_caches = 2;
1832 		adev->gfx.config.max_gprs = 256;
1833 		adev->gfx.config.max_gs_threads = 32;
1834 		adev->gfx.config.max_hw_contexts = 8;
1835 
1836 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1837 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1838 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1839 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1840 		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1841 		break;
1842 	case CHIP_FIJI:
1843 		adev->gfx.config.max_shader_engines = 4;
1844 		adev->gfx.config.max_tile_pipes = 16;
1845 		adev->gfx.config.max_cu_per_sh = 16;
1846 		adev->gfx.config.max_sh_per_se = 1;
1847 		adev->gfx.config.max_backends_per_se = 4;
1848 		adev->gfx.config.max_texture_channel_caches = 16;
1849 		adev->gfx.config.max_gprs = 256;
1850 		adev->gfx.config.max_gs_threads = 32;
1851 		adev->gfx.config.max_hw_contexts = 8;
1852 
1853 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1854 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1855 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1856 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1857 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1858 		break;
1859 	case CHIP_POLARIS11:
1860 	case CHIP_POLARIS12:
1861 		ret = amdgpu_atombios_get_gfx_info(adev);
1862 		if (ret)
1863 			return ret;
1864 		adev->gfx.config.max_gprs = 256;
1865 		adev->gfx.config.max_gs_threads = 32;
1866 		adev->gfx.config.max_hw_contexts = 8;
1867 
1868 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1869 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1870 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1871 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1872 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1873 		break;
1874 	case CHIP_POLARIS10:
1875 		ret = amdgpu_atombios_get_gfx_info(adev);
1876 		if (ret)
1877 			return ret;
1878 		adev->gfx.config.max_gprs = 256;
1879 		adev->gfx.config.max_gs_threads = 32;
1880 		adev->gfx.config.max_hw_contexts = 8;
1881 
1882 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1883 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1884 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1885 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1886 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1887 		break;
1888 	case CHIP_TONGA:
1889 		adev->gfx.config.max_shader_engines = 4;
1890 		adev->gfx.config.max_tile_pipes = 8;
1891 		adev->gfx.config.max_cu_per_sh = 8;
1892 		adev->gfx.config.max_sh_per_se = 1;
1893 		adev->gfx.config.max_backends_per_se = 2;
1894 		adev->gfx.config.max_texture_channel_caches = 8;
1895 		adev->gfx.config.max_gprs = 256;
1896 		adev->gfx.config.max_gs_threads = 32;
1897 		adev->gfx.config.max_hw_contexts = 8;
1898 
1899 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1900 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1901 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1902 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1903 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1904 		break;
1905 	case CHIP_CARRIZO:
1906 		adev->gfx.config.max_shader_engines = 1;
1907 		adev->gfx.config.max_tile_pipes = 2;
1908 		adev->gfx.config.max_sh_per_se = 1;
1909 		adev->gfx.config.max_backends_per_se = 2;
1910 
1911 		switch (adev->pdev->revision) {
1912 		case 0xc4:
1913 		case 0x84:
1914 		case 0xc8:
1915 		case 0xcc:
1916 		case 0xe1:
1917 		case 0xe3:
1918 			/* B10 */
1919 			adev->gfx.config.max_cu_per_sh = 8;
1920 			break;
1921 		case 0xc5:
1922 		case 0x81:
1923 		case 0x85:
1924 		case 0xc9:
1925 		case 0xcd:
1926 		case 0xe2:
1927 		case 0xe4:
1928 			/* B8 */
1929 			adev->gfx.config.max_cu_per_sh = 6;
1930 			break;
1931 		case 0xc6:
1932 		case 0xca:
1933 		case 0xce:
1934 		case 0x88:
1935 			/* B6 */
1936 			adev->gfx.config.max_cu_per_sh = 6;
1937 			break;
1938 		case 0xc7:
1939 		case 0x87:
1940 		case 0xcb:
1941 		case 0xe5:
1942 		case 0x89:
1943 		default:
1944 			/* B4 */
1945 			adev->gfx.config.max_cu_per_sh = 4;
1946 			break;
1947 		}
1948 
1949 		adev->gfx.config.max_texture_channel_caches = 2;
1950 		adev->gfx.config.max_gprs = 256;
1951 		adev->gfx.config.max_gs_threads = 32;
1952 		adev->gfx.config.max_hw_contexts = 8;
1953 
1954 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1955 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1956 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1957 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1958 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1959 		break;
1960 	case CHIP_STONEY:
1961 		adev->gfx.config.max_shader_engines = 1;
1962 		adev->gfx.config.max_tile_pipes = 2;
1963 		adev->gfx.config.max_sh_per_se = 1;
1964 		adev->gfx.config.max_backends_per_se = 1;
1965 
1966 		switch (adev->pdev->revision) {
1967 		case 0xc0:
1968 		case 0xc1:
1969 		case 0xc2:
1970 		case 0xc4:
1971 		case 0xc8:
1972 		case 0xc9:
1973 			adev->gfx.config.max_cu_per_sh = 3;
1974 			break;
1975 		case 0xd0:
1976 		case 0xd1:
1977 		case 0xd2:
1978 		default:
1979 			adev->gfx.config.max_cu_per_sh = 2;
1980 			break;
1981 		}
1982 
1983 		adev->gfx.config.max_texture_channel_caches = 2;
1984 		adev->gfx.config.max_gprs = 256;
1985 		adev->gfx.config.max_gs_threads = 16;
1986 		adev->gfx.config.max_hw_contexts = 8;
1987 
1988 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1989 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1990 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1991 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1992 		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1993 		break;
1994 	default:
1995 		adev->gfx.config.max_shader_engines = 2;
1996 		adev->gfx.config.max_tile_pipes = 4;
1997 		adev->gfx.config.max_cu_per_sh = 2;
1998 		adev->gfx.config.max_sh_per_se = 1;
1999 		adev->gfx.config.max_backends_per_se = 2;
2000 		adev->gfx.config.max_texture_channel_caches = 4;
2001 		adev->gfx.config.max_gprs = 256;
2002 		adev->gfx.config.max_gs_threads = 32;
2003 		adev->gfx.config.max_hw_contexts = 8;
2004 
2005 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2006 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2007 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2008 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2009 		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2010 		break;
2011 	}
2012 
2013 	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2014 	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2015 	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2016 
2017 	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2018 	adev->gfx.config.mem_max_burst_length_bytes = 256;
2019 	if (adev->flags & AMD_IS_APU) {
2020 		/* Get memory bank mapping mode. */
2021 		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2022 		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2023 		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2024 
2025 		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2026 		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2027 		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2028 
2029 		/* Validate settings in case only one DIMM is installed. */
2030 		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2031 			dimm00_addr_map = 0;
2032 		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2033 			dimm01_addr_map = 0;
2034 		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2035 			dimm10_addr_map = 0;
2036 		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2037 			dimm11_addr_map = 0;
2038 
2039 		/* If the DIMM addr map is 8GB, ROW size should be 2KB; otherwise 1KB. */
2040 		/* If ROW size(DIMM1) != ROW size(DIMM0), use the larger ROW size. */
2041 		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2042 			adev->gfx.config.mem_row_size_in_kb = 2;
2043 		else
2044 			adev->gfx.config.mem_row_size_in_kb = 1;
2045 	} else {
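		/*
		 * Row size derived from the column-count fuse:
		 * 4 * 2^(8 + NOOFCOLS) bytes, expressed in KB and capped
		 * at 4KB below.
		 */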
2046 		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2047 		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2048 		if (adev->gfx.config.mem_row_size_in_kb > 4)
2049 			adev->gfx.config.mem_row_size_in_kb = 4;
2050 	}
2051 
2052 	adev->gfx.config.shader_engine_tile_size = 32;
2053 	adev->gfx.config.num_gpus = 1;
2054 	adev->gfx.config.multi_gpu_tile_size = 64;
2055 
2056 	/* fix up row size */
2057 	switch (adev->gfx.config.mem_row_size_in_kb) {
2058 	case 1:
2059 	default:
2060 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2061 		break;
2062 	case 2:
2063 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2064 		break;
2065 	case 4:
2066 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2067 		break;
2068 	}
2069 	adev->gfx.config.gb_addr_config = gb_addr_config;
2070 
2071 	return 0;
2072 }
2073 
2074 static int gfx_v8_0_sw_init(void *handle)
2075 {
2076 	int i, r;
2077 	struct amdgpu_ring *ring;
2078 	struct amdgpu_kiq *kiq;
2079 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2080 
2081 	/* KIQ event */
2082 	r = amdgpu_irq_add_id(adev, 178, &adev->gfx.kiq.irq);
2083 	if (r)
2084 		return r;
2085 
2086 	/* EOP Event */
2087 	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
2088 	if (r)
2089 		return r;
2090 
2091 	/* Privileged reg */
2092 	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2093 	if (r)
2094 		return r;
2095 
2096 	/* Privileged inst */
2097 	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2098 	if (r)
2099 		return r;
2100 
2101 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2102 
2103 	gfx_v8_0_scratch_init(adev);
2104 
2105 	r = gfx_v8_0_init_microcode(adev);
2106 	if (r) {
2107 		DRM_ERROR("Failed to load gfx firmware!\n");
2108 		return r;
2109 	}
2110 
2111 	r = gfx_v8_0_rlc_init(adev);
2112 	if (r) {
2113 		DRM_ERROR("Failed to init rlc BOs!\n");
2114 		return r;
2115 	}
2116 
2117 	r = gfx_v8_0_mec_init(adev);
2118 	if (r) {
2119 		DRM_ERROR("Failed to init MEC BOs!\n");
2120 		return r;
2121 	}
2122 
2123 	r = gfx_v8_0_kiq_init(adev);
2124 	if (r) {
2125 		DRM_ERROR("Failed to init KIQ BOs!\n");
2126 		return r;
2127 	}
2128 
2129 	kiq = &adev->gfx.kiq;
2130 	r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2131 	if (r)
2132 		return r;
2133 
2134 	/* set up the gfx ring */
2135 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2136 		ring = &adev->gfx.gfx_ring[i];
2137 		ring->ring_obj = NULL;
2138 		sprintf(ring->name, "gfx");
2139 		/* no gfx doorbells on iceland */
2140 		if (adev->asic_type != CHIP_TOPAZ) {
2141 			ring->use_doorbell = true;
2142 			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2143 		}
2144 
2145 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2146 				     AMDGPU_CP_IRQ_GFX_EOP);
2147 		if (r)
2148 			return r;
2149 	}
2150 
2151 	/* set up the compute queues */
2152 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2153 		unsigned irq_type;
2154 
2155 		/* max 32 queues per MEC */
2156 		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2157 			DRM_ERROR("Too many (%d) compute rings!\n", i);
2158 			break;
2159 		}
2160 		ring = &adev->gfx.compute_ring[i];
2161 		ring->ring_obj = NULL;
2162 		ring->use_doorbell = true;
2163 		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2164 		ring->me = 1; /* first MEC */
2165 		ring->pipe = i / 8;
2166 		ring->queue = i % 8;
2167 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2168 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2169 		/* type-2 packets are deprecated on MEC, use type-3 instead */
2170 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2171 				     irq_type);
2172 		if (r)
2173 			return r;
2174 	}
2175 
2176 	/* reserve GDS, GWS and OA resources for gfx */
2177 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2178 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2179 				    &adev->gds.gds_gfx_bo, NULL, NULL);
2180 	if (r)
2181 		return r;
2182 
2183 	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2184 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2185 				    &adev->gds.gws_gfx_bo, NULL, NULL);
2186 	if (r)
2187 		return r;
2188 
2189 	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2190 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2191 				    &adev->gds.oa_gfx_bo, NULL, NULL);
2192 	if (r)
2193 		return r;
2194 
2195 	adev->gfx.ce_ram_size = 0x8000;
2196 
2197 	r = gfx_v8_0_gpu_early_init(adev);
2198 	if (r)
2199 		return r;
2200 
2201 	return 0;
2202 }
2203 
2204 static int gfx_v8_0_sw_fini(void *handle)
2205 {
2206 	int i;
2207 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2208 
2209 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2210 	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2211 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2212 
2213 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2214 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2215 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2216 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2217 	gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2218 
2219 	gfx_v8_0_kiq_fini(adev);
2220 	gfx_v8_0_mec_fini(adev);
2221 	gfx_v8_0_rlc_fini(adev);
2222 	gfx_v8_0_free_microcode(adev);
2223 
2224 	return 0;
2225 }
2226 
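/*
 * Program the per-ASIC GB_TILE_MODE and GB_MACROTILE_MODE tables.
 * The indices skipped when writing (e.g. 7, and 12/17/23 on Topaz)
 * appear to be reserved entries, though this file does not say.
 */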
2227 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2228 {
2229 	uint32_t *modearray, *mod2array;
2230 	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2231 	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2232 	u32 reg_offset;
2233 
2234 	modearray = adev->gfx.config.tile_mode_array;
2235 	mod2array = adev->gfx.config.macrotile_mode_array;
2236 
2237 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2238 		modearray[reg_offset] = 0;
2239 
2240 	for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2241 		mod2array[reg_offset] = 0;
2242 
2243 	switch (adev->asic_type) {
2244 	case CHIP_TOPAZ:
2245 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2246 				PIPE_CONFIG(ADDR_SURF_P2) |
2247 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2248 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2249 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250 				PIPE_CONFIG(ADDR_SURF_P2) |
2251 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2252 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2253 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254 				PIPE_CONFIG(ADDR_SURF_P2) |
2255 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2256 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2257 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2258 				PIPE_CONFIG(ADDR_SURF_P2) |
2259 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2260 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2261 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2262 				PIPE_CONFIG(ADDR_SURF_P2) |
2263 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2264 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2265 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2266 				PIPE_CONFIG(ADDR_SURF_P2) |
2267 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2268 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2269 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2270 				PIPE_CONFIG(ADDR_SURF_P2) |
2271 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2272 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2273 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2274 				PIPE_CONFIG(ADDR_SURF_P2));
2275 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2276 				PIPE_CONFIG(ADDR_SURF_P2) |
2277 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2278 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2279 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2280 				 PIPE_CONFIG(ADDR_SURF_P2) |
2281 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2282 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2283 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2284 				 PIPE_CONFIG(ADDR_SURF_P2) |
2285 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2287 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2288 				 PIPE_CONFIG(ADDR_SURF_P2) |
2289 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2290 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2291 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2292 				 PIPE_CONFIG(ADDR_SURF_P2) |
2293 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2294 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2295 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2296 				 PIPE_CONFIG(ADDR_SURF_P2) |
2297 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2298 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2299 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2300 				 PIPE_CONFIG(ADDR_SURF_P2) |
2301 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2303 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2304 				 PIPE_CONFIG(ADDR_SURF_P2) |
2305 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2306 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2307 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2308 				 PIPE_CONFIG(ADDR_SURF_P2) |
2309 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2310 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2311 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2312 				 PIPE_CONFIG(ADDR_SURF_P2) |
2313 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2314 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2315 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2316 				 PIPE_CONFIG(ADDR_SURF_P2) |
2317 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2318 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2319 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2320 				 PIPE_CONFIG(ADDR_SURF_P2) |
2321 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2322 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2323 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2324 				 PIPE_CONFIG(ADDR_SURF_P2) |
2325 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2326 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2327 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2328 				 PIPE_CONFIG(ADDR_SURF_P2) |
2329 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2330 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2331 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2332 				 PIPE_CONFIG(ADDR_SURF_P2) |
2333 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2334 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2335 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2336 				 PIPE_CONFIG(ADDR_SURF_P2) |
2337 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2338 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2339 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340 				 PIPE_CONFIG(ADDR_SURF_P2) |
2341 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2342 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2343 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2344 				 PIPE_CONFIG(ADDR_SURF_P2) |
2345 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2346 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2347 
2348 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2349 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2350 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2351 				NUM_BANKS(ADDR_SURF_8_BANK));
2352 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2353 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2354 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2355 				NUM_BANKS(ADDR_SURF_8_BANK));
2356 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2357 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2358 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2359 				NUM_BANKS(ADDR_SURF_8_BANK));
2360 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2361 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2362 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2363 				NUM_BANKS(ADDR_SURF_8_BANK));
2364 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2366 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2367 				NUM_BANKS(ADDR_SURF_8_BANK));
2368 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2370 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2371 				NUM_BANKS(ADDR_SURF_8_BANK));
2372 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2374 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2375 				NUM_BANKS(ADDR_SURF_8_BANK));
2376 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2377 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2378 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2379 				NUM_BANKS(ADDR_SURF_16_BANK));
2380 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2381 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2382 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2383 				NUM_BANKS(ADDR_SURF_16_BANK));
2384 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2385 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2386 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2387 				 NUM_BANKS(ADDR_SURF_16_BANK));
2388 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2389 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2390 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2391 				 NUM_BANKS(ADDR_SURF_16_BANK));
2392 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2393 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2394 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2395 				 NUM_BANKS(ADDR_SURF_16_BANK));
2396 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2397 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2398 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2399 				 NUM_BANKS(ADDR_SURF_16_BANK));
2400 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2401 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2402 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2403 				 NUM_BANKS(ADDR_SURF_8_BANK));
2404 
2405 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2406 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2407 			    reg_offset != 23)
2408 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2409 
2410 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2411 			if (reg_offset != 7)
2412 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2413 
2414 		break;
2415 	case CHIP_FIJI:
2416 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2419 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2423 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2425 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2427 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2431 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2433 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2435 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2436 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2437 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2439 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2440 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2443 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2444 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2445 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2447 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2448 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2449 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2450 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2451 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2452 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2453 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2457 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2460 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2461 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2462 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2463 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2464 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2465 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2466 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2467 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2469 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2470 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2471 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2472 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2473 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2474 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2475 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2476 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2477 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2478 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2479 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2480 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2481 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2482 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2484 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2486 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2487 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2488 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2489 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2491 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2492 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2495 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2496 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2497 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2498 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2499 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2500 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2501 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2502 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2503 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2504 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2505 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2506 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2507 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2508 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2509 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2510 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2511 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2512 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2513 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2514 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2515 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2516 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2517 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2518 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2519 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2520 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2521 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2522 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2523 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2524 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2525 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2526 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2528 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2529 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2531 				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2532 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2533 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2534 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2535 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2536 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2537 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2538 
2539 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2542 				NUM_BANKS(ADDR_SURF_8_BANK));
2543 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2546 				NUM_BANKS(ADDR_SURF_8_BANK));
2547 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2549 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2550 				NUM_BANKS(ADDR_SURF_8_BANK));
2551 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2553 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554 				NUM_BANKS(ADDR_SURF_8_BANK));
2555 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2557 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2558 				NUM_BANKS(ADDR_SURF_8_BANK));
2559 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2562 				NUM_BANKS(ADDR_SURF_8_BANK));
2563 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566 				NUM_BANKS(ADDR_SURF_8_BANK));
2567 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2568 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2569 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2570 				NUM_BANKS(ADDR_SURF_8_BANK));
2571 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2572 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2573 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2574 				NUM_BANKS(ADDR_SURF_8_BANK));
2575 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2577 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578 				 NUM_BANKS(ADDR_SURF_8_BANK));
2579 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2581 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2582 				 NUM_BANKS(ADDR_SURF_8_BANK));
2583 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2586 				 NUM_BANKS(ADDR_SURF_8_BANK));
2587 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2589 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2590 				 NUM_BANKS(ADDR_SURF_8_BANK));
2591 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2593 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2594 				 NUM_BANKS(ADDR_SURF_4_BANK));
2595 
2596 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2597 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2598 
2599 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2600 			if (reg_offset != 7)
2601 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2602 
2603 		break;
2604 	case CHIP_TONGA:
2605 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2608 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2609 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2612 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2613 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2616 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2617 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2620 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2621 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2624 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2625 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2626 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2628 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2629 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2632 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2633 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2636 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2637 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2638 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2639 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2642 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2645 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2646 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2648 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2649 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2650 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2651 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2652 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2654 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2655 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2656 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2657 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2658 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2659 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2660 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2661 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2662 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2664 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2665 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2666 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2667 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2668 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2669 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2670 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2671 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2672 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2673 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2674 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2675 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2676 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2677 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2679 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2680 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2681 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2682 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2683 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2684 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2685 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2686 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2687 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2688 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2689 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2690 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2691 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2692 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2693 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2694 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2695 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2696 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2698 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2699 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2700 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2701 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2702 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2703 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2704 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2705 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2706 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2707 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2708 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2709 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2710 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2711 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2712 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2713 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2714 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2717 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2718 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2720 				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2721 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2722 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2723 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724 				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2726 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2727 
2728 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2729 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2730 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2731 				NUM_BANKS(ADDR_SURF_16_BANK));
2732 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2734 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735 				NUM_BANKS(ADDR_SURF_16_BANK));
2736 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2738 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2739 				NUM_BANKS(ADDR_SURF_16_BANK));
2740 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2741 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2742 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2743 				NUM_BANKS(ADDR_SURF_16_BANK));
2744 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2746 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747 				NUM_BANKS(ADDR_SURF_16_BANK));
2748 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2750 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2751 				NUM_BANKS(ADDR_SURF_16_BANK));
2752 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2754 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2755 				NUM_BANKS(ADDR_SURF_16_BANK));
2756 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2758 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2759 				NUM_BANKS(ADDR_SURF_16_BANK));
2760 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2761 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2762 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2763 				NUM_BANKS(ADDR_SURF_16_BANK));
2764 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2765 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2766 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2767 				 NUM_BANKS(ADDR_SURF_16_BANK));
2768 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2771 				 NUM_BANKS(ADDR_SURF_16_BANK));
2772 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2774 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2775 				 NUM_BANKS(ADDR_SURF_8_BANK));
2776 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2777 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2778 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2779 				 NUM_BANKS(ADDR_SURF_4_BANK));
2780 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2782 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2783 				 NUM_BANKS(ADDR_SURF_4_BANK));
2784 
2785 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2786 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2787 
2788 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2789 			if (reg_offset != 7)
2790 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2791 
2792 		break;
2793 	case CHIP_POLARIS11:
2794 	case CHIP_POLARIS12:
2795 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2798 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2799 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2802 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2803 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2806 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2807 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2810 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2811 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2814 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2815 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2818 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2819 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2822 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2823 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2826 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2827 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2828 				PIPE_CONFIG(ADDR_SURF_P4_16x16));
2829 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2830 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2831 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2833 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2835 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2836 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2837 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2838 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2841 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2842 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2843 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2844 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2845 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2846 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2847 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2848 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2849 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2850 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2851 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2852 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2854 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2855 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2856 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2857 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2858 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2860 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2861 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2862 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2863 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2864 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2865 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2866 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2868 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2869 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2870 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2871 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2872 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2873 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2874 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2875 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2876 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2877 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2878 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2879 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2880 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2881 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2882 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2883 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2884 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2885 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2886 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2887 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2888 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2889 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2890 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2891 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2892 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2893 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2894 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2895 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2896 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2897 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2898 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2899 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2900 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2901 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2904 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2905 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2907 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2908 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2909 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2911 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2913 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2914 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2916 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2917 
2918 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2920 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2921 				NUM_BANKS(ADDR_SURF_16_BANK));
2922 
2923 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2925 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2926 				NUM_BANKS(ADDR_SURF_16_BANK));
2927 
2928 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2929 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2930 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2931 				NUM_BANKS(ADDR_SURF_16_BANK));
2932 
2933 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2935 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2936 				NUM_BANKS(ADDR_SURF_16_BANK));
2937 
2938 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2941 				NUM_BANKS(ADDR_SURF_16_BANK));
2942 
2943 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2944 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2945 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2946 				NUM_BANKS(ADDR_SURF_16_BANK));
2947 
2948 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2949 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2950 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2951 				NUM_BANKS(ADDR_SURF_16_BANK));
2952 
2953 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2954 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2955 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2956 				NUM_BANKS(ADDR_SURF_16_BANK));
2957 
2958 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2959 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2960 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2961 				NUM_BANKS(ADDR_SURF_16_BANK));
2962 
2963 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2965 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966 				NUM_BANKS(ADDR_SURF_16_BANK));
2967 
2968 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 				NUM_BANKS(ADDR_SURF_16_BANK));
2972 
2973 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2974 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2975 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2976 				NUM_BANKS(ADDR_SURF_16_BANK));
2977 
2978 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2979 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2980 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2981 				NUM_BANKS(ADDR_SURF_8_BANK));
2982 
2983 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2986 				NUM_BANKS(ADDR_SURF_4_BANK));
2987 
2988 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2989 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2990 
2991 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2992 			if (reg_offset != 7)
2993 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2994 
2995 		break;
2996 	case CHIP_POLARIS10:
2997 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3000 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3001 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3002 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3004 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3005 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3008 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3009 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3010 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3012 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3013 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3014 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3016 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3017 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3018 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3020 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3021 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3023 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3024 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3025 		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3027 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3028 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3029 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3030 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3031 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3032 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3033 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3034 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3035 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3036 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3037 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3038 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3039 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3040 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3041 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3042 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3043 		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3044 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3045 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3046 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3047 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3048 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3049 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3050 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3051 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3052 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3053 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3054 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3055 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3056 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3057 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3058 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3059 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3061 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3062 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3063 		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3064 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3065 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3066 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3067 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3068 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3069 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3070 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3071 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3072 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3073 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3074 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3075 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3076 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3077 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3078 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3079 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3080 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3081 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3082 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3083 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3084 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3085 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3086 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3087 		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3088 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3089 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3090 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3091 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3092 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3093 				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3094 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3095 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3096 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3097 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3098 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3099 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3100 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3101 				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3102 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3103 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3104 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3105 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3106 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3107 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3109 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3110 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3112 				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3113 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3114 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3115 		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3116 				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3117 				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3118 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3119 
3120 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3121 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3122 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3123 				NUM_BANKS(ADDR_SURF_16_BANK));
3124 
3125 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3126 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3127 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3128 				NUM_BANKS(ADDR_SURF_16_BANK));
3129 
3130 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3131 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3132 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3133 				NUM_BANKS(ADDR_SURF_16_BANK));
3134 
3135 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138 				NUM_BANKS(ADDR_SURF_16_BANK));
3139 
3140 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3141 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3142 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3143 				NUM_BANKS(ADDR_SURF_16_BANK));
3144 
3145 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3146 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3147 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3148 				NUM_BANKS(ADDR_SURF_16_BANK));
3149 
3150 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3151 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3152 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3153 				NUM_BANKS(ADDR_SURF_16_BANK));
3154 
3155 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3156 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3157 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3158 				NUM_BANKS(ADDR_SURF_16_BANK));
3159 
3160 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3161 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3162 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3163 				NUM_BANKS(ADDR_SURF_16_BANK));
3164 
3165 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3166 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3167 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3168 				NUM_BANKS(ADDR_SURF_16_BANK));
3169 
3170 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3171 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3172 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3173 				NUM_BANKS(ADDR_SURF_16_BANK));
3174 
3175 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3176 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3177 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3178 				NUM_BANKS(ADDR_SURF_8_BANK));
3179 
3180 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3182 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3183 				NUM_BANKS(ADDR_SURF_4_BANK));
3184 
3185 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3186 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3187 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3188 				NUM_BANKS(ADDR_SURF_4_BANK));
3189 
3190 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3191 			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3192 
3193 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3194 			if (reg_offset != 7)
3195 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3196 
3197 		break;
3198 	case CHIP_STONEY:
3199 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200 				PIPE_CONFIG(ADDR_SURF_P2) |
3201 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3202 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3203 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204 				PIPE_CONFIG(ADDR_SURF_P2) |
3205 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3206 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3207 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3208 				PIPE_CONFIG(ADDR_SURF_P2) |
3209 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3210 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3211 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3212 				PIPE_CONFIG(ADDR_SURF_P2) |
3213 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3214 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3215 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3216 				PIPE_CONFIG(ADDR_SURF_P2) |
3217 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3218 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3219 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3220 				PIPE_CONFIG(ADDR_SURF_P2) |
3221 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3222 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3223 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224 				PIPE_CONFIG(ADDR_SURF_P2) |
3225 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3226 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3227 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3228 				PIPE_CONFIG(ADDR_SURF_P2));
3229 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3230 				PIPE_CONFIG(ADDR_SURF_P2) |
3231 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3232 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3233 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3234 				 PIPE_CONFIG(ADDR_SURF_P2) |
3235 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3236 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3237 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238 				 PIPE_CONFIG(ADDR_SURF_P2) |
3239 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3240 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3241 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3242 				 PIPE_CONFIG(ADDR_SURF_P2) |
3243 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3244 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3245 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3246 				 PIPE_CONFIG(ADDR_SURF_P2) |
3247 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3248 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3249 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3250 				 PIPE_CONFIG(ADDR_SURF_P2) |
3251 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3252 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3253 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3254 				 PIPE_CONFIG(ADDR_SURF_P2) |
3255 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3257 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3258 				 PIPE_CONFIG(ADDR_SURF_P2) |
3259 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3260 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3261 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3262 				 PIPE_CONFIG(ADDR_SURF_P2) |
3263 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3264 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3265 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3266 				 PIPE_CONFIG(ADDR_SURF_P2) |
3267 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3268 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3269 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3270 				 PIPE_CONFIG(ADDR_SURF_P2) |
3271 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3272 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3273 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3274 				 PIPE_CONFIG(ADDR_SURF_P2) |
3275 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3276 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3277 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3278 				 PIPE_CONFIG(ADDR_SURF_P2) |
3279 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3280 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3281 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3282 				 PIPE_CONFIG(ADDR_SURF_P2) |
3283 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3284 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3285 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3286 				 PIPE_CONFIG(ADDR_SURF_P2) |
3287 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3288 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3289 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3290 				 PIPE_CONFIG(ADDR_SURF_P2) |
3291 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3292 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3294 				 PIPE_CONFIG(ADDR_SURF_P2) |
3295 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3296 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3297 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3298 				 PIPE_CONFIG(ADDR_SURF_P2) |
3299 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3300 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3301 
3302 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3303 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3304 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3305 				NUM_BANKS(ADDR_SURF_8_BANK));
3306 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3307 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3308 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3309 				NUM_BANKS(ADDR_SURF_8_BANK));
3310 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3311 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3312 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3313 				NUM_BANKS(ADDR_SURF_8_BANK));
3314 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3315 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3316 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3317 				NUM_BANKS(ADDR_SURF_8_BANK));
3318 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3319 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3320 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3321 				NUM_BANKS(ADDR_SURF_8_BANK));
3322 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3325 				NUM_BANKS(ADDR_SURF_8_BANK));
3326 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3327 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3328 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3329 				NUM_BANKS(ADDR_SURF_8_BANK));
3330 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3331 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3332 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3333 				NUM_BANKS(ADDR_SURF_16_BANK));
3334 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3335 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3336 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337 				NUM_BANKS(ADDR_SURF_16_BANK));
3338 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3339 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3340 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3341 				 NUM_BANKS(ADDR_SURF_16_BANK));
3342 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3343 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3344 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3345 				 NUM_BANKS(ADDR_SURF_16_BANK));
3346 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3347 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3348 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349 				 NUM_BANKS(ADDR_SURF_16_BANK));
3350 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3351 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3352 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3353 				 NUM_BANKS(ADDR_SURF_16_BANK));
3354 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3355 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3356 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3357 				 NUM_BANKS(ADDR_SURF_8_BANK));
3358 
3359 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3360 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3361 			    reg_offset != 23)
3362 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3363 
3364 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3365 			if (reg_offset != 7)
3366 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3367 
3368 		break;
3369 	default:
3370 		dev_warn(adev->dev,
3371 			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3372 			 adev->asic_type);
3373 
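		/* fall through to the CHIP_CARRIZO settings */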
3374 	case CHIP_CARRIZO:
3375 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3376 				PIPE_CONFIG(ADDR_SURF_P2) |
3377 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3378 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3379 		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3380 				PIPE_CONFIG(ADDR_SURF_P2) |
3381 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3382 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3383 		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3384 				PIPE_CONFIG(ADDR_SURF_P2) |
3385 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3386 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3387 		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3388 				PIPE_CONFIG(ADDR_SURF_P2) |
3389 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3390 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3391 		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3392 				PIPE_CONFIG(ADDR_SURF_P2) |
3393 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3394 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3395 		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3396 				PIPE_CONFIG(ADDR_SURF_P2) |
3397 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3398 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3399 		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3400 				PIPE_CONFIG(ADDR_SURF_P2) |
3401 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3402 				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3403 		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3404 				PIPE_CONFIG(ADDR_SURF_P2));
3405 		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3406 				PIPE_CONFIG(ADDR_SURF_P2) |
3407 				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3408 				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3409 		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3410 				 PIPE_CONFIG(ADDR_SURF_P2) |
3411 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3412 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3413 		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3414 				 PIPE_CONFIG(ADDR_SURF_P2) |
3415 				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3416 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3417 		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3418 				 PIPE_CONFIG(ADDR_SURF_P2) |
3419 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3420 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3421 		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3422 				 PIPE_CONFIG(ADDR_SURF_P2) |
3423 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3424 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3425 		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3426 				 PIPE_CONFIG(ADDR_SURF_P2) |
3427 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3428 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3429 		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3430 				 PIPE_CONFIG(ADDR_SURF_P2) |
3431 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3432 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3433 		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3434 				 PIPE_CONFIG(ADDR_SURF_P2) |
3435 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3436 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3437 		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3438 				 PIPE_CONFIG(ADDR_SURF_P2) |
3439 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3440 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3441 		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3442 				 PIPE_CONFIG(ADDR_SURF_P2) |
3443 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3444 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3445 		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3446 				 PIPE_CONFIG(ADDR_SURF_P2) |
3447 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3448 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3449 		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3450 				 PIPE_CONFIG(ADDR_SURF_P2) |
3451 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3452 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3453 		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3454 				 PIPE_CONFIG(ADDR_SURF_P2) |
3455 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3456 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3457 		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3458 				 PIPE_CONFIG(ADDR_SURF_P2) |
3459 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3460 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3461 		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3462 				 PIPE_CONFIG(ADDR_SURF_P2) |
3463 				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3464 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3465 		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3466 				 PIPE_CONFIG(ADDR_SURF_P2) |
3467 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3468 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3469 		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3470 				 PIPE_CONFIG(ADDR_SURF_P2) |
3471 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3472 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3473 		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3474 				 PIPE_CONFIG(ADDR_SURF_P2) |
3475 				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3476 				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3477 
3478 		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3479 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3480 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3481 				NUM_BANKS(ADDR_SURF_8_BANK));
3482 		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3483 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3484 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3485 				NUM_BANKS(ADDR_SURF_8_BANK));
3486 		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3487 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3488 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3489 				NUM_BANKS(ADDR_SURF_8_BANK));
3490 		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3491 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3492 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3493 				NUM_BANKS(ADDR_SURF_8_BANK));
3494 		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3495 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3496 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3497 				NUM_BANKS(ADDR_SURF_8_BANK));
3498 		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3499 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3500 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3501 				NUM_BANKS(ADDR_SURF_8_BANK));
3502 		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3503 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3504 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3505 				NUM_BANKS(ADDR_SURF_8_BANK));
3506 		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3507 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3508 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3509 				NUM_BANKS(ADDR_SURF_16_BANK));
3510 		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3511 				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3512 				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3513 				NUM_BANKS(ADDR_SURF_16_BANK));
3514 		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3515 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3516 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3517 				 NUM_BANKS(ADDR_SURF_16_BANK));
3518 		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3519 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3520 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3521 				 NUM_BANKS(ADDR_SURF_16_BANK));
3522 		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3523 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3524 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3525 				 NUM_BANKS(ADDR_SURF_16_BANK));
3526 		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3527 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3528 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3529 				 NUM_BANKS(ADDR_SURF_16_BANK));
3530 		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3531 				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3532 				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3533 				 NUM_BANKS(ADDR_SURF_8_BANK));
3534 
3535 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3536 			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3537 			    reg_offset != 23)
3538 				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3539 
3540 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3541 			if (reg_offset != 7)
3542 				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3543 
3544 		break;
3545 	}
3546 }
3547 
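/*
 * Select which shader engine (SE), shader array (SH) and instance the
 * subsequent GRBM-indexed register accesses will target; passing
 * 0xffffffff for a field requests broadcast writes to all units of
 * that type.
 */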
3548 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3549 				  u32 se_num, u32 sh_num, u32 instance)
3550 {
3551 	u32 data;
3552 
3553 	if (instance == 0xffffffff)
3554 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3555 	else
3556 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3557 
3558 	if (se_num == 0xffffffff)
3559 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3560 	else
3561 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3562 
3563 	if (sh_num == 0xffffffff)
3564 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3565 	else
3566 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3567 
3568 	WREG32(mmGRBM_GFX_INDEX, data);
3569 }
3570 
3571 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3572 {
3573 	return (u32)((1ULL << bit_width) - 1);
3574 }
3575 
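/*
 * Combine the fused (CC) and user (GC_USER) render backend disable
 * masks for the currently selected SE/SH and return the still-active
 * RBs as a bitmap.
 */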
3576 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3577 {
3578 	u32 data, mask;
3579 
3580 	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3581 		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3582 
3583 	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3584 
3585 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3586 				       adev->gfx.config.max_sh_per_se);
3587 
3588 	return (~data) & mask;
3589 }
3590 
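/* per-ASIC defaults for PA_SC_RASTER_CONFIG and PA_SC_RASTER_CONFIG_1 */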
3591 static void
3592 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3593 {
3594 	switch (adev->asic_type) {
3595 	case CHIP_FIJI:
3596 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3597 			  RB_XSEL2(1) | PKR_MAP(2) |
3598 			  PKR_XSEL(1) | PKR_YSEL(1) |
3599 			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3600 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3601 			   SE_PAIR_YSEL(2);
3602 		break;
3603 	case CHIP_TONGA:
3604 	case CHIP_POLARIS10:
3605 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3606 			  SE_XSEL(1) | SE_YSEL(1);
3607 		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3608 			   SE_PAIR_YSEL(2);
3609 		break;
3610 	case CHIP_TOPAZ:
3611 	case CHIP_CARRIZO:
3612 		*rconf |= RB_MAP_PKR0(2);
3613 		*rconf1 |= 0x0;
3614 		break;
3615 	case CHIP_POLARIS11:
3616 	case CHIP_POLARIS12:
3617 		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3618 			  SE_XSEL(1) | SE_YSEL(1);
3619 		*rconf1 |= 0x0;
3620 		break;
3621 	case CHIP_STONEY:
3622 		*rconf |= 0x0;
3623 		*rconf1 |= 0x0;
3624 		break;
3625 	default:
3626 		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3627 		break;
3628 	}
3629 }
3630 
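/*
 * With some render backends harvested, rewrite the raster configuration
 * per shader engine so that the SE/PKR/RB mappings only reference the
 * backends still present in rb_mask.
 */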
3631 static void
3632 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3633 					u32 raster_config, u32 raster_config_1,
3634 					unsigned rb_mask, unsigned num_rb)
3635 {
3636 	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3637 	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3638 	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3639 	unsigned rb_per_se = num_rb / num_se;
3640 	unsigned se_mask[4];
3641 	unsigned se;
3642 
3643 	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3644 	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3645 	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3646 	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3647 
3648 	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3649 	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3650 	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3651 
3652 	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3653 			     (!se_mask[2] && !se_mask[3]))) {
3654 		raster_config_1 &= ~SE_PAIR_MAP_MASK;
3655 
3656 		if (!se_mask[0] && !se_mask[1]) {
3657 			raster_config_1 |=
3658 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3659 		} else {
3660 			raster_config_1 |=
3661 				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3662 		}
3663 	}
3664 
3665 	for (se = 0; se < num_se; se++) {
3666 		unsigned raster_config_se = raster_config;
3667 		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3668 		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3669 		int idx = (se / 2) * 2;
3670 
3671 		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3672 			raster_config_se &= ~SE_MAP_MASK;
3673 
3674 			if (!se_mask[idx]) {
3675 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3676 			} else {
3677 				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3678 			}
3679 		}
3680 
3681 		pkr0_mask &= rb_mask;
3682 		pkr1_mask &= rb_mask;
3683 		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3684 			raster_config_se &= ~PKR_MAP_MASK;
3685 
3686 			if (!pkr0_mask) {
3687 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3688 			} else {
3689 				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3690 			}
3691 		}
3692 
3693 		if (rb_per_se >= 2) {
3694 			unsigned rb0_mask = 1 << (se * rb_per_se);
3695 			unsigned rb1_mask = rb0_mask << 1;
3696 
3697 			rb0_mask &= rb_mask;
3698 			rb1_mask &= rb_mask;
3699 			if (!rb0_mask || !rb1_mask) {
3700 				raster_config_se &= ~RB_MAP_PKR0_MASK;
3701 
3702 				if (!rb0_mask) {
3703 					raster_config_se |=
3704 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3705 				} else {
3706 					raster_config_se |=
3707 						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3708 				}
3709 			}
3710 
3711 			if (rb_per_se > 2) {
3712 				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3713 				rb1_mask = rb0_mask << 1;
3714 				rb0_mask &= rb_mask;
3715 				rb1_mask &= rb_mask;
3716 				if (!rb0_mask || !rb1_mask) {
3717 					raster_config_se &= ~RB_MAP_PKR1_MASK;
3718 
3719 					if (!rb0_mask) {
3720 						raster_config_se |=
3721 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3722 					} else {
3723 						raster_config_se |=
3724 							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3725 					}
3726 				}
3727 			}
3728 		}
3729 
3730 		/* GRBM_GFX_INDEX has a different offset on VI */
3731 		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3732 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3733 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3734 	}
3735 
3736 	/* GRBM_GFX_INDEX has a different offset on VI */
3737 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3738 }
3739 
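/*
 * Probe the active render backends on every SE/SH, program the raster
 * configuration (taking the harvested path when some RBs are disabled)
 * and cache the resulting per-SE/SH register values for userspace.
 */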
3740 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3741 {
3742 	int i, j;
3743 	u32 data;
3744 	u32 raster_config = 0, raster_config_1 = 0;
3745 	u32 active_rbs = 0;
3746 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3747 					adev->gfx.config.max_sh_per_se;
3748 	unsigned num_rb_pipes;
3749 
3750 	mutex_lock(&adev->grbm_idx_mutex);
3751 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3752 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3753 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3754 			data = gfx_v8_0_get_rb_active_bitmap(adev);
3755 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3756 					       rb_bitmap_width_per_sh);
3757 		}
3758 	}
3759 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3760 
3761 	adev->gfx.config.backend_enable_mask = active_rbs;
3762 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3763 
3764 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3765 			     adev->gfx.config.max_shader_engines, 16);
3766 
3767 	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3768 
3769 	if (!adev->gfx.config.backend_enable_mask ||
3770 			adev->gfx.config.num_rbs >= num_rb_pipes) {
3771 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3772 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3773 	} else {
3774 		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3775 							adev->gfx.config.backend_enable_mask,
3776 							num_rb_pipes);
3777 	}
3778 
3779 	/* cache the values for userspace */
3780 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3781 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3782 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3783 			adev->gfx.config.rb_config[i][j].rb_backend_disable =
3784 				RREG32(mmCC_RB_BACKEND_DISABLE);
3785 			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3786 				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3787 			adev->gfx.config.rb_config[i][j].raster_config =
3788 				RREG32(mmPA_SC_RASTER_CONFIG);
3789 			adev->gfx.config.rb_config[i][j].raster_config_1 =
3790 				RREG32(mmPA_SC_RASTER_CONFIG_1);
3791 		}
3792 	}
3793 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3794 	mutex_unlock(&adev->grbm_idx_mutex);
3795 }
3796 
3797 /**
3798  * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
3799  *
3800  * @adev: amdgpu_device pointer
3801  *
3802  * Initialize the SH_MEM configuration and aperture registers used by
3803  * the compute VMIDs.
3804  */
3805 #define DEFAULT_SH_MEM_BASES	(0x6000)
3806 #define FIRST_COMPUTE_VMID	(8)
3807 #define LAST_COMPUTE_VMID	(16)
3808 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3809 {
3810 	int i;
3811 	uint32_t sh_mem_config;
3812 	uint32_t sh_mem_bases;
3813 
3814 	/*
3815 	 * Configure apertures:
3816 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3817 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3818 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3819 	 */
3820 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3821 
3822 	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3823 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3824 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3825 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3826 			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3827 			SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3828 
3829 	mutex_lock(&adev->srbm_mutex);
3830 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3831 		vi_srbm_select(adev, 0, 0, 0, i);
3832 		/* CP and shaders */
3833 		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
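		/* setting base > limit leaves the APE1 aperture disabled */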
3834 		WREG32(mmSH_MEM_APE1_BASE, 1);
3835 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3836 		WREG32(mmSH_MEM_BASES, sh_mem_bases);
3837 	}
3838 	vi_srbm_select(adev, 0, 0, 0, 0);
3839 	mutex_unlock(&adev->srbm_mutex);
3840 }
3841 
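/*
 * Core GFX block bring-up: program the address and tiling configuration,
 * set up the render backends, initialize SH_MEM defaults for all 16
 * VMIDs and the compute VMID apertures, then broadcast the FIFO sizes
 * and SPI arbitration priorities to all shader engines.
 */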
3842 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3843 {
3844 	u32 tmp;
3845 	int i;
3846 
3847 	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3848 	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3849 	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3850 	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3851 
3852 	gfx_v8_0_tiling_mode_table_init(adev);
3853 	gfx_v8_0_setup_rb(adev);
3854 	gfx_v8_0_get_cu_info(adev);
3855 
3856 	/* XXX SH_MEM regs */
3857 	/* where to put LDS, scratch, GPUVM in FSA64 space */
3858 	mutex_lock(&adev->srbm_mutex);
3859 	for (i = 0; i < 16; i++) {
3860 		vi_srbm_select(adev, 0, 0, 0, i);
3861 		/* CP and shaders */
3862 		if (i == 0) {
3863 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3864 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3865 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3866 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3867 			WREG32(mmSH_MEM_CONFIG, tmp);
3868 		} else {
3869 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3870 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3871 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3872 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3873 			WREG32(mmSH_MEM_CONFIG, tmp);
3874 		}
3875 
3876 		WREG32(mmSH_MEM_APE1_BASE, 1);
3877 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
3878 		WREG32(mmSH_MEM_BASES, 0);
3879 	}
3880 	vi_srbm_select(adev, 0, 0, 0, 0);
3881 	mutex_unlock(&adev->srbm_mutex);
3882 
3883 	gfx_v8_0_init_compute_vmid(adev);
3884 
3885 	mutex_lock(&adev->grbm_idx_mutex);
3886 	/*
3887 	 * make sure that the following register writes are broadcast to
3888 	 * all of the shader engines and arrays
3889 	 */
3890 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3891 
3892 	WREG32(mmPA_SC_FIFO_SIZE,
3893 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
3894 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3895 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
3896 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3897 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
3898 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3899 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3900 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3901 
3902 	tmp = RREG32(mmSPI_ARB_PRIORITY);
3903 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3904 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3905 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3906 	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3907 	WREG32(mmSPI_ARB_PRIORITY, tmp);
3908 
3909 	mutex_unlock(&adev->grbm_idx_mutex);
3910 
3911 }
3912 
3913 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
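/*
 * Poll the per-CU and non-CU RLC serdes master busy registers across
 * all SE/SH combinations until they report idle or the usec timeout
 * expires.
 */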
3914 {
3915 	u32 i, j, k;
3916 	u32 mask;
3917 
3918 	mutex_lock(&adev->grbm_idx_mutex);
3919 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3920 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3921 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3922 			for (k = 0; k < adev->usec_timeout; k++) {
3923 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3924 					break;
3925 				udelay(1);
3926 			}
3927 		}
3928 	}
3929 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3930 	mutex_unlock(&adev->grbm_idx_mutex);
3931 
3932 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3933 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3934 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3935 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3936 	for (k = 0; k < adev->usec_timeout; k++) {
3937 		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3938 			break;
3939 		udelay(1);
3940 	}
3941 }
3942 
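/* toggle the CP ring0 busy/empty/idle interrupt sources */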
3943 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3944 					       bool enable)
3945 {
3946 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3947 
3948 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3949 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3950 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3951 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3952 
3953 	WREG32(mmCP_INT_CNTL_RING0, tmp);
3954 }
3955 
3956 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
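/* point the RLC at the clear state indirect buffer (CSIB) */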
3957 {
3958 	/* csib */
3959 	WREG32(mmRLC_CSIB_ADDR_HI,
3960 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
3961 	WREG32(mmRLC_CSIB_ADDR_LO,
3962 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3963 	WREG32(mmRLC_CSIB_LENGTH,
3964 			adev->gfx.rlc.clear_state_size);
3965 }
3966 
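/*
 * Walk the RLC indirect register list (entries terminated by
 * 0xFFFFFFFF), record the start offset of each entry and replace every
 * index register with its position in the deduplicated unique_indices
 * table.
 */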
3967 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3968 				int ind_offset,
3969 				int list_size,
3970 				int *unique_indices,
3971 				int *indices_count,
3972 				int max_indices,
3973 				int *ind_start_offsets,
3974 				int *offset_count,
3975 				int max_offset)
3976 {
3977 	int indices;
3978 	bool new_entry = true;
3979 
3980 	for (; ind_offset < list_size; ind_offset++) {
3981 
3982 		if (new_entry) {
3983 			new_entry = false;
3984 			ind_start_offsets[*offset_count] = ind_offset;
3985 			*offset_count = *offset_count + 1;
3986 			BUG_ON(*offset_count >= max_offset);
3987 		}
3988 
3989 		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3990 			new_entry = true;
3991 			continue;
3992 		}
3993 
3994 		ind_offset += 2;
3995 
3996 		/* look for the matching index */
3997 		for (indices = 0;
3998 			indices < *indices_count;
3999 			indices++) {
4000 			if (unique_indices[indices] ==
4001 				register_list_format[ind_offset])
4002 				break;
4003 		}
4004 
4005 		if (indices >= *indices_count) {
4006 			unique_indices[*indices_count] =
4007 				register_list_format[ind_offset];
4008 			indices = *indices_count;
4009 			*indices_count = *indices_count + 1;
4010 			BUG_ON(*indices_count >= max_indices);
4011 		}
4012 
4013 		register_list_format[ind_offset] = indices;
4014 	}
4015 }
4016 
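/*
 * Upload the RLC save/restore lists: the direct register restore list
 * into ARAM, the parsed indirect format list plus its start offsets
 * into GPM scratch, and the unique index registers into the SRM index
 * control register pairs.
 */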
4017 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
4018 {
4019 	int i, temp, data;
4020 	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
4021 	int indices_count = 0;
4022 	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
4023 	int offset_count = 0;
4024 
4025 	int list_size;
4026 	unsigned int *register_list_format =
4027 		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
4028 	if (!register_list_format)
4029 		return -ENOMEM;
4030 	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
4031 			adev->gfx.rlc.reg_list_format_size_bytes);
4032 
4033 	gfx_v8_0_parse_ind_reg_list(register_list_format,
4034 				RLC_FormatDirectRegListLength,
4035 				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
4036 				unique_indices,
4037 				&indices_count,
4038 				ARRAY_SIZE(unique_indices),
4039 				indirect_start_offsets,
4040 				&offset_count,
4041 				ARRAY_SIZE(indirect_start_offsets));
4042 
4043 	/* save and restore list */
4044 	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
4045 
4046 	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
4047 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
4048 		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
4049 
4050 	/* indirect list */
4051 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
4052 	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
4053 		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
4054 
4055 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
4056 	list_size = list_size >> 1;
4057 	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
4058 	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
4059 
4060 	/* write the starting offsets of the indirect register lists */
4061 	WREG32(mmRLC_GPM_SCRATCH_ADDR,
4062 		adev->gfx.rlc.starting_offsets_start);
4063 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
4064 		WREG32(mmRLC_GPM_SCRATCH_DATA,
4065 				indirect_start_offsets[i]);
4066 
4067 	/* unique indices */
4068 	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4069 	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4070 	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4071 		if (unique_indices[i] != 0) {
4072 			amdgpu_mm_wreg(adev, temp + i,
4073 					unique_indices[i] & 0x3FFFF, false);
4074 			amdgpu_mm_wreg(adev, data + i,
4075 					unique_indices[i] >> 20, false);
4076 		}
4077 	}
4078 	kfree(register_list_format);
4079 
4080 	return 0;
4081 }
4082 
4083 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4084 {
4085 	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4086 }
4087 
4088 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4089 {
4090 	uint32_t data;
4091 
4092 	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4093 
4094 	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4095 	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4096 	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4097 	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4098 	WREG32(mmRLC_PG_DELAY, data);
4099 
4100 	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4101 	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4103 }
4104 
4105 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4106 						bool enable)
4107 {
4108 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4109 }
4110 
4111 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4112 						  bool enable)
4113 {
4114 	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4115 }
4116 
4117 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4118 {
4119 	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4120 }
4121 
4122 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4123 {
4124 	if ((adev->asic_type == CHIP_CARRIZO) ||
4125 	    (adev->asic_type == CHIP_STONEY)) {
4126 		gfx_v8_0_init_csb(adev);
4127 		gfx_v8_0_init_save_restore_list(adev);
4128 		gfx_v8_0_enable_save_restore_machine(adev);
4129 		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4130 		gfx_v8_0_init_power_gating(adev);
4131 		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4132 	} else if ((adev->asic_type == CHIP_POLARIS11) ||
4133 		   (adev->asic_type == CHIP_POLARIS12)) {
4134 		gfx_v8_0_init_csb(adev);
4135 		gfx_v8_0_init_save_restore_list(adev);
4136 		gfx_v8_0_enable_save_restore_machine(adev);
4137 		gfx_v8_0_init_power_gating(adev);
4138 	}
4140 }
4141 
4142 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4143 {
4144 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4145 
4146 	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4147 	gfx_v8_0_wait_for_rlc_serdes(adev);
4148 }
4149 
4150 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4151 {
4152 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4153 	udelay(50);
4154 
4155 	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4156 	udelay(50);
4157 }
4158 
4159 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4160 {
4161 	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4162 
4163 	/* on APUs such as Carrizo, the CP interrupt is enabled only after the CP is initialized */
4164 	if (!(adev->flags & AMD_IS_APU))
4165 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4166 
4167 	udelay(50);
4168 }
4169 
4170 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4171 {
4172 	const struct rlc_firmware_header_v2_0 *hdr;
4173 	const __le32 *fw_data;
4174 	unsigned i, fw_size;
4175 
4176 	if (!adev->gfx.rlc_fw)
4177 		return -EINVAL;
4178 
4179 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4180 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
4181 
4182 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4183 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4184 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4185 
4186 	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4187 	for (i = 0; i < fw_size; i++)
4188 		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4189 	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4190 
4191 	return 0;
4192 }
4193 
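/*
 * Bring the RLC up from scratch: halt it, disable coarse-grain clock
 * gating and power gating, soft-reset the block, re-init the power
 * gating state, (re)load the RLC microcode if needed, then start it.
 */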
4194 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4195 {
4196 	int r;
4197 	u32 tmp;
4198 
4199 	gfx_v8_0_rlc_stop(adev);
4200 
4201 	/* disable CG */
4202 	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4203 	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4204 		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4205 	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4206 	if (adev->asic_type == CHIP_POLARIS11 ||
4207 	    adev->asic_type == CHIP_POLARIS10 ||
4208 	    adev->asic_type == CHIP_POLARIS12) {
4209 		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4210 		tmp &= ~0x3;
4211 		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4212 	}
4213 
4214 	/* disable PG */
4215 	WREG32(mmRLC_PG_CNTL, 0);
4216 
4217 	gfx_v8_0_rlc_reset(adev);
4218 	gfx_v8_0_init_pg(adev);
4219 
4220 	if (!adev->pp_enabled) {
4221 		if (!adev->firmware.smu_load) {
4222 			/* legacy rlc firmware loading */
4223 			r = gfx_v8_0_rlc_load_microcode(adev);
4224 			if (r)
4225 				return r;
4226 		} else {
4227 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4228 							AMDGPU_UCODE_ID_RLC_G);
4229 			if (r)
4230 				return -EINVAL;
4231 		}
4232 	}
4233 
4234 	gfx_v8_0_rlc_start(adev);
4235 
4236 	return 0;
4237 }
4238 
4239 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4240 {
4241 	int i;
4242 	u32 tmp = RREG32(mmCP_ME_CNTL);
4243 
4244 	if (enable) {
4245 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4246 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4247 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4248 	} else {
4249 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4250 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4251 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4252 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4253 			adev->gfx.gfx_ring[i].ready = false;
4254 	}
4255 	WREG32(mmCP_ME_CNTL, tmp);
4256 	udelay(50);
4257 }
4258 
4259 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4260 {
4261 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
4262 	const struct gfx_firmware_header_v1_0 *ce_hdr;
4263 	const struct gfx_firmware_header_v1_0 *me_hdr;
4264 	const __le32 *fw_data;
4265 	unsigned i, fw_size;
4266 
4267 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4268 		return -EINVAL;
4269 
4270 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4271 		adev->gfx.pfp_fw->data;
4272 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4273 		adev->gfx.ce_fw->data;
4274 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
4275 		adev->gfx.me_fw->data;
4276 
4277 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4278 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4279 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4280 
4281 	gfx_v8_0_cp_gfx_enable(adev, false);
4282 
4283 	/* PFP */
4284 	fw_data = (const __le32 *)
4285 		(adev->gfx.pfp_fw->data +
4286 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4287 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4288 	WREG32(mmCP_PFP_UCODE_ADDR, 0);
4289 	for (i = 0; i < fw_size; i++)
4290 		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4291 	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4292 
4293 	/* CE */
4294 	fw_data = (const __le32 *)
4295 		(adev->gfx.ce_fw->data +
4296 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4297 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4298 	WREG32(mmCP_CE_UCODE_ADDR, 0);
4299 	for (i = 0; i < fw_size; i++)
4300 		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4301 	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4302 
4303 	/* ME */
4304 	fw_data = (const __le32 *)
4305 		(adev->gfx.me_fw->data +
4306 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4307 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4308 	WREG32(mmCP_ME_RAM_WADDR, 0);
4309 	for (i = 0; i < fw_size; i++)
4310 		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4311 	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4312 
4313 	return 0;
4314 }
4315 
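/*
 * Size, in dwords, of the clear-state PM4 stream built by
 * gfx_v8_0_cp_gfx_start(): preamble begin/end, context control, the
 * SET_CONTEXT_REG extents, the raster config pair and CLEAR_STATE.
 */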
4316 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4317 {
4318 	u32 count = 0;
4319 	const struct cs_section_def *sect = NULL;
4320 	const struct cs_extent_def *ext = NULL;
4321 
4322 	/* begin clear state */
4323 	count += 2;
4324 	/* context control state */
4325 	count += 3;
4326 
4327 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4328 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4329 			if (sect->id == SECT_CONTEXT)
4330 				count += 2 + ext->reg_count;
4331 			else
4332 				return 0;
4333 		}
4334 	}
4335 	/* pa_sc_raster_config/pa_sc_raster_config1 */
4336 	count += 4;
4337 	/* end clear state */
4338 	count += 2;
4339 	/* clear state */
4340 	count += 2;
4341 
4342 	return count;
4343 }
4344 
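/*
 * Initialize the gfx ring with the clear-state preamble: context
 * control, the SECT_CONTEXT register extents from vi_cs_data, the
 * per-ASIC PA_SC_RASTER_CONFIG values and the CE partition bases.
 */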
4345 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4346 {
4347 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4348 	const struct cs_section_def *sect = NULL;
4349 	const struct cs_extent_def *ext = NULL;
4350 	int r, i;
4351 
4352 	/* init the CP */
4353 	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4354 	WREG32(mmCP_ENDIAN_SWAP, 0);
4355 	WREG32(mmCP_DEVICE_ID, 1);
4356 
4357 	gfx_v8_0_cp_gfx_enable(adev, true);
4358 
4359 	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4360 	if (r) {
4361 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4362 		return r;
4363 	}
4364 
4365 	/* clear state buffer */
4366 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4367 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4368 
4369 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4370 	amdgpu_ring_write(ring, 0x80000000);
4371 	amdgpu_ring_write(ring, 0x80000000);
4372 
4373 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4374 		for (ext = sect->section; ext->extent != NULL; ++ext) {
4375 			if (sect->id == SECT_CONTEXT) {
4376 				amdgpu_ring_write(ring,
4377 				       PACKET3(PACKET3_SET_CONTEXT_REG,
4378 					       ext->reg_count));
4379 				amdgpu_ring_write(ring,
4380 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4381 				for (i = 0; i < ext->reg_count; i++)
4382 					amdgpu_ring_write(ring, ext->extent[i]);
4383 			}
4384 		}
4385 	}
4386 
4387 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4388 	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4389 	switch (adev->asic_type) {
4390 	case CHIP_TONGA:
4391 	case CHIP_POLARIS10:
4392 		amdgpu_ring_write(ring, 0x16000012);
4393 		amdgpu_ring_write(ring, 0x0000002A);
4394 		break;
4395 	case CHIP_POLARIS11:
4396 	case CHIP_POLARIS12:
4397 		amdgpu_ring_write(ring, 0x16000012);
4398 		amdgpu_ring_write(ring, 0x00000000);
4399 		break;
4400 	case CHIP_FIJI:
4401 		amdgpu_ring_write(ring, 0x3a00161a);
4402 		amdgpu_ring_write(ring, 0x0000002e);
4403 		break;
4404 	case CHIP_CARRIZO:
4405 		amdgpu_ring_write(ring, 0x00000002);
4406 		amdgpu_ring_write(ring, 0x00000000);
4407 		break;
4408 	case CHIP_TOPAZ:
4409 		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4410 				0x00000000 : 0x00000002);
4411 		amdgpu_ring_write(ring, 0x00000000);
4412 		break;
4413 	case CHIP_STONEY:
4414 		amdgpu_ring_write(ring, 0x00000000);
4415 		amdgpu_ring_write(ring, 0x00000000);
4416 		break;
4417 	default:
4418 		BUG();
4419 	}
4420 
4421 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4422 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4423 
4424 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4425 	amdgpu_ring_write(ring, 0);
4426 
4427 	/* init the CE partitions */
4428 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4429 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4430 	amdgpu_ring_write(ring, 0x8000);
4431 	amdgpu_ring_write(ring, 0x8000);
4432 
4433 	amdgpu_ring_commit(ring);
4434 
4435 	return 0;
4436 }
4437 
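/*
 * Program ring buffer 0 (size, rptr/wptr writeback addresses, base) and,
 * on ASICs that support it, the gfx doorbell aperture, then start the
 * ring and run a ring test.
 */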
4438 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4439 {
4440 	struct amdgpu_ring *ring;
4441 	u32 tmp;
4442 	u32 rb_bufsz;
4443 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
4444 	int r;
4445 
4446 	/* Set the write pointer delay */
4447 	WREG32(mmCP_RB_WPTR_DELAY, 0);
4448 
4449 	/* set the RB to use vmid 0 */
4450 	WREG32(mmCP_RB_VMID, 0);
4451 
4452 	/* Set ring buffer size */
4453 	ring = &adev->gfx.gfx_ring[0];
4454 	rb_bufsz = order_base_2(ring->ring_size / 8);
4455 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4456 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4457 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4458 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4459 #ifdef __BIG_ENDIAN
4460 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4461 #endif
4462 	WREG32(mmCP_RB0_CNTL, tmp);
4463 
4464 	/* Initialize the ring buffer's read and write pointers */
4465 	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4466 	ring->wptr = 0;
4467 	WREG32(mmCP_RB0_WPTR, ring->wptr);
4468 
4469 	/* set the wb address whether it's enabled or not */
4470 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4471 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4472 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4473 
4474 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4475 	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4476 	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4477 	mdelay(1);
4478 	WREG32(mmCP_RB0_CNTL, tmp);
4479 
4480 	rb_addr = ring->gpu_addr >> 8;
4481 	WREG32(mmCP_RB0_BASE, rb_addr);
4482 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4483 
4484 	/* no gfx doorbells on Iceland (Topaz) */
4485 	if (adev->asic_type != CHIP_TOPAZ) {
4486 		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4487 		if (ring->use_doorbell) {
4488 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4489 					    DOORBELL_OFFSET, ring->doorbell_index);
4490 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4491 					    DOORBELL_HIT, 0);
4492 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4493 					    DOORBELL_EN, 1);
4494 		} else {
4495 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4496 					    DOORBELL_EN, 0);
4497 		}
4498 		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4499 
4500 		if (adev->asic_type == CHIP_TONGA) {
4501 			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4502 					    DOORBELL_RANGE_LOWER,
4503 					    AMDGPU_DOORBELL_GFX_RING0);
4504 			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4505 
4506 			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4507 			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4508 		}
4509 
4510 	}
4511 
4512 	/* start the ring */
4513 	gfx_v8_0_cp_gfx_start(adev);
4514 	ring->ready = true;
4515 	r = amdgpu_ring_test_ring(ring);
4516 	if (r)
4517 		ring->ready = false;
4518 
4519 	return r;
4520 }
4521 
4522 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4523 {
4524 	int i;
4525 
4526 	if (enable) {
4527 		WREG32(mmCP_MEC_CNTL, 0);
4528 	} else {
4529 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4530 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
4531 			adev->gfx.compute_ring[i].ready = false;
4532 	}
4533 	udelay(50);
4534 }
4535 
4536 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4537 {
4538 	const struct gfx_firmware_header_v1_0 *mec_hdr;
4539 	const __le32 *fw_data;
4540 	unsigned i, fw_size;
4541 
4542 	if (!adev->gfx.mec_fw)
4543 		return -EINVAL;
4544 
4545 	gfx_v8_0_cp_compute_enable(adev, false);
4546 
4547 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4548 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4549 
4550 	fw_data = (const __le32 *)
4551 		(adev->gfx.mec_fw->data +
4552 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4553 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4554 
4555 	/* MEC1 */
4556 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4557 	for (i = 0; i < fw_size; i++)
4558 		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4559 	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4560 
4561 	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4562 	if (adev->gfx.mec2_fw) {
4563 		const struct gfx_firmware_header_v1_0 *mec2_hdr;
4564 
4565 		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4566 		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4567 
4568 		fw_data = (const __le32 *)
4569 			(adev->gfx.mec2_fw->data +
4570 			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4571 		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4572 
4573 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4574 		for (i = 0; i < fw_size; i++)
4575 			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4576 		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4577 	}
4578 
4579 	return 0;
4580 }
4581 
4582 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4583 {
4584 	int i, r;
4585 
4586 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4587 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4588 
4589 		if (ring->mqd_obj) {
4590 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
4591 			if (unlikely(r != 0))
4592 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4593 
4594 			amdgpu_bo_unpin(ring->mqd_obj);
4595 			amdgpu_bo_unreserve(ring->mqd_obj);
4596 
4597 			amdgpu_bo_unref(&ring->mqd_obj);
4598 			ring->mqd_obj = NULL;
4599 		}
4600 	}
4601 }
4602 
4603 /* KIQ functions */
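/*
 * The kernel interface queue (KIQ) is a privileged compute queue the
 * driver uses to configure other compute queues with CP packets
 * (SET_RESOURCES, MAP_QUEUES) instead of writing the HQD registers
 * directly; under SR-IOV the guest must use this path.
 */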
4604 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4605 {
4606 	uint32_t tmp;
4607 	struct amdgpu_device *adev = ring->adev;
4608 
4609 	/* tell the RLC which queue is the KIQ */
4610 	tmp = RREG32(mmRLC_CP_SCHEDULERS);
4611 	tmp &= 0xffffff00;
4612 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4613 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4614 	tmp |= 0x80;
4615 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
4616 }
4617 
4618 static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
4619 {
4620 	amdgpu_ring_alloc(ring, 8);
4621 	/* set resources */
4622 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4623 	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
4624 	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
4625 	amdgpu_ring_write(ring, 0);	/* queue mask hi */
4626 	amdgpu_ring_write(ring, 0);	/* gws mask lo */
4627 	amdgpu_ring_write(ring, 0);	/* gws mask hi */
4628 	amdgpu_ring_write(ring, 0);	/* oac mask */
4629 	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
4630 	amdgpu_ring_commit(ring);
4631 	udelay(50);
4632 }
4633 
4634 static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
4635 				   struct amdgpu_ring *ring)
4636 {
4637 	struct amdgpu_device *adev = kiq_ring->adev;
4638 	uint64_t mqd_addr, wptr_addr;
4639 
4640 	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4641 	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4642 	amdgpu_ring_alloc(kiq_ring, 8);
4643 
4644 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4645 	/* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
4646 	amdgpu_ring_write(kiq_ring, 0x21010000);
4647 	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
4648 			(ring->queue << 26) |
4649 			(ring->pipe << 29) |
4650 			((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
4651 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4652 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4653 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4654 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4655 	amdgpu_ring_commit(kiq_ring);
4656 	udelay(50);
4657 }
4658 
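/*
 * Fill the memory queue descriptor (MQD) for a compute ring.  The MQD
 * mirrors the mmCP_HQD_* register state; the CP reads it to program the
 * hardware queue when the queue is mapped.
 */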
4659 static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
4660 			     struct vi_mqd *mqd,
4661 			     uint64_t mqd_gpu_addr,
4662 			     uint64_t eop_gpu_addr,
4663 			     struct amdgpu_ring *ring)
4664 {
4665 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4666 	uint32_t tmp;
4667 
4668 	mqd->header = 0xC0310800;
4669 	mqd->compute_pipelinestat_enable = 0x00000001;
4670 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4671 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4672 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4673 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4674 	mqd->compute_misc_reserved = 0x00000003;
4675 
4676 	eop_base_addr = eop_gpu_addr >> 8;
4677 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4678 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4679 
4680 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4681 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4682 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4683 			(order_base_2(MEC_HPD_SIZE / 4) - 1));
4684 
4685 	mqd->cp_hqd_eop_control = tmp;
4686 
4687 	/* enable doorbell? */
4688 	tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4689 
4690 	if (ring->use_doorbell)
4691 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4692 					 DOORBELL_EN, 1);
4693 	else
4694 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4695 					 DOORBELL_EN, 0);
4696 
4697 	mqd->cp_hqd_pq_doorbell_control = tmp;
4698 
4699 	/* disable the queue if it's active */
4700 	mqd->cp_hqd_dequeue_request = 0;
4701 	mqd->cp_hqd_pq_rptr = 0;
4702 	mqd->cp_hqd_pq_wptr = 0;
4703 
4704 	/* set the pointer to the MQD */
4705 	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4706 	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4707 
4708 	/* set MQD vmid to 0 */
4709 	tmp = RREG32(mmCP_MQD_CONTROL);
4710 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4711 	mqd->cp_mqd_control = tmp;
4712 
4713 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4714 	hqd_gpu_addr = ring->gpu_addr >> 8;
4715 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4716 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4717 
4718 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4719 	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4720 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4721 			    (order_base_2(ring->ring_size / 4) - 1));
4722 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4723 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4724 #ifdef __BIG_ENDIAN
4725 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4726 #endif
4727 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4728 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4729 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4730 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4731 	mqd->cp_hqd_pq_control = tmp;
4732 
4733 	/* set the wb address whether it's enabled or not */
4734 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4735 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4736 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4737 		upper_32_bits(wb_gpu_addr) & 0xffff;
4738 
4739 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4740 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4741 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4742 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4743 
4744 	tmp = 0;
4745 	/* enable the doorbell if requested */
4746 	if (ring->use_doorbell) {
4747 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4748 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4749 				DOORBELL_OFFSET, ring->doorbell_index);
4750 
4751 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4752 					 DOORBELL_EN, 1);
4753 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4754 					 DOORBELL_SOURCE, 0);
4755 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4756 					 DOORBELL_HIT, 0);
4757 	}
4758 
4759 	mqd->cp_hqd_pq_doorbell_control = tmp;
4760 
4761 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4762 	ring->wptr = 0;
4763 	mqd->cp_hqd_pq_wptr = ring->wptr;
4764 	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4765 
4766 	/* set the vmid for the queue */
4767 	mqd->cp_hqd_vmid = 0;
4768 
4769 	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4770 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4771 	mqd->cp_hqd_persistent_state = tmp;
4772 
4773 	/* activate the queue */
4774 	mqd->cp_hqd_active = 1;
4775 
4776 	return 0;
4777 }
4778 
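/*
 * Commit the MQD contents to the live CP_HQD_* registers.  The caller
 * must hold srbm_mutex and have selected this ring's me/pipe/queue via
 * vi_srbm_select().
 */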
4779 static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
4780 				      struct vi_mqd *mqd,
4781 				      struct amdgpu_ring *ring)
4782 {
4783 	uint32_t tmp;
4784 	int j;
4785 
4786 	/* disable wptr polling */
4787 	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4788 	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4789 	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4790 
4791 	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
4792 	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
4793 
4794 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4795 	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
4796 
4797 	/* enable doorbell? */
4798 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4799 
4800 	/* disable the queue if it's active */
4801 	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4802 		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4803 		for (j = 0; j < adev->usec_timeout; j++) {
4804 			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4805 				break;
4806 			udelay(1);
4807 		}
4808 		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4809 		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4810 		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4811 	}
4812 
4813 	/* set the pointer to the MQD */
4814 	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4815 	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4816 
4817 	/* set MQD vmid to 0 */
4818 	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
4819 
4820 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4821 	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4822 	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4823 
4824 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4825 	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
4826 
4827 	/* set the wb address whether it's enabled or not */
4828 	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4829 				mqd->cp_hqd_pq_rptr_report_addr_lo);
4830 	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4831 				mqd->cp_hqd_pq_rptr_report_addr_hi);
4832 
4833 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4834 	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
4835 	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
4836 
4837 	/* enable the doorbell if requested */
4838 	if (ring->use_doorbell) {
4839 		if ((adev->asic_type == CHIP_CARRIZO) ||
4840 				(adev->asic_type == CHIP_FIJI) ||
4841 				(adev->asic_type == CHIP_STONEY)) {
4842 			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4843 						AMDGPU_DOORBELL_KIQ << 2);
4844 			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4845 						AMDGPU_DOORBELL_MEC_RING7 << 2);
4846 		}
4847 	}
4848 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4849 
4850 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4851 	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4852 
4853 	/* set the vmid for the queue */
4854 	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4855 
4856 	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
4857 
4858 	/* activate the queue */
4859 	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4860 
4861 	if (ring->use_doorbell) {
4862 		tmp = RREG32(mmCP_PQ_STATUS);
4863 		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4864 		WREG32(mmCP_PQ_STATUS, tmp);
4865 	}
4866 
4867 	return 0;
4868 }
4869 
4870 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
4871 				   struct vi_mqd *mqd,
4872 				   u64 mqd_gpu_addr)
4873 {
4874 	struct amdgpu_device *adev = ring->adev;
4875 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4876 	uint64_t eop_gpu_addr;
4877 	bool is_kiq = false;
4878 
4879 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4880 		is_kiq = true;
4881 
4882 	if (is_kiq) {
4883 		eop_gpu_addr = kiq->eop_gpu_addr;
4884 		gfx_v8_0_kiq_setting(&kiq->ring);
4885 	} else
4886 		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
4887 					ring->queue * MEC_HPD_SIZE;
4888 
4889 	mutex_lock(&adev->srbm_mutex);
4890 	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4891 
4892 	gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
4893 
4894 	if (is_kiq)
4895 		gfx_v8_0_kiq_init_register(adev, mqd, ring);
4896 
4897 	vi_srbm_select(adev, 0, 0, 0, 0);
4898 	mutex_unlock(&adev->srbm_mutex);
4899 
4900 	if (is_kiq)
4901 		gfx_v8_0_kiq_enable(ring);
4902 	else
4903 		gfx_v8_0_map_queue_enable(&kiq->ring, ring);
4904 
4905 	return 0;
4906 }
4907 
4908 static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev)
4909 {
4910 	struct amdgpu_ring *ring = NULL;
4911 	int i;
4912 
4913 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4914 		ring = &adev->gfx.compute_ring[i];
4915 		amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
4916 		ring->mqd_obj = NULL;
4917 	}
4918 
4919 	ring = &adev->gfx.kiq.ring;
4920 	amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
4921 	ring->mqd_obj = NULL;
4922 }
4923 
4924 static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev,
4925 				    struct amdgpu_ring *ring)
4926 {
4927 	struct vi_mqd *mqd;
4928 	u64 mqd_gpu_addr;
4929 	u32 *buf;
4930 	int r = 0;
4931 
4932 	r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
4933 				    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
4934 				    &mqd_gpu_addr, (void **)&buf);
4935 	if (r) {
4936 		dev_warn(adev->dev, "failed to create ring mqd bo (%d)\n", r);
4937 		return r;
4938 	}
4939 
4940 	/* init the mqd struct */
4941 	memset(buf, 0, sizeof(struct vi_mqd));
4942 	mqd = (struct vi_mqd *)buf;
4943 
4944 	r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr);
4945 	if (r)
4946 		return r;
4947 
4948 	amdgpu_bo_kunmap(ring->mqd_obj);
4949 
4950 	return 0;
4951 }
4952 
4953 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4954 {
4955 	struct amdgpu_ring *ring = NULL;
4956 	int r, i;
4957 
4958 	ring = &adev->gfx.kiq.ring;
4959 	r = gfx_v8_0_kiq_setup_queue(adev, ring);
4960 	if (r)
4961 		return r;
4962 
4963 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4964 		ring = &adev->gfx.compute_ring[i];
4965 		r = gfx_v8_0_kiq_setup_queue(adev, ring);
4966 		if (r)
4967 			return r;
4968 	}
4969 
4970 	gfx_v8_0_cp_compute_enable(adev, true);
4971 
4972 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4973 		ring = &adev->gfx.compute_ring[i];
4974 
4975 		ring->ready = true;
4976 		r = amdgpu_ring_test_ring(ring);
4977 		if (r)
4978 			ring->ready = false;
4979 	}
4980 
4981 	ring = &adev->gfx.kiq.ring;
4982 	ring->ready = true;
4983 	r = amdgpu_ring_test_ring(ring);
4984 	if (r)
4985 		ring->ready = false;
4986 
4987 	return 0;
4988 }
4989 
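/*
 * Legacy (non-KIQ) compute bring-up: create, pin and map an MQD buffer
 * object per ring, then program each HQD directly under srbm_mutex.
 */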
4990 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4991 {
4992 	int r, i, j;
4993 	u32 tmp;
4994 	bool use_doorbell = true;
4995 	u64 hqd_gpu_addr;
4996 	u64 mqd_gpu_addr;
4997 	u64 eop_gpu_addr;
4998 	u64 wb_gpu_addr;
4999 	u32 *buf;
5000 	struct vi_mqd *mqd;
5001 
5002 	/* init the queues */
5003 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5004 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5005 
5006 		if (ring->mqd_obj == NULL) {
5007 			r = amdgpu_bo_create(adev,
5008 					     sizeof(struct vi_mqd),
5009 					     PAGE_SIZE, true,
5010 					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
5011 					     NULL, &ring->mqd_obj);
5012 			if (r) {
5013 				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
5014 				return r;
5015 			}
5016 		}
5017 
5018 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
5019 		if (unlikely(r != 0)) {
5020 			gfx_v8_0_cp_compute_fini(adev);
5021 			return r;
5022 		}
5023 		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
5024 				  &mqd_gpu_addr);
5025 		if (r) {
5026 			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
5027 			gfx_v8_0_cp_compute_fini(adev);
5028 			return r;
5029 		}
5030 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
5031 		if (r) {
5032 			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
5033 			gfx_v8_0_cp_compute_fini(adev);
5034 			return r;
5035 		}
5036 
5037 		/* init the mqd struct */
5038 		memset(buf, 0, sizeof(struct vi_mqd));
5039 
5040 		mqd = (struct vi_mqd *)buf;
5041 		mqd->header = 0xC0310800;
5042 		mqd->compute_pipelinestat_enable = 0x00000001;
5043 		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
5044 		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
5045 		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
5046 		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
5047 		mqd->compute_misc_reserved = 0x00000003;
5048 
5049 		mutex_lock(&adev->srbm_mutex);
5050 		vi_srbm_select(adev, ring->me,
5051 			       ring->pipe,
5052 			       ring->queue, 0);
5053 
5054 		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
5055 		eop_gpu_addr >>= 8;
5056 
5057 		/* write the EOP addr */
5058 		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
5059 		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
5060 
5061 		/* set the VMID assigned */
5062 		WREG32(mmCP_HQD_VMID, 0);
5063 
5064 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
5065 		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
5066 		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
5067 				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
5068 		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
5069 
5070 		/* disable wptr polling */
5071 		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
5072 		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
5073 		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
5074 
5075 		mqd->cp_hqd_eop_base_addr_lo =
5076 			RREG32(mmCP_HQD_EOP_BASE_ADDR);
5077 		mqd->cp_hqd_eop_base_addr_hi =
5078 			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
5079 
5080 		/* enable doorbell? */
5081 		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
5082 		if (use_doorbell) {
5083 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
5084 		} else {
5085 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
5086 		}
5087 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
5088 		mqd->cp_hqd_pq_doorbell_control = tmp;
5089 
5090 		/* disable the queue if it's active */
5091 		mqd->cp_hqd_dequeue_request = 0;
5092 		mqd->cp_hqd_pq_rptr = 0;
5093 		mqd->cp_hqd_pq_wptr = 0;
5094 		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
5095 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
5096 			for (j = 0; j < adev->usec_timeout; j++) {
5097 				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
5098 					break;
5099 				udelay(1);
5100 			}
5101 			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
5102 			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
5103 			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
5104 		}
5105 
5106 		/* set the pointer to the MQD */
5107 		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
5108 		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5109 		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
5110 		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
5111 
5112 		/* set MQD vmid to 0 */
5113 		tmp = RREG32(mmCP_MQD_CONTROL);
5114 		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
5115 		WREG32(mmCP_MQD_CONTROL, tmp);
5116 		mqd->cp_mqd_control = tmp;
5117 
5118 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5119 		hqd_gpu_addr = ring->gpu_addr >> 8;
5120 		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
5121 		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5122 		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
5123 		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
5124 
5125 		/* set up the HQD, this is similar to CP_RB0_CNTL */
5126 		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
5127 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
5128 				    (order_base_2(ring->ring_size / 4) - 1));
5129 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
5130 			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
5131 #ifdef __BIG_ENDIAN
5132 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
5133 #endif
5134 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
5135 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
5136 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
5137 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
5138 		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
5139 		mqd->cp_hqd_pq_control = tmp;
5140 
5141 		/* set the wb address whether it's enabled or not */
5142 		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
5143 		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
5144 		mqd->cp_hqd_pq_rptr_report_addr_hi =
5145 			upper_32_bits(wb_gpu_addr) & 0xffff;
5146 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
5147 		       mqd->cp_hqd_pq_rptr_report_addr_lo);
5148 		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5149 		       mqd->cp_hqd_pq_rptr_report_addr_hi);
5150 
5151 		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
5152 		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
5153 		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
5154 		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5155 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
5156 		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
5157 		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
5158 
5159 		/* enable the doorbell if requested */
5160 		if (use_doorbell) {
5161 			if ((adev->asic_type == CHIP_CARRIZO) ||
5162 			    (adev->asic_type == CHIP_FIJI) ||
5163 			    (adev->asic_type == CHIP_STONEY) ||
5164 			    (adev->asic_type == CHIP_POLARIS11) ||
5165 			    (adev->asic_type == CHIP_POLARIS10) ||
5166 			    (adev->asic_type == CHIP_POLARIS12)) {
5167 				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
5168 				       AMDGPU_DOORBELL_KIQ << 2);
5169 				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
5170 				       AMDGPU_DOORBELL_MEC_RING7 << 2);
5171 			}
5172 			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
5173 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
5174 					    DOORBELL_OFFSET, ring->doorbell_index);
5175 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
5176 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
5177 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
5178 			mqd->cp_hqd_pq_doorbell_control = tmp;
5179 
5180 		} else {
5181 			mqd->cp_hqd_pq_doorbell_control = 0;
5182 		}
5183 		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
5184 		       mqd->cp_hqd_pq_doorbell_control);
5185 
5186 		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5187 		ring->wptr = 0;
5188 		mqd->cp_hqd_pq_wptr = ring->wptr;
5189 		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
5190 		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
5191 
5192 		/* set the vmid for the queue */
5193 		mqd->cp_hqd_vmid = 0;
5194 		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
5195 
5196 		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
5197 		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
5198 		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
5199 		mqd->cp_hqd_persistent_state = tmp;
5200 		if (adev->asic_type == CHIP_STONEY ||
5201 			adev->asic_type == CHIP_POLARIS11 ||
5202 			adev->asic_type == CHIP_POLARIS10 ||
5203 			adev->asic_type == CHIP_POLARIS12) {
5204 			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
5205 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
5206 			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
5207 		}
5208 
5209 		/* activate the queue */
5210 		mqd->cp_hqd_active = 1;
5211 		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
5212 
5213 		vi_srbm_select(adev, 0, 0, 0, 0);
5214 		mutex_unlock(&adev->srbm_mutex);
5215 
5216 		amdgpu_bo_kunmap(ring->mqd_obj);
5217 		amdgpu_bo_unreserve(ring->mqd_obj);
5218 	}
5219 
5220 	if (use_doorbell) {
5221 		tmp = RREG32(mmCP_PQ_STATUS);
5222 		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
5223 		WREG32(mmCP_PQ_STATUS, tmp);
5224 	}
5225 
5226 	gfx_v8_0_cp_compute_enable(adev, true);
5227 
5228 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5229 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5230 
5231 		ring->ready = true;
5232 		r = amdgpu_ring_test_ring(ring);
5233 		if (r)
5234 			ring->ready = false;
5235 	}
5236 
5237 	return 0;
5238 }
5239 
5240 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5241 {
5242 	int r;
5243 
5244 	if (!(adev->flags & AMD_IS_APU))
5245 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5246 
5247 	if (!adev->pp_enabled) {
5248 		if (!adev->firmware.smu_load) {
5249 			/* legacy firmware loading */
5250 			r = gfx_v8_0_cp_gfx_load_microcode(adev);
5251 			if (r)
5252 				return r;
5253 
5254 			r = gfx_v8_0_cp_compute_load_microcode(adev);
5255 			if (r)
5256 				return r;
5257 		} else {
5258 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5259 							AMDGPU_UCODE_ID_CP_CE);
5260 			if (r)
5261 				return -EINVAL;
5262 
5263 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5264 							AMDGPU_UCODE_ID_CP_PFP);
5265 			if (r)
5266 				return -EINVAL;
5267 
5268 			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5269 							AMDGPU_UCODE_ID_CP_ME);
5270 			if (r)
5271 				return -EINVAL;
5272 
5273 			if (adev->asic_type == CHIP_TOPAZ) {
5274 				r = gfx_v8_0_cp_compute_load_microcode(adev);
5275 				if (r)
5276 					return r;
5277 			} else {
5278 				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5279 										 AMDGPU_UCODE_ID_CP_MEC1);
5280 				if (r)
5281 					return -EINVAL;
5282 			}
5283 		}
5284 	}
5285 
5286 	r = gfx_v8_0_cp_gfx_resume(adev);
5287 	if (r)
5288 		return r;
5289 
5290 	if (amdgpu_sriov_vf(adev))
5291 		r = gfx_v8_0_kiq_resume(adev);
5292 	else
5293 		r = gfx_v8_0_cp_compute_resume(adev);
5294 	if (r)
5295 		return r;
5296 
5297 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5298 
5299 	return 0;
5300 }
5301 
5302 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5303 {
5304 	gfx_v8_0_cp_gfx_enable(adev, enable);
5305 	gfx_v8_0_cp_compute_enable(adev, enable);
5306 }
5307 
5308 static int gfx_v8_0_hw_init(void *handle)
5309 {
5310 	int r;
5311 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5312 
5313 	gfx_v8_0_init_golden_registers(adev);
5314 	gfx_v8_0_gpu_init(adev);
5315 
5316 	r = gfx_v8_0_rlc_resume(adev);
5317 	if (r)
5318 		return r;
5319 
5320 	r = gfx_v8_0_cp_resume(adev);
5321 
5322 	return r;
5323 }
5324 
5325 static int gfx_v8_0_hw_fini(void *handle)
5326 {
5327 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5328 
5329 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5330 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5331 	if (amdgpu_sriov_vf(adev)) {
5332 		gfx_v8_0_kiq_free_queue(adev);
5333 		pr_debug("For SRIOV clients, nothing else to do here.\n");
5334 		return 0;
5335 	}
5336 	gfx_v8_0_cp_enable(adev, false);
5337 	gfx_v8_0_rlc_stop(adev);
5338 	gfx_v8_0_cp_compute_fini(adev);
5339 
5340 	amdgpu_set_powergating_state(adev,
5341 			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5342 
5343 	return 0;
5344 }
5345 
5346 static int gfx_v8_0_suspend(void *handle)
5347 {
5348 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5349 
5350 	return gfx_v8_0_hw_fini(adev);
5351 }
5352 
5353 static int gfx_v8_0_resume(void *handle)
5354 {
5355 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5356 
5357 	return gfx_v8_0_hw_init(adev);
5358 }
5359 
5360 static bool gfx_v8_0_is_idle(void *handle)
5361 {
5362 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5363 
5364 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5365 		return false;
5366 	else
5367 		return true;
5368 }
5369 
5370 static int gfx_v8_0_wait_for_idle(void *handle)
5371 {
5372 	unsigned i;
5373 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5374 
5375 	for (i = 0; i < adev->usec_timeout; i++) {
5376 		if (gfx_v8_0_is_idle(handle))
5377 			return 0;
5378 
5379 		udelay(1);
5380 	}
5381 	return -ETIMEDOUT;
5382 }
5383 
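/*
 * Inspect the GRBM/SRBM status registers and, for any block still busy,
 * record the soft-reset bits to apply later in adev->gfx.grbm_soft_reset
 * and adev->gfx.srbm_soft_reset.  Returns true if a reset is needed.
 */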
5384 static bool gfx_v8_0_check_soft_reset(void *handle)
5385 {
5386 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5387 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5388 	u32 tmp;
5389 
5390 	/* GRBM_STATUS */
5391 	tmp = RREG32(mmGRBM_STATUS);
5392 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5393 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5394 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5395 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5396 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5397 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5398 		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5399 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5400 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5401 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5402 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5403 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5404 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5405 	}
5406 
5407 	/* GRBM_STATUS2 */
5408 	tmp = RREG32(mmGRBM_STATUS2);
5409 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5410 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5411 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5412 
5413 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5414 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5415 	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5416 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5417 						SOFT_RESET_CPF, 1);
5418 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5419 						SOFT_RESET_CPC, 1);
5420 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5421 						SOFT_RESET_CPG, 1);
5422 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5423 						SOFT_RESET_GRBM, 1);
5424 	}
5425 
5426 	/* SRBM_STATUS */
5427 	tmp = RREG32(mmSRBM_STATUS);
5428 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5429 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5430 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5431 	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5432 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5433 						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5434 
5435 	if (grbm_soft_reset || srbm_soft_reset) {
5436 		adev->gfx.grbm_soft_reset = grbm_soft_reset;
5437 		adev->gfx.srbm_soft_reset = srbm_soft_reset;
5438 		return true;
5439 	} else {
5440 		adev->gfx.grbm_soft_reset = 0;
5441 		adev->gfx.srbm_soft_reset = 0;
5442 		return false;
5443 	}
5444 }
5445 
5446 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5447 				  struct amdgpu_ring *ring)
5448 {
5449 	int i;
5450 
5451 	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5452 	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5453 		u32 tmp;
5454 		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
5455 		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
5456 				    DEQUEUE_REQ, 2);
5457 		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
5458 		for (i = 0; i < adev->usec_timeout; i++) {
5459 			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5460 				break;
5461 			udelay(1);
5462 		}
5463 	}
5464 }
5465 
5466 static int gfx_v8_0_pre_soft_reset(void *handle)
5467 {
5468 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5469 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5470 
5471 	if ((!adev->gfx.grbm_soft_reset) &&
5472 	    (!adev->gfx.srbm_soft_reset))
5473 		return 0;
5474 
5475 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5476 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5477 
5478 	/* stop the rlc */
5479 	gfx_v8_0_rlc_stop(adev);
5480 
5481 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5482 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5483 		/* Disable GFX parsing/prefetching */
5484 		gfx_v8_0_cp_gfx_enable(adev, false);
5485 
5486 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5487 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5488 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5489 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5490 		int i;
5491 
5492 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5493 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5494 
5495 			gfx_v8_0_inactive_hqd(adev, ring);
5496 		}
5497 		/* Disable MEC parsing/prefetching */
5498 		gfx_v8_0_cp_compute_enable(adev, false);
5499 	}
5500 
5501 	return 0;
5502 }
5503 
5504 static int gfx_v8_0_soft_reset(void *handle)
5505 {
5506 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5507 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5508 	u32 tmp;
5509 
5510 	if ((!adev->gfx.grbm_soft_reset) &&
5511 	    (!adev->gfx.srbm_soft_reset))
5512 		return 0;
5513 
5514 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5515 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5516 
5517 	if (grbm_soft_reset || srbm_soft_reset) {
5518 		tmp = RREG32(mmGMCON_DEBUG);
5519 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5520 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5521 		WREG32(mmGMCON_DEBUG, tmp);
5522 		udelay(50);
5523 	}
5524 
5525 	if (grbm_soft_reset) {
5526 		tmp = RREG32(mmGRBM_SOFT_RESET);
5527 		tmp |= grbm_soft_reset;
5528 		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5529 		WREG32(mmGRBM_SOFT_RESET, tmp);
5530 		tmp = RREG32(mmGRBM_SOFT_RESET);
5531 
5532 		udelay(50);
5533 
5534 		tmp &= ~grbm_soft_reset;
5535 		WREG32(mmGRBM_SOFT_RESET, tmp);
5536 		tmp = RREG32(mmGRBM_SOFT_RESET);
5537 	}
5538 
5539 	if (srbm_soft_reset) {
5540 		tmp = RREG32(mmSRBM_SOFT_RESET);
5541 		tmp |= srbm_soft_reset;
5542 		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5543 		WREG32(mmSRBM_SOFT_RESET, tmp);
5544 		tmp = RREG32(mmSRBM_SOFT_RESET);
5545 
5546 		udelay(50);
5547 
5548 		tmp &= ~srbm_soft_reset;
5549 		WREG32(mmSRBM_SOFT_RESET, tmp);
5550 		tmp = RREG32(mmSRBM_SOFT_RESET);
5551 	}
5552 
5553 	if (grbm_soft_reset || srbm_soft_reset) {
5554 		tmp = RREG32(mmGMCON_DEBUG);
5555 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5556 		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5557 		WREG32(mmGMCON_DEBUG, tmp);
5558 	}
5559 
5560 	/* Wait a little for things to settle down */
5561 	udelay(50);
5562 
5563 	return 0;
5564 }
5565 
5566 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5567 			      struct amdgpu_ring *ring)
5568 {
5569 	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5570 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5571 	WREG32(mmCP_HQD_PQ_RPTR, 0);
5572 	WREG32(mmCP_HQD_PQ_WPTR, 0);
5573 	vi_srbm_select(adev, 0, 0, 0, 0);
5574 }
5575 
5576 static int gfx_v8_0_post_soft_reset(void *handle)
5577 {
5578 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5579 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5580 
5581 	if ((!adev->gfx.grbm_soft_reset) &&
5582 	    (!adev->gfx.srbm_soft_reset))
5583 		return 0;
5584 
5585 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
5586 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
5587 
5588 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5589 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5590 		gfx_v8_0_cp_gfx_resume(adev);
5591 
5592 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5593 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5594 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5595 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5596 		int i;
5597 
5598 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5599 			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5600 
5601 			gfx_v8_0_init_hqd(adev, ring);
5602 		}
5603 		gfx_v8_0_cp_compute_resume(adev);
5604 	}
5605 	gfx_v8_0_rlc_start(adev);
5606 
5607 	return 0;
5608 }
5609 
5610 /**
5611  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5612  *
5613  * @adev: amdgpu_device pointer
5614  *
5615  * Fetches a snapshot of the GPU clock counter.
5616  * Returns the 64-bit clock counter value.
5617  */
5618 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5619 {
5620 	uint64_t clock;
5621 
5622 	mutex_lock(&adev->gfx.gpu_clock_mutex);
5623 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5624 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5625 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5626 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
5627 	return clock;
5628 }
5629 
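/*
 * Emit WRITE_DATA packets that program this VMID's GDS, GWS and OA
 * allocations; the base/size arguments are scaled by the AMDGPU_*_SHIFT
 * factors to match the register units.
 */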
5630 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5631 					  uint32_t vmid,
5632 					  uint32_t gds_base, uint32_t gds_size,
5633 					  uint32_t gws_base, uint32_t gws_size,
5634 					  uint32_t oa_base, uint32_t oa_size)
5635 {
5636 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5637 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5638 
5639 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5640 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5641 
5642 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
5643 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
5644 
5645 	/* GDS Base */
5646 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5647 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5648 				WRITE_DATA_DST_SEL(0)));
5649 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5650 	amdgpu_ring_write(ring, 0);
5651 	amdgpu_ring_write(ring, gds_base);
5652 
5653 	/* GDS Size */
5654 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5655 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5656 				WRITE_DATA_DST_SEL(0)));
5657 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5658 	amdgpu_ring_write(ring, 0);
5659 	amdgpu_ring_write(ring, gds_size);
5660 
5661 	/* GWS */
5662 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5663 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5664 				WRITE_DATA_DST_SEL(0)));
5665 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5666 	amdgpu_ring_write(ring, 0);
5667 	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5668 
5669 	/* OA */
5670 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5671 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5672 				WRITE_DATA_DST_SEL(0)));
5673 	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5674 	amdgpu_ring_write(ring, 0);
5675 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5676 }
5677 
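/*
 * Read SQ per-wave state through the SQ_IND_INDEX/SQ_IND_DATA indirect
 * register pair; used by the wave-state readers below.
 */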
5678 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5679 {
5680 	WREG32(mmSQ_IND_INDEX,
5681 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5682 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5683 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
5684 		(SQ_IND_INDEX__FORCE_READ_MASK));
5685 	return RREG32(mmSQ_IND_DATA);
5686 }
5687 
5688 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5689 			   uint32_t wave, uint32_t thread,
5690 			   uint32_t regno, uint32_t num, uint32_t *out)
5691 {
5692 	WREG32(mmSQ_IND_INDEX,
5693 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5694 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5695 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
5696 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5697 		(SQ_IND_INDEX__FORCE_READ_MASK) |
5698 		(SQ_IND_INDEX__AUTO_INCR_MASK));
5699 	while (num--)
5700 		*(out++) = RREG32(mmSQ_IND_DATA);
5701 }
5702 
5703 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5704 {
5705 	/* type 0 wave data */
5706 	dst[(*no_fields)++] = 0;
5707 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5708 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5709 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5710 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5711 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5712 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5713 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5714 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5715 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5716 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5717 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5718 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5719 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5720 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5721 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5722 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5723 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5724 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5725 }
5726 
5727 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5728 				     uint32_t wave, uint32_t start,
5729 				     uint32_t size, uint32_t *dst)
5730 {
5731 	wave_read_regs(
5732 		adev, simd, wave, 0,
5733 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5734 }
5735 
5737 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5738 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5739 	.select_se_sh = &gfx_v8_0_select_se_sh,
5740 	.read_wave_data = &gfx_v8_0_read_wave_data,
5741 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5742 };
5743 
5744 static int gfx_v8_0_early_init(void *handle)
5745 {
5746 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5747 
5748 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5749 	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5750 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5751 	gfx_v8_0_set_ring_funcs(adev);
5752 	gfx_v8_0_set_irq_funcs(adev);
5753 	gfx_v8_0_set_gds_init(adev);
5754 	gfx_v8_0_set_rlc_funcs(adev);
5755 
5756 	return 0;
5757 }
5758 
5759 static int gfx_v8_0_late_init(void *handle)
5760 {
5761 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5762 	int r;
5763 
5764 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5765 	if (r)
5766 		return r;
5767 
5768 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5769 	if (r)
5770 		return r;
5771 
5772 	/* requires IBs so do in late init after IB pool is initialized */
5773 	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5774 	if (r)
5775 		return r;
5776 
5777 	amdgpu_set_powergating_state(adev,
5778 			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5779 
5780 	return 0;
5781 }
5782 
5783 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5784 						       bool enable)
5785 {
5786 	if ((adev->asic_type == CHIP_POLARIS11) ||
5787 	    (adev->asic_type == CHIP_POLARIS12))
5788 		/* Send msg to SMU via Powerplay */
5789 		amdgpu_set_powergating_state(adev,
5790 					     AMD_IP_BLOCK_TYPE_SMC,
5791 					     enable ?
5792 					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5793 
5794 	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5795 }
5796 
5797 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5798 							bool enable)
5799 {
5800 	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5801 }
5802 
5803 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5804 		bool enable)
5805 {
5806 	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5807 }
5808 
5809 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5810 					  bool enable)
5811 {
5812 	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5813 }
5814 
5815 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5816 						bool enable)
5817 {
5818 	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5819 
5820 	/* Read any GFX register to wake up GFX. */
5821 	if (!enable)
5822 		RREG32(mmDB_RENDER_CONTROL);
5823 }
5824 
5825 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5826 					  bool enable)
5827 {
5828 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5829 		cz_enable_gfx_cg_power_gating(adev, true);
5830 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5831 			cz_enable_gfx_pipeline_power_gating(adev, true);
5832 	} else {
5833 		cz_enable_gfx_cg_power_gating(adev, false);
5834 		cz_enable_gfx_pipeline_power_gating(adev, false);
5835 	}
5836 }
5837 
5838 static int gfx_v8_0_set_powergating_state(void *handle,
5839 					  enum amd_powergating_state state)
5840 {
5841 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);
5843 
5844 	switch (adev->asic_type) {
5845 	case CHIP_CARRIZO:
5846 	case CHIP_STONEY:
5848 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5849 			cz_enable_sck_slow_down_on_power_up(adev, true);
5850 			cz_enable_sck_slow_down_on_power_down(adev, true);
5851 		} else {
5852 			cz_enable_sck_slow_down_on_power_up(adev, false);
5853 			cz_enable_sck_slow_down_on_power_down(adev, false);
5854 		}
5855 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5856 			cz_enable_cp_power_gating(adev, true);
5857 		else
5858 			cz_enable_cp_power_gating(adev, false);
5859 
5860 		cz_update_gfx_cg_power_gating(adev, enable);
5861 
5862 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5863 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5864 		else
5865 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5866 
5867 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5868 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5869 		else
5870 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5871 		break;
5872 	case CHIP_POLARIS11:
5873 	case CHIP_POLARIS12:
5874 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5875 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5876 		else
5877 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5878 
5879 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5880 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5881 		else
5882 			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5883 
5884 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5885 			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5886 		else
5887 			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5888 		break;
5889 	default:
5890 		break;
5891 	}
5892 
5893 	return 0;
5894 }
5895 
5896 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5897 {
5898 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 data;
5900 
5901 	/* AMD_CG_SUPPORT_GFX_MGCG */
5902 	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5903 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5904 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5905 
	/* AMD_CG_SUPPORT_GFX_CGCG */
5907 	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5908 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5909 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5910 
5911 	/* AMD_CG_SUPPORT_GFX_CGLS */
5912 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5913 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5914 
5915 	/* AMD_CG_SUPPORT_GFX_CGTS */
5916 	data = RREG32(mmCGTS_SM_CTRL_REG);
5917 	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5918 		*flags |= AMD_CG_SUPPORT_GFX_CGTS;
5919 
5920 	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
5921 	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5922 		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5923 
5924 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5925 	data = RREG32(mmRLC_MEM_SLP_CNTL);
5926 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5927 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5928 
5929 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5930 	data = RREG32(mmCP_MEM_SLP_CNTL);
5931 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5932 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5933 }
5934 
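/* Broadcast a BPM command to the serdes of every CU: select all
 * SEs/SHs, unmask all CU and non-CU masters, then program
 * RLC_SERDES_WR_CTRL with the command and target BPM register address.
 */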
5935 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5936 				     uint32_t reg_addr, uint32_t cmd)
5937 {
5938 	uint32_t data;
5939 
5940 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5941 
5942 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5943 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5944 
5945 	data = RREG32(mmRLC_SERDES_WR_CTRL);
5946 	if (adev->asic_type == CHIP_STONEY)
5947 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5948 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5949 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5950 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5951 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5952 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5953 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5954 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5955 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5956 	else
5957 		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5958 			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5959 			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5960 			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5961 			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5962 			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5963 			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5964 			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5965 			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5966 			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5967 			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5968 	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5969 		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5970 		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5971 		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5972 
5973 	WREG32(mmRLC_SERDES_WR_CTRL, data);
5974 }
5975 
5976 #define MSG_ENTER_RLC_SAFE_MODE     1
5977 #define MSG_EXIT_RLC_SAFE_MODE      0
5978 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5979 #define RLC_GPR_REG2__REQ__SHIFT 0
5980 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5981 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5982 
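/* Ask the RLC to put the GFX block into safe mode before clock/power
 * gating state is changed: raise CMD with MESSAGE = 1, wait for the GFX
 * clock and power status to report active, then wait for the RLC to
 * acknowledge by clearing the CMD field.
 */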
5983 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5984 {
5985 	u32 data;
5986 	unsigned i;
5987 
5988 	data = RREG32(mmRLC_CNTL);
5989 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5990 		return;
5991 
5992 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5993 		data |= RLC_SAFE_MODE__CMD_MASK;
5994 		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5995 		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5996 		WREG32(mmRLC_SAFE_MODE, data);
5997 
5998 		for (i = 0; i < adev->usec_timeout; i++) {
5999 			if ((RREG32(mmRLC_GPM_STAT) &
6000 			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
6001 			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
6002 			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
6003 			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
6004 				break;
6005 			udelay(1);
6006 		}
6007 
6008 		for (i = 0; i < adev->usec_timeout; i++) {
6009 			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
6010 				break;
6011 			udelay(1);
6012 		}
6013 		adev->gfx.rlc.in_safe_mode = true;
6014 	}
6015 }
6016 
6017 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
6018 {
6019 	u32 data = 0;
6020 	unsigned i;
6021 
6022 	data = RREG32(mmRLC_CNTL);
6023 	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
6024 		return;
6025 
6026 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
6027 		if (adev->gfx.rlc.in_safe_mode) {
6028 			data |= RLC_SAFE_MODE__CMD_MASK;
6029 			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
6030 			WREG32(mmRLC_SAFE_MODE, data);
6031 			adev->gfx.rlc.in_safe_mode = false;
6032 		}
6033 	}
6034 
6035 	for (i = 0; i < adev->usec_timeout; i++) {
6036 		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
6037 			break;
6038 		udelay(1);
6039 	}
6040 }
6041 
6042 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
6043 	.enter_safe_mode = iceland_enter_rlc_safe_mode,
6044 	.exit_safe_mode = iceland_exit_rlc_safe_mode
6045 };
6046 
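/* Enable or disable medium grain clock gating (MGCG) and the related
 * memory light sleep features.  The whole sequence runs in RLC safe
 * mode, waits for the serdes masters to go idle between steps, and uses
 * BPM serdes commands to clear or set the per-CU MGCG override.
 */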
6047 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
6048 						      bool enable)
6049 {
6050 	uint32_t temp, data;
6051 
6052 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
6053 
6054 	/* It is disabled by HW by default */
6055 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
6056 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6057 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
6058 				/* 1 - RLC memory Light sleep */
6059 				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
6060 
6061 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
6062 				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
6063 		}
6064 
6065 		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
6066 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6067 		if (adev->flags & AMD_IS_APU)
6068 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6069 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6070 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
6071 		else
6072 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6073 				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6074 				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
6075 				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6076 
6077 		if (temp != data)
6078 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
6079 
6080 		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6081 		gfx_v8_0_wait_for_rlc_serdes(adev);
6082 
6083 		/* 5 - clear mgcg override */
6084 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6085 
6086 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS (Tree Shade) MGCG/MGLS */
6088 			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6089 			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
6090 			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
6091 			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
6092 			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
6093 			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
6094 			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
6095 				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
6096 			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
6097 			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
6098 			if (temp != data)
6099 				WREG32(mmCGTS_SM_CTRL_REG, data);
6100 		}
6101 		udelay(50);
6102 
6103 		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6104 		gfx_v8_0_wait_for_rlc_serdes(adev);
6105 	} else {
6106 		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
6107 		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6108 		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6109 				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6110 				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
6111 				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6112 		if (temp != data)
6113 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
6114 
6115 		/* 2 - disable MGLS in RLC */
6116 		data = RREG32(mmRLC_MEM_SLP_CNTL);
6117 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
6118 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
6119 			WREG32(mmRLC_MEM_SLP_CNTL, data);
6120 		}
6121 
6122 		/* 3 - disable MGLS in CP */
6123 		data = RREG32(mmCP_MEM_SLP_CNTL);
6124 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
6125 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
6126 			WREG32(mmCP_MEM_SLP_CNTL, data);
6127 		}
6128 
		/* 4 - Disable CGTS (Tree Shade) MGCG and MGLS */
6130 		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6131 		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
6132 				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
6133 		if (temp != data)
6134 			WREG32(mmCGTS_SM_CTRL_REG, data);
6135 
6136 		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6137 		gfx_v8_0_wait_for_rlc_serdes(adev);
6138 
6139 		/* 6 - set mgcg override */
6140 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6141 
6142 		udelay(50);
6143 
		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6145 		gfx_v8_0_wait_for_rlc_serdes(adev);
6146 	}
6147 
6148 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
6149 }
6150 
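/* Enable or disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS).  Like the MGCG path, the overrides are sequenced
 * through BPM serdes commands under RLC safe mode before the enable
 * bits in RLC_CGCG_CGLS_CTRL are flipped.
 */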
6151 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
6152 						      bool enable)
6153 {
6154 	uint32_t temp, temp1, data, data1;
6155 
6156 	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
6157 
6158 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
6159 
6160 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
6161 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6162 		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
6163 		if (temp1 != data1)
6164 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6165 
		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6167 		gfx_v8_0_wait_for_rlc_serdes(adev);
6168 
6169 		/* 2 - clear cgcg override */
6170 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6171 
6172 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6173 		gfx_v8_0_wait_for_rlc_serdes(adev);
6174 
6175 		/* 3 - write cmd to set CGLS */
6176 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6177 
6178 		/* 4 - enable cgcg */
6179 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
6180 
6181 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
6183 			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6184 
6185 			temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6186 			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6187 
6188 			if (temp1 != data1)
6189 				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6190 		} else {
6191 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6192 		}
6193 
6194 		if (temp != data)
6195 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6196 
		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
6200 		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6201 	} else {
6202 		/* disable cntx_empty_int_enable & GFX Idle interrupt */
6203 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
6204 
6205 		/* TEST CGCG */
6206 		temp1 = data1 =	RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6207 		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6208 				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6209 		if (temp1 != data1)
6210 			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6211 
6212 		/* read gfx register to wake up cgcg */
6213 		RREG32(mmCB_CGTT_SCLK_CTRL);
6214 		RREG32(mmCB_CGTT_SCLK_CTRL);
6215 		RREG32(mmCB_CGTT_SCLK_CTRL);
6216 		RREG32(mmCB_CGTT_SCLK_CTRL);
6217 
6218 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6219 		gfx_v8_0_wait_for_rlc_serdes(adev);
6220 
		/* write cmd to set CGCG override */
6222 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6223 
6224 		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6225 		gfx_v8_0_wait_for_rlc_serdes(adev);
6226 
6227 		/* write cmd to Clear CGLS */
6228 		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6229 
6230 		/* disable cgcg, cgls should be disabled too. */
6231 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6232 			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6233 		if (temp != data)
6234 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6235 	}
6236 
6237 	gfx_v8_0_wait_for_rlc_serdes(adev);
6238 
6239 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
6240 }

static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6242 					    bool enable)
6243 {
6244 	if (enable) {
6245 		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6246 		 * ===  MGCG + MGLS + TS(CG/LS) ===
6247 		 */
6248 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6249 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6250 	} else {
6251 		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6252 		 * ===  CGCG + CGLS ===
6253 		 */
6254 		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6255 		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6256 	}
6257 	return 0;
6258 }
6259 
6260 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6261 					  enum amd_clockgating_state state)
6262 {
6263 	uint32_t msg_id, pp_state = 0;
6264 	uint32_t pp_support_state = 0;
6265 	void *pp_handle = adev->powerplay.pp_handle;
6266 
6267 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6268 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6269 			pp_support_state = PP_STATE_SUPPORT_LS;
6270 			pp_state = PP_STATE_LS;
6271 		}
6272 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6273 			pp_support_state |= PP_STATE_SUPPORT_CG;
6274 			pp_state |= PP_STATE_CG;
6275 		}
6276 		if (state == AMD_CG_STATE_UNGATE)
6277 			pp_state = 0;
6278 
6279 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6280 				PP_BLOCK_GFX_CG,
6281 				pp_support_state,
6282 				pp_state);
6283 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6284 	}
6285 
6286 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6287 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6288 			pp_support_state = PP_STATE_SUPPORT_LS;
6289 			pp_state = PP_STATE_LS;
6290 		}
6291 
6292 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6293 			pp_support_state |= PP_STATE_SUPPORT_CG;
6294 			pp_state |= PP_STATE_CG;
6295 		}
6296 
6297 		if (state == AMD_CG_STATE_UNGATE)
6298 			pp_state = 0;
6299 
6300 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6301 				PP_BLOCK_GFX_MG,
6302 				pp_support_state,
6303 				pp_state);
6304 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6305 	}
6306 
6307 	return 0;
6308 }
6309 
6310 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6311 					  enum amd_clockgating_state state)
6312 {
6314 	uint32_t msg_id, pp_state = 0;
6315 	uint32_t pp_support_state = 0;
6316 	void *pp_handle = adev->powerplay.pp_handle;
6317 
6318 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6319 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6320 			pp_support_state = PP_STATE_SUPPORT_LS;
6321 			pp_state = PP_STATE_LS;
6322 		}
6323 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6324 			pp_support_state |= PP_STATE_SUPPORT_CG;
6325 			pp_state |= PP_STATE_CG;
6326 		}
6327 		if (state == AMD_CG_STATE_UNGATE)
6328 			pp_state = 0;
6329 
6330 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6331 				PP_BLOCK_GFX_CG,
6332 				pp_support_state,
6333 				pp_state);
6334 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6335 	}
6336 
6337 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6338 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6339 			pp_support_state = PP_STATE_SUPPORT_LS;
6340 			pp_state = PP_STATE_LS;
6341 		}
6342 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6343 			pp_support_state |= PP_STATE_SUPPORT_CG;
6344 			pp_state |= PP_STATE_CG;
6345 		}
6346 		if (state == AMD_CG_STATE_UNGATE)
6347 			pp_state = 0;
6348 
6349 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6350 				PP_BLOCK_GFX_3D,
6351 				pp_support_state,
6352 				pp_state);
6353 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6354 	}
6355 
6356 	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6357 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6358 			pp_support_state = PP_STATE_SUPPORT_LS;
6359 			pp_state = PP_STATE_LS;
6360 		}
6361 
6362 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6363 			pp_support_state |= PP_STATE_SUPPORT_CG;
6364 			pp_state |= PP_STATE_CG;
6365 		}
6366 
6367 		if (state == AMD_CG_STATE_UNGATE)
6368 			pp_state = 0;
6369 
6370 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6371 				PP_BLOCK_GFX_MG,
6372 				pp_support_state,
6373 				pp_state);
6374 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6375 	}
6376 
6377 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6378 		pp_support_state = PP_STATE_SUPPORT_LS;
6379 
6380 		if (state == AMD_CG_STATE_UNGATE)
6381 			pp_state = 0;
6382 		else
6383 			pp_state = PP_STATE_LS;
6384 
6385 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6386 				PP_BLOCK_GFX_RLC,
6387 				pp_support_state,
6388 				pp_state);
6389 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6390 	}
6391 
6392 	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6393 		pp_support_state = PP_STATE_SUPPORT_LS;
6394 
6395 		if (state == AMD_CG_STATE_UNGATE)
6396 			pp_state = 0;
6397 		else
6398 			pp_state = PP_STATE_LS;
6399 		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6400 			PP_BLOCK_GFX_CP,
6401 			pp_support_state,
6402 			pp_state);
6403 		amd_set_clockgating_by_smu(pp_handle, msg_id);
6404 	}
6405 
6406 	return 0;
6407 }
6408 
6409 static int gfx_v8_0_set_clockgating_state(void *handle,
6410 					  enum amd_clockgating_state state)
6411 {
6412 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6413 
6414 	switch (adev->asic_type) {
6415 	case CHIP_FIJI:
6416 	case CHIP_CARRIZO:
6417 	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
6420 		break;
6421 	case CHIP_TONGA:
6422 		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6423 		break;
6424 	case CHIP_POLARIS10:
6425 	case CHIP_POLARIS11:
6426 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6427 		break;
6428 	default:
6429 		break;
6430 	}
6431 	return 0;
6432 }
6433 
6434 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6435 {
6436 	return ring->adev->wb.wb[ring->rptr_offs];
6437 }
6438 
6439 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6440 {
6441 	struct amdgpu_device *adev = ring->adev;
6442 
6443 	if (ring->use_doorbell)
6444 		/* XXX check if swapping is necessary on BE */
6445 		return ring->adev->wb.wb[ring->wptr_offs];
6446 	else
6447 		return RREG32(mmCP_RB0_WPTR);
6448 }
6449 
6450 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6451 {
6452 	struct amdgpu_device *adev = ring->adev;
6453 
6454 	if (ring->use_doorbell) {
6455 		/* XXX check if swapping is necessary on BE */
6456 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
6457 		WDOORBELL32(ring->doorbell_index, ring->wptr);
6458 	} else {
6459 		WREG32(mmCP_RB0_WPTR, ring->wptr);
6460 		(void)RREG32(mmCP_RB0_WPTR);
6461 	}
6462 }
6463 
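/* Flush the HDP cache with a WAIT_REG_MEM write-wait-write cycle:
 * writing GPU_HDP_FLUSH_REQ kicks off the flush and the packet retires
 * once this engine's bit in GPU_HDP_FLUSH_DONE matches ref_and_mask.
 */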
6464 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6465 {
6466 	u32 ref_and_mask, reg_mem_engine;
6467 
6468 	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6469 	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6470 		switch (ring->me) {
6471 		case 1:
6472 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6473 			break;
6474 		case 2:
6475 			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6476 			break;
6477 		default:
6478 			return;
6479 		}
6480 		reg_mem_engine = 0;
6481 	} else {
6482 		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6483 		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6484 	}
6485 
6486 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6487 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6488 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
6489 				 reg_mem_engine));
6490 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6491 	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6492 	amdgpu_ring_write(ring, ref_and_mask);
6493 	amdgpu_ring_write(ring, ref_and_mask);
6494 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6495 }
6496 
6497 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6498 {
6499 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6500 	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6501 		EVENT_INDEX(4));
6502 
6503 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6504 	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6505 		EVENT_INDEX(0));
6506 }
6507 
6509 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6510 {
6511 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6512 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6513 				 WRITE_DATA_DST_SEL(0) |
6514 				 WR_CONFIRM));
6515 	amdgpu_ring_write(ring, mmHDP_DEBUG0);
6516 	amdgpu_ring_write(ring, 0);
6517 	amdgpu_ring_write(ring, 1);
6519 }
6520 
6521 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6522 				      struct amdgpu_ib *ib,
6523 				      unsigned vm_id, bool ctx_switch)
6524 {
6525 	u32 header, control = 0;
6526 
6527 	if (ib->flags & AMDGPU_IB_FLAG_CE)
6528 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6529 	else
6530 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6531 
6532 	control |= ib->length_dw | (vm_id << 24);
6533 
6534 	amdgpu_ring_write(ring, header);
6535 	amdgpu_ring_write(ring,
6536 #ifdef __BIG_ENDIAN
6537 			  (2 << 0) |
6538 #endif
6539 			  (ib->gpu_addr & 0xFFFFFFFC));
6540 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6541 	amdgpu_ring_write(ring, control);
6542 }
6543 
6544 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6545 					  struct amdgpu_ib *ib,
6546 					  unsigned vm_id, bool ctx_switch)
6547 {
6548 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6549 
6550 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6551 	amdgpu_ring_write(ring,
6552 #ifdef __BIG_ENDIAN
6553 				(2 << 0) |
6554 #endif
6555 				(ib->gpu_addr & 0xFFFFFFFC));
6556 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6557 	amdgpu_ring_write(ring, control);
6558 }
6559 
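/* Emit an EOP fence on the gfx ring: flush the TC/TCL1 caches, write
 * the sequence number (32 or 64 bits depending on flags) to addr once
 * all prior work has retired, and optionally raise an interrupt.
 */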
6560 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6561 					 u64 seq, unsigned flags)
6562 {
6563 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6564 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6565 
6566 	/* EVENT_WRITE_EOP - flush caches, send int */
6567 	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6568 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6569 				 EOP_TC_ACTION_EN |
6570 				 EOP_TC_WB_ACTION_EN |
6571 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6572 				 EVENT_INDEX(5)));
6573 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6574 	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6575 			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6576 	amdgpu_ring_write(ring, lower_32_bits(seq));
6577 	amdgpu_ring_write(ring, upper_32_bits(seq));
6579 }
6580 
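/* Stall the ring until the latest emitted fence value shows up in
 * memory, i.e. until all previously scheduled work has completed.  On
 * the gfx ring the wait runs on the PFP so command fetch itself stalls.
 */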
6581 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6582 {
6583 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6584 	uint32_t seq = ring->fence_drv.sync_seq;
6585 	uint64_t addr = ring->fence_drv.gpu_addr;
6586 
6587 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6588 	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6589 				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6590 				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6591 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6592 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6593 	amdgpu_ring_write(ring, seq);
6594 	amdgpu_ring_write(ring, 0xffffffff);
6595 	amdgpu_ring_write(ring, 4); /* poll interval */
6596 }
6597 
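/* Switch to a new set of page tables: write the page directory base for
 * this VMID, request a TLB invalidate, wait for the invalidate to
 * complete, and finally (gfx ring only) sync the PFP to the ME so no
 * stale prefetched state survives the flush.
 */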
6598 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6599 					unsigned vm_id, uint64_t pd_addr)
6600 {
6601 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6602 
6603 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
6607 	if (vm_id < 8) {
6608 		amdgpu_ring_write(ring,
6609 				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6610 	} else {
6611 		amdgpu_ring_write(ring,
6612 				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6613 	}
6614 	amdgpu_ring_write(ring, 0);
6615 	amdgpu_ring_write(ring, pd_addr >> 12);
6616 
	/* bits 0-15 are the VM contexts 0-15 */
6618 	/* invalidate the cache */
6619 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6620 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6621 				 WRITE_DATA_DST_SEL(0)));
6622 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6623 	amdgpu_ring_write(ring, 0);
6624 	amdgpu_ring_write(ring, 1 << vm_id);
6625 
6626 	/* wait for the invalidate to complete */
6627 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6628 	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6629 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6630 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6631 	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6632 	amdgpu_ring_write(ring, 0);
6633 	amdgpu_ring_write(ring, 0); /* ref */
6634 	amdgpu_ring_write(ring, 0); /* mask */
6635 	amdgpu_ring_write(ring, 0x20); /* poll interval */
6636 
6637 	/* compute doesn't have PFP */
6638 	if (usepfp) {
6639 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6640 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6641 		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits a 128 dw nop to prevent the CE from accessing
		 * the VM before the vm_flush finishes
		 */
6643 		amdgpu_ring_insert_nop(ring, 128);
6644 	}
6645 }
6646 
6647 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6648 {
6649 	return ring->adev->wb.wb[ring->wptr_offs];
6650 }
6651 
6652 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6653 {
6654 	struct amdgpu_device *adev = ring->adev;
6655 
6656 	/* XXX check if swapping is necessary on BE */
6657 	adev->wb.wb[ring->wptr_offs] = ring->wptr;
6658 	WDOORBELL32(ring->doorbell_index, ring->wptr);
6659 }
6660 
6661 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6662 					     u64 addr, u64 seq,
6663 					     unsigned flags)
6664 {
6665 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6666 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6667 
6668 	/* RELEASE_MEM - flush caches, send int */
6669 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6670 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6671 				 EOP_TC_ACTION_EN |
6672 				 EOP_TC_WB_ACTION_EN |
6673 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6674 				 EVENT_INDEX(5)));
6675 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6676 	amdgpu_ring_write(ring, addr & 0xfffffffc);
6677 	amdgpu_ring_write(ring, upper_32_bits(addr));
6678 	amdgpu_ring_write(ring, lower_32_bits(seq));
6679 	amdgpu_ring_write(ring, upper_32_bits(seq));
6680 }
6681 
6682 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6683 					 u64 seq, unsigned int flags)
6684 {
	/* we only allocate 32 bits for each seq wb address */
6686 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6687 
6688 	/* write fence seq to the "addr" */
6689 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6690 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6691 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6692 	amdgpu_ring_write(ring, lower_32_bits(addr));
6693 	amdgpu_ring_write(ring, upper_32_bits(addr));
6694 	amdgpu_ring_write(ring, lower_32_bits(seq));
6695 
6696 	if (flags & AMDGPU_FENCE_FLAG_INT) {
6697 		/* set register to trigger INT */
6698 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6699 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6700 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6701 		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6702 		amdgpu_ring_write(ring, 0);
6703 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6704 	}
6705 }
6706 
6707 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6708 {
6709 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6710 	amdgpu_ring_write(ring, 0);
6711 }
6712 
6713 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6714 {
6715 	uint32_t dw2 = 0;
6716 
6717 	if (amdgpu_sriov_vf(ring->adev))
6718 		gfx_v8_0_ring_emit_ce_meta_init(ring,
6719 			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
6720 
	dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
6722 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6723 		gfx_v8_0_ring_emit_vgt_flush(ring);
6724 		/* set load_global_config & load_global_uconfig */
6725 		dw2 |= 0x8001;
6726 		/* set load_cs_sh_regs */
6727 		dw2 |= 0x01000000;
6728 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6729 		dw2 |= 0x10002;
6730 
		/* set load_ce_ram if a preamble is present */
6732 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6733 			dw2 |= 0x10000000;
6734 	} else {
		/* still load_ce_ram if this is the first time a preamble
		 * is presented, even though no context switch happens.
		 */
6738 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6739 			dw2 |= 0x10000000;
6740 	}
6741 
6742 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6743 	amdgpu_ring_write(ring, dw2);
6744 	amdgpu_ring_write(ring, 0);
6745 
6746 	if (amdgpu_sriov_vf(ring->adev))
6747 		gfx_v8_0_ring_emit_de_meta_init(ring,
6748 			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
6749 }
6750 
6751 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6752 {
6753 	struct amdgpu_device *adev = ring->adev;
6754 
6755 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6756 	amdgpu_ring_write(ring, 0 |	/* src: register*/
6757 				(5 << 8) |	/* dst: memory */
6758 				(1 << 20));	/* write confirm */
6759 	amdgpu_ring_write(ring, reg);
6760 	amdgpu_ring_write(ring, 0);
6761 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6762 				adev->virt.reg_val_offs * 4));
6763 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6764 				adev->virt.reg_val_offs * 4));
6765 }
6766 
6767 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6768 				  uint32_t val)
6769 {
6770 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6771 	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
6772 	amdgpu_ring_write(ring, reg);
6773 	amdgpu_ring_write(ring, 0);
6774 	amdgpu_ring_write(ring, val);
6775 }
6776 
6777 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6778 						 enum amdgpu_interrupt_state state)
6779 {
6780 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6781 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6782 }
6783 
6784 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6785 						     int me, int pipe,
6786 						     enum amdgpu_interrupt_state state)
6787 {
6788 	/*
6789 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6790 	 * handles the setting of interrupts for this specific pipe. All other
6791 	 * pipes' interrupts are set by amdkfd.
6792 	 */
6793 
6794 	if (me == 1) {
6795 		switch (pipe) {
6796 		case 0:
6797 			break;
6798 		default:
6799 			DRM_DEBUG("invalid pipe %d\n", pipe);
6800 			return;
6801 		}
6802 	} else {
6803 		DRM_DEBUG("invalid me %d\n", me);
6804 		return;
6805 	}
6806 
6807 	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6808 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6809 }
6810 
6811 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6812 					     struct amdgpu_irq_src *source,
6813 					     unsigned type,
6814 					     enum amdgpu_interrupt_state state)
6815 {
6816 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6817 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6818 
6819 	return 0;
6820 }
6821 
6822 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6823 					      struct amdgpu_irq_src *source,
6824 					      unsigned type,
6825 					      enum amdgpu_interrupt_state state)
6826 {
6827 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6828 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6829 
6830 	return 0;
6831 }
6832 
6833 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6834 					    struct amdgpu_irq_src *src,
6835 					    unsigned type,
6836 					    enum amdgpu_interrupt_state state)
6837 {
6838 	switch (type) {
6839 	case AMDGPU_CP_IRQ_GFX_EOP:
6840 		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6841 		break;
6842 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6843 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6844 		break;
6845 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6846 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6847 		break;
6848 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6849 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6850 		break;
6851 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6852 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6853 		break;
6854 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6855 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6856 		break;
6857 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6858 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6859 		break;
6860 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6861 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6862 		break;
6863 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6864 		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6865 		break;
6866 	default:
6867 		break;
6868 	}
6869 	return 0;
6870 }
6871 
6872 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6873 			    struct amdgpu_irq_src *source,
6874 			    struct amdgpu_iv_entry *entry)
6875 {
6876 	int i;
6877 	u8 me_id, pipe_id, queue_id;
6878 	struct amdgpu_ring *ring;
6879 
6880 	DRM_DEBUG("IH: CP EOP\n");
6881 	me_id = (entry->ring_id & 0x0c) >> 2;
6882 	pipe_id = (entry->ring_id & 0x03) >> 0;
6883 	queue_id = (entry->ring_id & 0x70) >> 4;
6884 
6885 	switch (me_id) {
6886 	case 0:
6887 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6888 		break;
6889 	case 1:
6890 	case 2:
6891 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6892 			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupts are supported for MEC starting
			 * from VI.  The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
6896 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6897 				amdgpu_fence_process(ring);
6898 		}
6899 		break;
6900 	}
6901 	return 0;
6902 }
6903 
6904 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6905 				 struct amdgpu_irq_src *source,
6906 				 struct amdgpu_iv_entry *entry)
6907 {
6908 	DRM_ERROR("Illegal register access in command stream\n");
6909 	schedule_work(&adev->reset_work);
6910 	return 0;
6911 }
6912 
6913 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6914 				  struct amdgpu_irq_src *source,
6915 				  struct amdgpu_iv_entry *entry)
6916 {
6917 	DRM_ERROR("Illegal instruction in command stream\n");
6918 	schedule_work(&adev->reset_work);
6919 	return 0;
6920 }
6921 
6922 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6923 					    struct amdgpu_irq_src *src,
6924 					    unsigned int type,
6925 					    enum amdgpu_interrupt_state state)
6926 {
6927 	uint32_t tmp, target;
6928 	struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data;
6929 
6930 	BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
6931 
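	/* The per-pipe INT_CNTL registers of each ME are contiguous, so the
	 * target register is the ME's PIPE0 register plus the pipe index.
	 */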
6932 	if (ring->me == 1)
6933 		target = mmCP_ME1_PIPE0_INT_CNTL;
6934 	else
6935 		target = mmCP_ME2_PIPE0_INT_CNTL;
6936 	target += ring->pipe;
6937 
6938 	switch (type) {
6939 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6940 		if (state == AMDGPU_IRQ_STATE_DISABLE) {
6941 			tmp = RREG32(mmCPC_INT_CNTL);
6942 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6943 						 GENERIC2_INT_ENABLE, 0);
6944 			WREG32(mmCPC_INT_CNTL, tmp);
6945 
6946 			tmp = RREG32(target);
6947 			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6948 						 GENERIC2_INT_ENABLE, 0);
6949 			WREG32(target, tmp);
6950 		} else {
6951 			tmp = RREG32(mmCPC_INT_CNTL);
6952 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6953 						 GENERIC2_INT_ENABLE, 1);
6954 			WREG32(mmCPC_INT_CNTL, tmp);
6955 
6956 			tmp = RREG32(target);
6957 			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6958 						 GENERIC2_INT_ENABLE, 1);
6959 			WREG32(target, tmp);
6960 		}
6961 		break;
6962 	default:
		BUG(); /* KIQ only supports GENERIC2_INT for now */
6964 		break;
6965 	}
6966 	return 0;
6967 }
6968 
6969 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6970 			    struct amdgpu_irq_src *source,
6971 			    struct amdgpu_iv_entry *entry)
6972 {
6973 	u8 me_id, pipe_id, queue_id;
6974 	struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data;
6975 
6976 	BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
6977 
6978 	me_id = (entry->ring_id & 0x0c) >> 2;
6979 	pipe_id = (entry->ring_id & 0x03) >> 0;
6980 	queue_id = (entry->ring_id & 0x70) >> 4;
6981 	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6982 		   me_id, pipe_id, queue_id);
6983 
6984 	amdgpu_fence_process(ring);
6985 	return 0;
6986 }
6987 
6988 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6989 	.name = "gfx_v8_0",
6990 	.early_init = gfx_v8_0_early_init,
6991 	.late_init = gfx_v8_0_late_init,
6992 	.sw_init = gfx_v8_0_sw_init,
6993 	.sw_fini = gfx_v8_0_sw_fini,
6994 	.hw_init = gfx_v8_0_hw_init,
6995 	.hw_fini = gfx_v8_0_hw_fini,
6996 	.suspend = gfx_v8_0_suspend,
6997 	.resume = gfx_v8_0_resume,
6998 	.is_idle = gfx_v8_0_is_idle,
6999 	.wait_for_idle = gfx_v8_0_wait_for_idle,
7000 	.check_soft_reset = gfx_v8_0_check_soft_reset,
7001 	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
7002 	.soft_reset = gfx_v8_0_soft_reset,
7003 	.post_soft_reset = gfx_v8_0_post_soft_reset,
7004 	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
7005 	.set_powergating_state = gfx_v8_0_set_powergating_state,
7006 	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
7007 };
7008 
7009 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
7010 	.type = AMDGPU_RING_TYPE_GFX,
7011 	.align_mask = 0xff,
7012 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7013 	.get_rptr = gfx_v8_0_ring_get_rptr,
7014 	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
7015 	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
7016 	.emit_frame_size =
7017 		20 + /* gfx_v8_0_ring_emit_gds_switch */
7018 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
7019 		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 + /* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
7021 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7022 		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
7023 		2 + /* gfx_v8_ring_emit_sb */
7024 		3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */
7025 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
7026 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
7027 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
7028 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7029 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7030 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7031 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7032 	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
7033 	.test_ring = gfx_v8_0_ring_test_ring,
7034 	.test_ib = gfx_v8_0_ring_test_ib,
7035 	.insert_nop = amdgpu_ring_insert_nop,
7036 	.pad_ib = amdgpu_ring_generic_pad_ib,
7037 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
7038 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
7039 };
7040 
7041 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
7042 	.type = AMDGPU_RING_TYPE_COMPUTE,
7043 	.align_mask = 0xff,
7044 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7045 	.get_rptr = gfx_v8_0_ring_get_rptr,
7046 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
7047 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
7048 	.emit_frame_size =
7049 		20 + /* gfx_v8_0_ring_emit_gds_switch */
7050 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
7051 		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7052 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7053 		17 + /* gfx_v8_0_ring_emit_vm_flush */
7054 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7055 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
7056 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
7057 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
7058 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
7059 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
7060 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
7061 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7062 	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
7063 	.test_ring = gfx_v8_0_ring_test_ring,
7064 	.test_ib = gfx_v8_0_ring_test_ib,
7065 	.insert_nop = amdgpu_ring_insert_nop,
7066 	.pad_ib = amdgpu_ring_generic_pad_ib,
7067 };
7068 
7069 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7070 	.type = AMDGPU_RING_TYPE_KIQ,
7071 	.align_mask = 0xff,
7072 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7073 	.get_rptr = gfx_v8_0_ring_get_rptr,
7074 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
7075 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
7076 	.emit_frame_size =
7077 		20 + /* gfx_v8_0_ring_emit_gds_switch */
7078 		7 + /* gfx_v8_0_ring_emit_hdp_flush */
7079 		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
7080 		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
7081 		17 + /* gfx_v8_0_ring_emit_vm_flush */
7082 		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7083 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
7084 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
7085 	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
7086 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
7087 	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
7088 	.test_ring = gfx_v8_0_ring_test_ring,
7089 	.test_ib = gfx_v8_0_ring_test_ib,
7090 	.insert_nop = amdgpu_ring_insert_nop,
7091 	.pad_ib = amdgpu_ring_generic_pad_ib,
7092 	.emit_rreg = gfx_v8_0_ring_emit_rreg,
7093 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
7094 };
7095 
7096 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7097 {
7098 	int i;
7099 
7100 	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7101 
7102 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7103 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7104 
7105 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7106 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7107 }
7108 
7109 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7110 	.set = gfx_v8_0_set_eop_interrupt_state,
7111 	.process = gfx_v8_0_eop_irq,
7112 };
7113 
7114 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7115 	.set = gfx_v8_0_set_priv_reg_fault_state,
7116 	.process = gfx_v8_0_priv_reg_irq,
7117 };
7118 
7119 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7120 	.set = gfx_v8_0_set_priv_inst_fault_state,
7121 	.process = gfx_v8_0_priv_inst_irq,
7122 };
7123 
7124 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7125 	.set = gfx_v8_0_kiq_set_interrupt_state,
7126 	.process = gfx_v8_0_kiq_irq,
7127 };
7128 
7129 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7130 {
7131 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7132 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7133 
7134 	adev->gfx.priv_reg_irq.num_types = 1;
7135 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7136 
7137 	adev->gfx.priv_inst_irq.num_types = 1;
7138 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7139 
7140 	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7141 	adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7142 }
7143 
7144 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7145 {
7146 	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7147 }
7148 
7149 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7150 {
	/* init ASIC GDS info */
7152 	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7153 	adev->gds.gws.total_size = 64;
7154 	adev->gds.oa.total_size = 16;
7155 
7156 	if (adev->gds.mem.total_size == 64 * 1024) {
7157 		adev->gds.mem.gfx_partition_size = 4096;
7158 		adev->gds.mem.cs_partition_size = 4096;
7159 
7160 		adev->gds.gws.gfx_partition_size = 4;
7161 		adev->gds.gws.cs_partition_size = 4;
7162 
7163 		adev->gds.oa.gfx_partition_size = 4;
7164 		adev->gds.oa.cs_partition_size = 1;
7165 	} else {
7166 		adev->gds.mem.gfx_partition_size = 1024;
7167 		adev->gds.mem.cs_partition_size = 1024;
7168 
7169 		adev->gds.gws.gfx_partition_size = 16;
7170 		adev->gds.gws.cs_partition_size = 16;
7171 
7172 		adev->gds.oa.gfx_partition_size = 4;
7173 		adev->gds.oa.cs_partition_size = 4;
7174 	}
7175 }
7176 
7177 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7178 						 u32 bitmap)
7179 {
7180 	u32 data;
7181 
7182 	if (!bitmap)
7183 		return;
7184 
7185 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7186 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7187 
7188 	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7189 }
7190 
7191 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7192 {
7193 	u32 data, mask;
7194 
7195 	data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7196 		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7197 
7198 	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
7199 
7200 	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7201 }
7202 
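/* Walk every shader engine and shader array, record the active-CU
 * bitmap for each, and mark up to two CUs per SH as always-on (AO);
 * the totals are published through adev->gfx.cu_info.
 */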
7203 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7204 {
7205 	int i, j, k, counter, active_cu_number = 0;
7206 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7207 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7208 	unsigned disable_masks[4 * 2];
7209 
7210 	memset(cu_info, 0, sizeof(*cu_info));
7211 
7212 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7213 
7214 	mutex_lock(&adev->grbm_idx_mutex);
7215 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7216 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7217 			mask = 1;
7218 			ao_bitmap = 0;
7219 			counter = 0;
7220 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7221 			if (i < 4 && j < 2)
7222 				gfx_v8_0_set_user_cu_inactive_bitmap(
7223 					adev, disable_masks[i * 2 + j]);
7224 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7225 			cu_info->bitmap[i][j] = bitmap;
7226 
			for (k = 0; k < 16; k++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
7235 			active_cu_number += counter;
7236 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7237 		}
7238 	}
7239 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7240 	mutex_unlock(&adev->grbm_idx_mutex);
7241 
7242 	cu_info->number = active_cu_number;
7243 	cu_info->ao_cu_mask = ao_cu_mask;
7244 }
7245 
7246 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7247 {
7248 	.type = AMD_IP_BLOCK_TYPE_GFX,
7249 	.major = 8,
7250 	.minor = 0,
7251 	.rev = 0,
7252 	.funcs = &gfx_v8_0_ip_funcs,
7253 };
7254 
7255 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7256 {
7257 	.type = AMD_IP_BLOCK_TYPE_GFX,
7258 	.major = 8,
7259 	.minor = 1,
7260 	.rev = 0,
7261 	.funcs = &gfx_v8_0_ip_funcs,
7262 };
7263 
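/* Under SR-IOV the CP saves and restores CE metadata in the context
 * save area (CSA).  This emits a WRITE_DATA packet from the CE that
 * seeds the CSA with a zeroed CE payload, using the chained-IB layout
 * when the host supports chained indirect buffers.
 */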
7264 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7265 {
7266 	uint64_t ce_payload_addr;
7267 	int cnt_ce;
7268 	static union {
7269 		struct amdgpu_ce_ib_state regular;
7270 		struct amdgpu_ce_ib_state_chained_ib chained;
7271 	} ce_payload = {};
7272 
7273 	if (ring->adev->virt.chained_ib_support) {
7274 		ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, ce_payload);
7275 		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7276 	} else {
7277 		ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, ce_payload);
7278 		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7279 	}
7280 
7281 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7282 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7283 				WRITE_DATA_DST_SEL(8) |
7284 				WR_CONFIRM) |
7285 				WRITE_DATA_CACHE_POLICY(0));
7286 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7287 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7288 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7289 }
7290 
7291 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr)
7292 {
7293 	uint64_t de_payload_addr, gds_addr;
7294 	int cnt_de;
7295 	static union {
7296 		struct amdgpu_de_ib_state regular;
7297 		struct amdgpu_de_ib_state_chained_ib chained;
7298 	} de_payload = {};
7299 
7300 	gds_addr = csa_addr + 4096;
7301 	if (ring->adev->virt.chained_ib_support) {
7302 		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7303 		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7304 		de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, de_payload);
7305 		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7306 	} else {
7307 		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7308 		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7309 		de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, de_payload);
7310 		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7311 	}
7312 
7313 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7314 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7315 				WRITE_DATA_DST_SEL(8) |
7316 				WR_CONFIRM) |
7317 				WRITE_DATA_CACHE_POLICY(0));
7318 	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7319 	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7320 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7321 }
7322