xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c (revision e0a37f85fc95e3f2550446316bc4a27d00d75993)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
31 
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
34 
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37 
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
40 
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
45 
46 #include "uvd/uvd_5_0_d.h"
47 #include "uvd/uvd_5_0_sh_mask.h"
48 
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51 
/* Ring counts exposed by the GFX v8 block: one GFX ring, eight compute rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/*
 * Preferred ("golden") GB_ADDR_CONFIG values per ASIC family; the same
 * values appear in the *_golden_common_all tables below.
 */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/*
 * Field-packing helpers: shift a value into the named field of the
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 register layouts.
 */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
68 
/*
 * Firmware images required per ASIC (CE/PFP/ME/MEC/MEC2/RLC).
 * Note: Stoney has no mec2 image in this list.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec2.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
102 
/*
 * GDS register offsets, one row per VMID (0-15):
 * {base, size, GWS, OA}.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
122 
/*
 * Golden register fixups for Tonga A11.  Entries are
 * {register offset, mask, value} triples consumed by
 * amdgpu_program_register_sequence() (see exact masking semantics there).
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
141 
/*
 * Common golden settings for Tonga ({offset, mask, value} triples);
 * GB_ADDR_CONFIG matches TONGA_GB_ADDR_CONFIG_GOLDEN.
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
153 
/*
 * MGCG/CGCG clockgating init sequence for Tonga
 * ({offset, mask, value} triples).  The CGTT_*_CLK_CTRL writes program
 * per-block clockgating control; the per-CU CGTS_CU* writes follow.
 * NOTE(review): 0xe0000000 in GRBM_GFX_INDEX presumably selects
 * broadcast to all SEs/SHs — confirm against the GRBM_GFX_INDEX fields.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU clockgating setup, CU0..CU7 (CU0/CU4 use the TA_SQC variant) */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
232 
/* Common golden settings for Fiji ({offset, mask, value} triples). */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
244 
/* Golden register fixups for Fiji A10 ({offset, mask, value} triples). */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x00000100,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf30fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x7d6cf5e4,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x3928b1a0,
};
258 
/*
 * MGCG/CGCG clockgating init sequence for Fiji
 * ({offset, mask, value} triples).  Unlike the Tonga/Iceland/Carrizo
 * sequences, no per-CU CGTS_CU* programming is done here.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffc0,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
297 
/*
 * Golden register fixups for Iceland/Topaz A11
 * ({offset, mask, value} triples).
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
316 
/*
 * Common golden settings for Iceland/Topaz; GB_ADDR_CONFIG matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
328 
/*
 * MGCG/CGCG clockgating init sequence for Iceland/Topaz
 * ({offset, mask, value} triples).  Only CU0..CU5 are programmed here,
 * and no CP_MEM_SLP_CNTL entry terminates the list.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU clockgating setup, CU0..CU5 (CU0/CU4 use the TA_SQC variant) */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
396 
/*
 * Golden register fixups for Carrizo A11 ({offset, mask, value} triples).
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
410 
/*
 * Common golden settings for Carrizo; GB_ADDR_CONFIG matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
422 
/*
 * MGCG/CGCG clockgating init sequence for Carrizo
 * ({offset, mask, value} triples), CU0..CU7 programmed individually.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU clockgating setup, CU0..CU7 (CU0/CU4 use the TA_SQC variant) */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
501 
502 static const u32 stoney_golden_settings_a11[] =
503 {
504 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
505 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
506 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
507 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
508 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
509 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
510   	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
511 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
512 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
513 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
514 };
515 
/* Common golden settings for Stoney ({offset, mask, value} triples). */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
527 
/*
 * MGCG/CGCG clockgating init sequence for Stoney — much shorter than
 * the other ASICs' lists ({offset, mask, value} triples).
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
537 
/* Forward declarations — presumably defined later in this file (TODO confirm). */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
541 
542 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
543 {
544 	switch (adev->asic_type) {
545 	case CHIP_TOPAZ:
546 		amdgpu_program_register_sequence(adev,
547 						 iceland_mgcg_cgcg_init,
548 						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
549 		amdgpu_program_register_sequence(adev,
550 						 golden_settings_iceland_a11,
551 						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
552 		amdgpu_program_register_sequence(adev,
553 						 iceland_golden_common_all,
554 						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
555 		break;
556 	case CHIP_FIJI:
557 		amdgpu_program_register_sequence(adev,
558 						 fiji_mgcg_cgcg_init,
559 						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
560 		amdgpu_program_register_sequence(adev,
561 						 golden_settings_fiji_a10,
562 						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
563 		amdgpu_program_register_sequence(adev,
564 						 fiji_golden_common_all,
565 						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
566 		break;
567 
568 	case CHIP_TONGA:
569 		amdgpu_program_register_sequence(adev,
570 						 tonga_mgcg_cgcg_init,
571 						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
572 		amdgpu_program_register_sequence(adev,
573 						 golden_settings_tonga_a11,
574 						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
575 		amdgpu_program_register_sequence(adev,
576 						 tonga_golden_common_all,
577 						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
578 		break;
579 	case CHIP_CARRIZO:
580 		amdgpu_program_register_sequence(adev,
581 						 cz_mgcg_cgcg_init,
582 						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
583 		amdgpu_program_register_sequence(adev,
584 						 cz_golden_settings_a11,
585 						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
586 		amdgpu_program_register_sequence(adev,
587 						 cz_golden_common_all,
588 						 (const u32)ARRAY_SIZE(cz_golden_common_all));
589 		break;
590 	case CHIP_STONEY:
591 		amdgpu_program_register_sequence(adev,
592 						 stoney_mgcg_cgcg_init,
593 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
594 		amdgpu_program_register_sequence(adev,
595 						 stoney_golden_settings_a11,
596 						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
597 		amdgpu_program_register_sequence(adev,
598 						 stoney_golden_common_all,
599 						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
600 		break;
601 	default:
602 		break;
603 	}
604 }
605 
606 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
607 {
608 	int i;
609 
610 	adev->gfx.scratch.num_reg = 7;
611 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
612 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
613 		adev->gfx.scratch.free[i] = true;
614 		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
615 	}
616 }
617 
618 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
619 {
620 	struct amdgpu_device *adev = ring->adev;
621 	uint32_t scratch;
622 	uint32_t tmp = 0;
623 	unsigned i;
624 	int r;
625 
626 	r = amdgpu_gfx_scratch_get(adev, &scratch);
627 	if (r) {
628 		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
629 		return r;
630 	}
631 	WREG32(scratch, 0xCAFEDEAD);
632 	r = amdgpu_ring_lock(ring, 3);
633 	if (r) {
634 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
635 			  ring->idx, r);
636 		amdgpu_gfx_scratch_free(adev, scratch);
637 		return r;
638 	}
639 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
640 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
641 	amdgpu_ring_write(ring, 0xDEADBEEF);
642 	amdgpu_ring_unlock_commit(ring);
643 
644 	for (i = 0; i < adev->usec_timeout; i++) {
645 		tmp = RREG32(scratch);
646 		if (tmp == 0xDEADBEEF)
647 			break;
648 		DRM_UDELAY(1);
649 	}
650 	if (i < adev->usec_timeout) {
651 		DRM_INFO("ring test on %d succeeded in %d usecs\n",
652 			 ring->idx, i);
653 	} else {
654 		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
655 			  ring->idx, scratch, tmp);
656 		r = -EINVAL;
657 	}
658 	amdgpu_gfx_scratch_free(adev, scratch);
659 	return r;
660 }
661 
662 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
663 {
664 	struct amdgpu_device *adev = ring->adev;
665 	struct amdgpu_ib ib;
666 	struct fence *f = NULL;
667 	uint32_t scratch;
668 	uint32_t tmp = 0;
669 	unsigned i;
670 	int r;
671 
672 	r = amdgpu_gfx_scratch_get(adev, &scratch);
673 	if (r) {
674 		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
675 		return r;
676 	}
677 	WREG32(scratch, 0xCAFEDEAD);
678 	memset(&ib, 0, sizeof(ib));
679 	r = amdgpu_ib_get(ring, NULL, 256, &ib);
680 	if (r) {
681 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
682 		goto err1;
683 	}
684 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
685 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
686 	ib.ptr[2] = 0xDEADBEEF;
687 	ib.length_dw = 3;
688 
689 	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
690 						 AMDGPU_FENCE_OWNER_UNDEFINED,
691 						 &f);
692 	if (r)
693 		goto err2;
694 
695 	r = fence_wait(f, false);
696 	if (r) {
697 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
698 		goto err2;
699 	}
700 	for (i = 0; i < adev->usec_timeout; i++) {
701 		tmp = RREG32(scratch);
702 		if (tmp == 0xDEADBEEF)
703 			break;
704 		DRM_UDELAY(1);
705 	}
706 	if (i < adev->usec_timeout) {
707 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
708 			 ring->idx, i);
709 		goto err2;
710 	} else {
711 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
712 			  scratch, tmp);
713 		r = -EINVAL;
714 	}
715 err2:
716 	fence_put(f);
717 	amdgpu_ib_free(adev, &ib);
718 err1:
719 	amdgpu_gfx_scratch_free(adev, scratch);
720 	return r;
721 }
722 
723 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
724 {
725 	const char *chip_name;
726 	char fw_name[30];
727 	int err;
728 	struct amdgpu_firmware_info *info = NULL;
729 	const struct common_firmware_header *header = NULL;
730 	const struct gfx_firmware_header_v1_0 *cp_hdr;
731 
732 	DRM_DEBUG("\n");
733 
734 	switch (adev->asic_type) {
735 	case CHIP_TOPAZ:
736 		chip_name = "topaz";
737 		break;
738 	case CHIP_TONGA:
739 		chip_name = "tonga";
740 		break;
741 	case CHIP_CARRIZO:
742 		chip_name = "carrizo";
743 		break;
744 	case CHIP_FIJI:
745 		chip_name = "fiji";
746 		break;
747 	case CHIP_STONEY:
748 		chip_name = "stoney";
749 		break;
750 	default:
751 		BUG();
752 	}
753 
754 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
755 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
756 	if (err)
757 		goto out;
758 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
759 	if (err)
760 		goto out;
761 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
762 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
763 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
764 
765 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
766 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
767 	if (err)
768 		goto out;
769 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
770 	if (err)
771 		goto out;
772 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
773 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
774 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
775 
776 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
777 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
778 	if (err)
779 		goto out;
780 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
781 	if (err)
782 		goto out;
783 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
784 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
785 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
786 
787 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
788 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
789 	if (err)
790 		goto out;
791 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
792 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
793 	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
794 	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
795 
796 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
797 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
798 	if (err)
799 		goto out;
800 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
801 	if (err)
802 		goto out;
803 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
804 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
805 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
806 
807 	if (adev->asic_type != CHIP_STONEY) {
808 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
809 		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
810 		if (!err) {
811 			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
812 			if (err)
813 				goto out;
814 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
815 				adev->gfx.mec2_fw->data;
816 			adev->gfx.mec2_fw_version =
817 				le32_to_cpu(cp_hdr->header.ucode_version);
818 			adev->gfx.mec2_feature_version =
819 				le32_to_cpu(cp_hdr->ucode_feature_version);
820 		} else {
821 			err = 0;
822 			adev->gfx.mec2_fw = NULL;
823 		}
824 	}
825 
826 	if (adev->firmware.smu_load) {
827 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
828 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
829 		info->fw = adev->gfx.pfp_fw;
830 		header = (const struct common_firmware_header *)info->fw->data;
831 		adev->firmware.fw_size +=
832 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
833 
834 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
835 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
836 		info->fw = adev->gfx.me_fw;
837 		header = (const struct common_firmware_header *)info->fw->data;
838 		adev->firmware.fw_size +=
839 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
840 
841 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
842 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
843 		info->fw = adev->gfx.ce_fw;
844 		header = (const struct common_firmware_header *)info->fw->data;
845 		adev->firmware.fw_size +=
846 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
847 
848 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
849 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
850 		info->fw = adev->gfx.rlc_fw;
851 		header = (const struct common_firmware_header *)info->fw->data;
852 		adev->firmware.fw_size +=
853 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
854 
855 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
856 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
857 		info->fw = adev->gfx.mec_fw;
858 		header = (const struct common_firmware_header *)info->fw->data;
859 		adev->firmware.fw_size +=
860 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
861 
862 		if (adev->gfx.mec2_fw) {
863 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
864 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
865 			info->fw = adev->gfx.mec2_fw;
866 			header = (const struct common_firmware_header *)info->fw->data;
867 			adev->firmware.fw_size +=
868 				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
869 		}
870 
871 	}
872 
873 out:
874 	if (err) {
875 		dev_err(adev->dev,
876 			"gfx8: Failed to load firmware \"%s\"\n",
877 			fw_name);
878 		release_firmware(adev->gfx.pfp_fw);
879 		adev->gfx.pfp_fw = NULL;
880 		release_firmware(adev->gfx.me_fw);
881 		adev->gfx.me_fw = NULL;
882 		release_firmware(adev->gfx.ce_fw);
883 		adev->gfx.ce_fw = NULL;
884 		release_firmware(adev->gfx.rlc_fw);
885 		adev->gfx.rlc_fw = NULL;
886 		release_firmware(adev->gfx.mec_fw);
887 		adev->gfx.mec_fw = NULL;
888 		release_firmware(adev->gfx.mec2_fw);
889 		adev->gfx.mec2_fw = NULL;
890 	}
891 	return err;
892 }
893 
894 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
895 {
896 	int r;
897 
898 	if (adev->gfx.mec.hpd_eop_obj) {
899 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
900 		if (unlikely(r != 0))
901 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
902 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
903 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
904 
905 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
906 		adev->gfx.mec.hpd_eop_obj = NULL;
907 	}
908 }
909 
910 #define MEC_HPD_SIZE 2048
911 
912 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
913 {
914 	int r;
915 	u32 *hpd;
916 
917 	/*
918 	 * we assign only 1 pipe because all other pipes will
919 	 * be handled by KFD
920 	 */
921 	adev->gfx.mec.num_mec = 1;
922 	adev->gfx.mec.num_pipe = 1;
923 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
924 
925 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
926 		r = amdgpu_bo_create(adev,
927 				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
928 				     PAGE_SIZE, true,
929 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
930 				     &adev->gfx.mec.hpd_eop_obj);
931 		if (r) {
932 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
933 			return r;
934 		}
935 	}
936 
937 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
938 	if (unlikely(r != 0)) {
939 		gfx_v8_0_mec_fini(adev);
940 		return r;
941 	}
942 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
943 			  &adev->gfx.mec.hpd_eop_gpu_addr);
944 	if (r) {
945 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
946 		gfx_v8_0_mec_fini(adev);
947 		return r;
948 	}
949 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
950 	if (r) {
951 		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
952 		gfx_v8_0_mec_fini(adev);
953 		return r;
954 	}
955 
956 	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
957 
958 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
959 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
960 
961 	return 0;
962 }
963 
/*
 * gfx_v8_0_gpu_early_init - record the per-ASIC GFX configuration
 *
 * Fills adev->gfx.config with the shader-engine/CU/backend topology and
 * FIFO sizes for the detected ASIC, then derives the memory row size
 * (either from the fused DIMM address-mapping registers on APUs, or from
 * MC_ARB_RAMCFG on dGPUs) and patches it into the golden GB_ADDR_CONFIG
 * value.  The numeric limits below are hardware golden settings for each
 * chip; do not change them without the matching register spec.
 */
static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* Carrizo SKUs differ in CU count; keyed off PCI revision. */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* Stoney SKUs also differ in CU count by PCI revision. */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		/* Unknown VI part: fall back to a conservative Tonga-like config. */
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never used below —
	 * presumably kept for parity with other gfx IP versions; confirm. */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: derive row size from the column count fuse, capped at 4KB. */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;
}
1182 
/*
 * gfx_v8_0_sw_init - software-side initialization of the GFX IP block
 *
 * Registers the CP interrupt sources, loads the microcode, allocates the
 * MEC HPD buffer, initializes the GFX and compute rings, and reserves the
 * GDS/GWS/OA partitions.  Returns 0 on success or a negative error code
 * (earlier allocations are left for sw_fini to release).
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		/* 8 queues per pipe: ring i maps to pipe i/8, queue i%8 */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024 * 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
			NULL, &adev->gds.gds_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
		PAGE_SIZE, true,
		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
		NULL, &adev->gds.gws_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
			NULL, &adev->gds.oa_gfx_bo);
	if (r)
		return r;

	/* constant-engine RAM size used by the CP */
	adev->gfx.ce_ram_size = 0x8000;

	gfx_v8_0_gpu_early_init(adev);

	return 0;
}
1294 
1295 static int gfx_v8_0_sw_fini(void *handle)
1296 {
1297 	int i;
1298 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1299 
1300 	amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1301 	amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1302 	amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1303 
1304 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1305 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1306 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1307 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1308 
1309 	gfx_v8_0_mec_fini(adev);
1310 
1311 	return 0;
1312 }
1313 
1314 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1315 {
1316 	const u32 num_tile_mode_states = 32;
1317 	const u32 num_secondary_tile_mode_states = 16;
1318 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1319 
1320 	switch (adev->gfx.config.mem_row_size_in_kb) {
1321 	case 1:
1322 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1323 		break;
1324 	case 2:
1325 	default:
1326 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1327 		break;
1328 	case 4:
1329 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1330 		break;
1331 	}
1332 
1333 	switch (adev->asic_type) {
1334 	case CHIP_TOPAZ:
1335 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1336 			switch (reg_offset) {
1337 			case 0:
1338 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1339 						PIPE_CONFIG(ADDR_SURF_P2) |
1340 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1341 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1342 				break;
1343 			case 1:
1344 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1345 						PIPE_CONFIG(ADDR_SURF_P2) |
1346 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1347 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1348 				break;
1349 			case 2:
1350 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1351 						PIPE_CONFIG(ADDR_SURF_P2) |
1352 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1353 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1354 				break;
1355 			case 3:
1356 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1357 						PIPE_CONFIG(ADDR_SURF_P2) |
1358 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1359 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1360 				break;
1361 			case 4:
1362 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1363 						PIPE_CONFIG(ADDR_SURF_P2) |
1364 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1365 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1366 				break;
1367 			case 5:
1368 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1369 						PIPE_CONFIG(ADDR_SURF_P2) |
1370 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1371 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1372 				break;
1373 			case 6:
1374 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1375 						PIPE_CONFIG(ADDR_SURF_P2) |
1376 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1377 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1378 				break;
1379 			case 8:
1380 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1381 						PIPE_CONFIG(ADDR_SURF_P2));
1382 				break;
1383 			case 9:
1384 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1385 						PIPE_CONFIG(ADDR_SURF_P2) |
1386 						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1387 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1388 				break;
1389 			case 10:
1390 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1391 						PIPE_CONFIG(ADDR_SURF_P2) |
1392 						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1393 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1394 				break;
1395 			case 11:
1396 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1397 						PIPE_CONFIG(ADDR_SURF_P2) |
1398 						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1399 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1400 				break;
1401 			case 13:
1402 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1403 						PIPE_CONFIG(ADDR_SURF_P2) |
1404 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1405 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1406 				break;
1407 			case 14:
1408 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1409 						PIPE_CONFIG(ADDR_SURF_P2) |
1410 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1411 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1412 				break;
1413 			case 15:
1414 				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1415 						PIPE_CONFIG(ADDR_SURF_P2) |
1416 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1417 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1418 				break;
1419 			case 16:
1420 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1421 						PIPE_CONFIG(ADDR_SURF_P2) |
1422 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1423 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1424 				break;
1425 			case 18:
1426 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1427 						PIPE_CONFIG(ADDR_SURF_P2) |
1428 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1429 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1430 				break;
1431 			case 19:
1432 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1433 						PIPE_CONFIG(ADDR_SURF_P2) |
1434 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1435 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1436 				break;
1437 			case 20:
1438 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1439 						PIPE_CONFIG(ADDR_SURF_P2) |
1440 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1441 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1442 				break;
1443 			case 21:
1444 				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1445 						PIPE_CONFIG(ADDR_SURF_P2) |
1446 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1447 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1448 				break;
1449 			case 22:
1450 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1451 						PIPE_CONFIG(ADDR_SURF_P2) |
1452 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1453 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1454 				break;
1455 			case 24:
1456 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1457 						PIPE_CONFIG(ADDR_SURF_P2) |
1458 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1459 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1460 				break;
1461 			case 25:
1462 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1463 						PIPE_CONFIG(ADDR_SURF_P2) |
1464 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1465 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1466 				break;
1467 			case 26:
1468 				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1469 						PIPE_CONFIG(ADDR_SURF_P2) |
1470 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1471 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1472 				break;
1473 			case 27:
1474 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1475 						PIPE_CONFIG(ADDR_SURF_P2) |
1476 						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1477 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1478 				break;
1479 			case 28:
1480 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1481 						PIPE_CONFIG(ADDR_SURF_P2) |
1482 						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1483 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1484 				break;
1485 			case 29:
1486 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1487 						PIPE_CONFIG(ADDR_SURF_P2) |
1488 						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1489 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1490 				break;
1491 			case 7:
1492 			case 12:
1493 			case 17:
1494 			case 23:
1495 				/* unused idx */
1496 				continue;
1497 			default:
1498 				gb_tile_moden = 0;
1499 				break;
1500 			};
1501 			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1502 			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1503 		}
1504 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1505 			switch (reg_offset) {
1506 			case 0:
1507 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1508 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1509 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1510 						NUM_BANKS(ADDR_SURF_8_BANK));
1511 				break;
1512 			case 1:
1513 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1514 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1515 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1516 						NUM_BANKS(ADDR_SURF_8_BANK));
1517 				break;
1518 			case 2:
1519 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1520 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1521 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1522 						NUM_BANKS(ADDR_SURF_8_BANK));
1523 				break;
1524 			case 3:
1525 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1526 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1527 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1528 						NUM_BANKS(ADDR_SURF_8_BANK));
1529 				break;
1530 			case 4:
1531 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1532 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1533 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1534 						NUM_BANKS(ADDR_SURF_8_BANK));
1535 				break;
1536 			case 5:
1537 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1538 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1539 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1540 						NUM_BANKS(ADDR_SURF_8_BANK));
1541 				break;
1542 			case 6:
1543 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1544 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1545 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1546 						NUM_BANKS(ADDR_SURF_8_BANK));
1547 				break;
1548 			case 8:
1549 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1550 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1551 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1552 						NUM_BANKS(ADDR_SURF_16_BANK));
1553 				break;
1554 			case 9:
1555 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1556 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1557 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1558 						NUM_BANKS(ADDR_SURF_16_BANK));
1559 				break;
1560 			case 10:
1561 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1562 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1563 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1564 						NUM_BANKS(ADDR_SURF_16_BANK));
1565 				break;
1566 			case 11:
1567 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1568 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1569 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1570 						NUM_BANKS(ADDR_SURF_16_BANK));
1571 				break;
1572 			case 12:
1573 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1574 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1575 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1576 						NUM_BANKS(ADDR_SURF_16_BANK));
1577 				break;
1578 			case 13:
1579 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1580 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1581 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1582 						NUM_BANKS(ADDR_SURF_16_BANK));
1583 				break;
1584 			case 14:
1585 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1586 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1587 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1588 						NUM_BANKS(ADDR_SURF_8_BANK));
1589 				break;
1590 			case 7:
1591 				/* unused idx */
1592 				continue;
1593 			default:
1594 				gb_tile_moden = 0;
1595 				break;
1596 			};
1597 			adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1598 			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1599 		}
1600 	case CHIP_FIJI:
1601 	case CHIP_TONGA:
1602 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1603 			switch (reg_offset) {
1604 			case 0:
1605 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1606 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1607 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1608 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1609 				break;
1610 			case 1:
1611 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1612 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1613 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1614 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1615 				break;
1616 			case 2:
1617 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1618 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1619 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1620 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1621 				break;
1622 			case 3:
1623 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1624 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1625 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1626 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1627 				break;
1628 			case 4:
1629 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1630 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1631 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1632 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1633 				break;
1634 			case 5:
1635 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1636 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1637 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1638 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1639 				break;
1640 			case 6:
1641 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1642 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1643 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1644 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1645 				break;
1646 			case 7:
1647 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1648 						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1649 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1650 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1651 				break;
1652 			case 8:
1653 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1654 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1655 				break;
1656 			case 9:
1657 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1658 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1659 						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1660 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1661 				break;
1662 			case 10:
1663 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1664 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1665 						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1666 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1667 				break;
1668 			case 11:
1669 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1670 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1671 						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1672 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1673 				break;
1674 			case 12:
1675 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1676 						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1677 						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1678 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1679 				break;
1680 			case 13:
1681 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1682 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1683 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1684 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1685 				break;
1686 			case 14:
1687 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1688 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1689 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1690 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1691 				break;
1692 			case 15:
1693 				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1694 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1695 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1696 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1697 				break;
1698 			case 16:
1699 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1700 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1701 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1702 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1703 				break;
1704 			case 17:
1705 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1706 						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1707 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1708 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1709 				break;
1710 			case 18:
1711 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1712 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1713 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1714 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1715 				break;
1716 			case 19:
1717 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1718 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1719 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1720 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1721 				break;
1722 			case 20:
1723 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1724 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1725 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1726 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1727 				break;
1728 			case 21:
1729 				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1730 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1731 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1732 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1733 				break;
1734 			case 22:
1735 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1736 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1737 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1738 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1739 				break;
1740 			case 23:
1741 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1742 						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1743 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1744 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1745 				break;
1746 			case 24:
1747 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1748 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1749 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1750 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1751 				break;
1752 			case 25:
1753 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1754 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1755 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1756 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1757 				break;
1758 			case 26:
1759 				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1760 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1761 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1762 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1763 				break;
1764 			case 27:
1765 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1766 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1767 						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1768 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1769 				break;
1770 			case 28:
1771 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1772 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1773 						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1774 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1775 				break;
1776 			case 29:
1777 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1778 						PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1779 						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1780 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1781 				break;
1782 			case 30:
1783 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1784 						PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1785 						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1786 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1787 				break;
1788 			default:
1789 				gb_tile_moden = 0;
1790 				break;
1791 			};
1792 			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1793 			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1794 		}
1795 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1796 			switch (reg_offset) {
1797 			case 0:
1798 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1799 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1800 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1801 						NUM_BANKS(ADDR_SURF_16_BANK));
1802 				break;
1803 			case 1:
1804 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1805 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1806 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1807 						NUM_BANKS(ADDR_SURF_16_BANK));
1808 				break;
1809 			case 2:
1810 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1811 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1812 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1813 						NUM_BANKS(ADDR_SURF_16_BANK));
1814 				break;
1815 			case 3:
1816 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1817 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1818 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1819 						NUM_BANKS(ADDR_SURF_16_BANK));
1820 				break;
1821 			case 4:
1822 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1823 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1824 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1825 						NUM_BANKS(ADDR_SURF_16_BANK));
1826 				break;
1827 			case 5:
1828 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1829 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1830 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1831 						NUM_BANKS(ADDR_SURF_16_BANK));
1832 				break;
1833 			case 6:
1834 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1837 						NUM_BANKS(ADDR_SURF_16_BANK));
1838 				break;
1839 			case 8:
1840 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1841 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1842 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1843 						NUM_BANKS(ADDR_SURF_16_BANK));
1844 				break;
1845 			case 9:
1846 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1847 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1848 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1849 						NUM_BANKS(ADDR_SURF_16_BANK));
1850 				break;
1851 			case 10:
1852 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1853 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1854 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1855 						NUM_BANKS(ADDR_SURF_16_BANK));
1856 				break;
1857 			case 11:
1858 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1859 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1860 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1861 						NUM_BANKS(ADDR_SURF_16_BANK));
1862 				break;
1863 			case 12:
1864 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1865 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1866 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1867 						NUM_BANKS(ADDR_SURF_8_BANK));
1868 				break;
1869 			case 13:
1870 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1871 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1872 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1873 						NUM_BANKS(ADDR_SURF_4_BANK));
1874 				break;
1875 			case 14:
1876 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1877 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1878 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1879 						NUM_BANKS(ADDR_SURF_4_BANK));
1880 				break;
1881 			case 7:
1882 				/* unused idx */
1883 				continue;
1884 			default:
1885 				gb_tile_moden = 0;
1886 				break;
1887 			};
1888 			adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1889 			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1890 		}
1891 		break;
1892 	case CHIP_STONEY:
1893 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1894 			switch (reg_offset) {
1895 			case 0:
1896 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1897 						PIPE_CONFIG(ADDR_SURF_P2) |
1898 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1899 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1900 				break;
1901 			case 1:
1902 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1903 						PIPE_CONFIG(ADDR_SURF_P2) |
1904 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1905 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1906 				break;
1907 			case 2:
1908 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1909 						PIPE_CONFIG(ADDR_SURF_P2) |
1910 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1911 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1912 				break;
1913 			case 3:
1914 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1915 						PIPE_CONFIG(ADDR_SURF_P2) |
1916 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1917 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1918 				break;
1919 			case 4:
1920 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1921 						PIPE_CONFIG(ADDR_SURF_P2) |
1922 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1923 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1924 				break;
1925 			case 5:
1926 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1927 						PIPE_CONFIG(ADDR_SURF_P2) |
1928 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1929 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1930 				break;
1931 			case 6:
1932 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1933 						PIPE_CONFIG(ADDR_SURF_P2) |
1934 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1935 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1936 				break;
1937 			case 8:
1938 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1939 						PIPE_CONFIG(ADDR_SURF_P2));
1940 				break;
1941 			case 9:
1942 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1943 						PIPE_CONFIG(ADDR_SURF_P2) |
1944 						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1945 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1946 				break;
1947 			case 10:
1948 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1949 						PIPE_CONFIG(ADDR_SURF_P2) |
1950 						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1951 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1952 				break;
1953 			case 11:
1954 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1955 						PIPE_CONFIG(ADDR_SURF_P2) |
1956 						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1957 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1958 				break;
1959 			case 13:
1960 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1961 						PIPE_CONFIG(ADDR_SURF_P2) |
1962 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1963 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1964 				break;
1965 			case 14:
1966 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1967 						PIPE_CONFIG(ADDR_SURF_P2) |
1968 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1969 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1970 				break;
1971 			case 15:
1972 				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1973 						PIPE_CONFIG(ADDR_SURF_P2) |
1974 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1975 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1976 				break;
1977 			case 16:
1978 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1979 						PIPE_CONFIG(ADDR_SURF_P2) |
1980 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1981 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1982 				break;
1983 			case 18:
1984 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1985 						PIPE_CONFIG(ADDR_SURF_P2) |
1986 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1987 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1988 				break;
1989 			case 19:
1990 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1991 						PIPE_CONFIG(ADDR_SURF_P2) |
1992 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1993 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1994 				break;
1995 			case 20:
1996 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1997 						PIPE_CONFIG(ADDR_SURF_P2) |
1998 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1999 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2000 				break;
2001 			case 21:
2002 				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2003 						PIPE_CONFIG(ADDR_SURF_P2) |
2004 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2005 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2006 				break;
2007 			case 22:
2008 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2009 						PIPE_CONFIG(ADDR_SURF_P2) |
2010 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2011 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2012 				break;
2013 			case 24:
2014 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2015 						PIPE_CONFIG(ADDR_SURF_P2) |
2016 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2017 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2018 				break;
2019 			case 25:
2020 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2021 						PIPE_CONFIG(ADDR_SURF_P2) |
2022 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2023 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2024 				break;
2025 			case 26:
2026 				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2027 						PIPE_CONFIG(ADDR_SURF_P2) |
2028 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2029 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2030 				break;
2031 			case 27:
2032 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2033 						PIPE_CONFIG(ADDR_SURF_P2) |
2034 						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2035 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2036 				break;
2037 			case 28:
2038 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2039 						PIPE_CONFIG(ADDR_SURF_P2) |
2040 						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2041 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2042 				break;
2043 			case 29:
2044 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2045 						PIPE_CONFIG(ADDR_SURF_P2) |
2046 						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2047 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2048 				break;
2049 			case 7:
2050 			case 12:
2051 			case 17:
2052 			case 23:
2053 				/* unused idx */
2054 				continue;
2055 			default:
2056 				gb_tile_moden = 0;
2057 				break;
2058 			};
2059 			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
2060 			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
2061 		}
2062 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2063 			switch (reg_offset) {
2064 			case 0:
2065 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2066 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2067 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2068 						NUM_BANKS(ADDR_SURF_8_BANK));
2069 				break;
2070 			case 1:
2071 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2072 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2073 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2074 						NUM_BANKS(ADDR_SURF_8_BANK));
2075 				break;
2076 			case 2:
2077 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2078 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2079 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2080 						NUM_BANKS(ADDR_SURF_8_BANK));
2081 				break;
2082 			case 3:
2083 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2084 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2085 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2086 						NUM_BANKS(ADDR_SURF_8_BANK));
2087 				break;
2088 			case 4:
2089 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2090 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2091 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2092 						NUM_BANKS(ADDR_SURF_8_BANK));
2093 				break;
2094 			case 5:
2095 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2096 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2097 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2098 						NUM_BANKS(ADDR_SURF_8_BANK));
2099 				break;
2100 			case 6:
2101 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2102 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2103 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2104 						NUM_BANKS(ADDR_SURF_8_BANK));
2105 				break;
2106 			case 8:
2107 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2108 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2109 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2110 						NUM_BANKS(ADDR_SURF_16_BANK));
2111 				break;
2112 			case 9:
2113 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2114 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2115 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2116 						NUM_BANKS(ADDR_SURF_16_BANK));
2117 				break;
2118 			case 10:
2119 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2120 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2121 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2122 						NUM_BANKS(ADDR_SURF_16_BANK));
2123 				break;
2124 			case 11:
2125 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2126 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2127 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2128 						NUM_BANKS(ADDR_SURF_16_BANK));
2129 				break;
2130 			case 12:
2131 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2132 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2133 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2134 						NUM_BANKS(ADDR_SURF_16_BANK));
2135 				break;
2136 			case 13:
2137 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2138 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2139 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2140 						NUM_BANKS(ADDR_SURF_16_BANK));
2141 				break;
2142 			case 14:
2143 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2144 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2145 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2146 						NUM_BANKS(ADDR_SURF_8_BANK));
2147 				break;
2148 			case 7:
2149 				/* unused idx */
2150 				continue;
2151 			default:
2152 				gb_tile_moden = 0;
2153 				break;
2154 			};
2155 			adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
2156 			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
2157 		}
2158 		break;
2159 	case CHIP_CARRIZO:
2160 	default:
2161 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2162 			switch (reg_offset) {
2163 			case 0:
2164 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165 						PIPE_CONFIG(ADDR_SURF_P2) |
2166 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2167 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168 				break;
2169 			case 1:
2170 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2171 						PIPE_CONFIG(ADDR_SURF_P2) |
2172 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2173 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2174 				break;
2175 			case 2:
2176 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2177 						PIPE_CONFIG(ADDR_SURF_P2) |
2178 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2179 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2180 				break;
2181 			case 3:
2182 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2183 						PIPE_CONFIG(ADDR_SURF_P2) |
2184 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2185 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2186 				break;
2187 			case 4:
2188 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2189 						PIPE_CONFIG(ADDR_SURF_P2) |
2190 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2191 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2192 				break;
2193 			case 5:
2194 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2195 						PIPE_CONFIG(ADDR_SURF_P2) |
2196 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2197 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2198 				break;
2199 			case 6:
2200 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2201 						PIPE_CONFIG(ADDR_SURF_P2) |
2202 						TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2203 						MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2204 				break;
2205 			case 8:
2206 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2207 						PIPE_CONFIG(ADDR_SURF_P2));
2208 				break;
2209 			case 9:
2210 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2211 						PIPE_CONFIG(ADDR_SURF_P2) |
2212 						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2213 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2214 				break;
2215 			case 10:
2216 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2217 						PIPE_CONFIG(ADDR_SURF_P2) |
2218 						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2219 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2220 				break;
2221 			case 11:
2222 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2223 						PIPE_CONFIG(ADDR_SURF_P2) |
2224 						MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2225 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2226 				break;
2227 			case 13:
2228 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2229 						PIPE_CONFIG(ADDR_SURF_P2) |
2230 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2231 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2232 				break;
2233 			case 14:
2234 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2235 						PIPE_CONFIG(ADDR_SURF_P2) |
2236 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2237 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2238 				break;
2239 			case 15:
2240 				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2241 						PIPE_CONFIG(ADDR_SURF_P2) |
2242 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2243 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2244 				break;
2245 			case 16:
2246 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2247 						PIPE_CONFIG(ADDR_SURF_P2) |
2248 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2249 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250 				break;
2251 			case 18:
2252 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2253 						PIPE_CONFIG(ADDR_SURF_P2) |
2254 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2255 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2256 				break;
2257 			case 19:
2258 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2259 						PIPE_CONFIG(ADDR_SURF_P2) |
2260 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2261 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2262 				break;
2263 			case 20:
2264 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2265 						PIPE_CONFIG(ADDR_SURF_P2) |
2266 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2267 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2268 				break;
2269 			case 21:
2270 				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2271 						PIPE_CONFIG(ADDR_SURF_P2) |
2272 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2273 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2274 				break;
2275 			case 22:
2276 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2277 						PIPE_CONFIG(ADDR_SURF_P2) |
2278 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2279 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2280 				break;
2281 			case 24:
2282 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2283 						PIPE_CONFIG(ADDR_SURF_P2) |
2284 						MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2286 				break;
2287 			case 25:
2288 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2289 						PIPE_CONFIG(ADDR_SURF_P2) |
2290 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2291 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2292 				break;
2293 			case 26:
2294 				gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2295 						PIPE_CONFIG(ADDR_SURF_P2) |
2296 						MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298 				break;
2299 			case 27:
2300 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2301 						PIPE_CONFIG(ADDR_SURF_P2) |
2302 						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2303 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304 				break;
2305 			case 28:
2306 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307 						PIPE_CONFIG(ADDR_SURF_P2) |
2308 						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2309 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310 				break;
2311 			case 29:
2312 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2313 						PIPE_CONFIG(ADDR_SURF_P2) |
2314 						MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2315 						SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2316 				break;
2317 			case 7:
2318 			case 12:
2319 			case 17:
2320 			case 23:
2321 				/* unused idx */
2322 				continue;
2323 			default:
2324 				gb_tile_moden = 0;
2325 				break;
2326 			};
2327 			adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
2328 			WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
2329 		}
2330 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2331 			switch (reg_offset) {
2332 			case 0:
2333 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2334 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2335 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2336 						NUM_BANKS(ADDR_SURF_8_BANK));
2337 				break;
2338 			case 1:
2339 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2341 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2342 						NUM_BANKS(ADDR_SURF_8_BANK));
2343 				break;
2344 			case 2:
2345 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2346 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2347 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2348 						NUM_BANKS(ADDR_SURF_8_BANK));
2349 				break;
2350 			case 3:
2351 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354 						NUM_BANKS(ADDR_SURF_8_BANK));
2355 				break;
2356 			case 4:
2357 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2358 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2359 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2360 						NUM_BANKS(ADDR_SURF_8_BANK));
2361 				break;
2362 			case 5:
2363 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2365 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2366 						NUM_BANKS(ADDR_SURF_8_BANK));
2367 				break;
2368 			case 6:
2369 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2370 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2371 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2372 						NUM_BANKS(ADDR_SURF_8_BANK));
2373 				break;
2374 			case 8:
2375 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2376 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2377 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378 						NUM_BANKS(ADDR_SURF_16_BANK));
2379 				break;
2380 			case 9:
2381 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2382 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2383 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2384 						NUM_BANKS(ADDR_SURF_16_BANK));
2385 				break;
2386 			case 10:
2387 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2388 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2389 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2390 						NUM_BANKS(ADDR_SURF_16_BANK));
2391 				break;
2392 			case 11:
2393 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2394 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2395 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2396 						NUM_BANKS(ADDR_SURF_16_BANK));
2397 				break;
2398 			case 12:
2399 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2400 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2401 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2402 						NUM_BANKS(ADDR_SURF_16_BANK));
2403 				break;
2404 			case 13:
2405 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2406 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2407 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2408 						NUM_BANKS(ADDR_SURF_16_BANK));
2409 				break;
2410 			case 14:
2411 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2412 						BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2413 						MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2414 						NUM_BANKS(ADDR_SURF_8_BANK));
2415 				break;
2416 			case 7:
2417 				/* unused idx */
2418 				continue;
2419 			default:
2420 				gb_tile_moden = 0;
2421 				break;
2422 			};
2423 			adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
2424 			WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
2425 		}
2426 	}
2427 }
2428 
2429 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2430 {
2431 	u32 i, mask = 0;
2432 
2433 	for (i = 0; i < bit_width; i++) {
2434 		mask <<= 1;
2435 		mask |= 1;
2436 	}
2437 	return mask;
2438 }
2439 
2440 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2441 {
2442 	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2443 
2444 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2445 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2446 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2447 	} else if (se_num == 0xffffffff) {
2448 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2449 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2450 	} else if (sh_num == 0xffffffff) {
2451 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2452 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2453 	} else {
2454 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2455 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2456 	}
2457 	WREG32(mmGRBM_GFX_INDEX, data);
2458 }
2459 
/*
 * gfx_v8_0_get_rb_disabled - disabled-RB mask for the selected SE/SH
 *
 * Combines the fuse-level (CC_RB_BACKEND_DISABLE) and user/driver-level
 * (GC_USER_RB_BACKEND_DISABLE) render backend disable bits and returns
 * them normalized to bit 0, limited to the RBs belonging to one SH
 * (max_rb_num_per_se / sh_per_se).
 *
 * Must be called with the target SE/SH already selected via
 * gfx_v8_0_select_se_sh() (caller holds grbm_idx_mutex).
 */
static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
				    u32 max_rb_num_per_se,
				    u32 sh_per_se)
{
	u32 data, mask;

	/* Fuse-disabled backends; keep only the BACKEND_DISABLE field. */
	data = RREG32(mmCC_RB_BACKEND_DISABLE);
	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;

	/* Fold in the user/driver override register.  NOTE(review): this
	 * read is not masked, so it relies on GC_USER_RB_BACKEND_DISABLE
	 * having no bits set outside its BACKEND_DISABLE field. */
	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	/* Both fields are assumed to share the same bit position; shift
	 * the combined value down to bit 0. */
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	/* Restrict the result to this SH's share of the SE's RBs. */
	mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
2477 
/*
 * gfx_v8_0_setup_rb - detect enabled render backends and program rasters
 *
 * Builds a global bitmap of disabled RBs by querying every (SE, SH)
 * pair, caches the inverted (enabled) mask in
 * adev->gfx.config.backend_enable_mask, then programs each SE's
 * PA_SC_RASTER_CONFIG with an RB mapping matching which RBs survive.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
			      u32 se_num, u32 sh_per_se,
			      u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Pass 1: pack each SH's disabled-RB bits into a
	 * RB_BITMAP_WIDTH_PER_SH-wide slot per (SE, SH) pair. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			data = gfx_v8_0_get_rb_disabled(adev,
					      max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) *
						 RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* Back to broadcast so later writes reach all SEs/SHs. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Invert: a set bit in enabled_rbs means that RB is usable. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	adev->gfx.config.backend_enable_mask = enabled_rbs;

	/* Pass 2: per SE, translate each SH's 2-bit RB-pair enable pattern
	 * into an RB_MAP selection in PA_SC_RASTER_CONFIG. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* both RBs of the pair disabled */
				if (j == 0)
					data |= (RASTER_CONFIG_RB_MAP_3 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				else
					data |= (RASTER_CONFIG_RB_MAP_0 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				break;
			case 1:
				/* only the first RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 <<
					 (i * sh_per_se + j) * 2);
				break;
			}
			/* advance to the next RB pair */
			enabled_rbs >>= 2;
		}
		WREG32(mmPA_SC_RASTER_CONFIG, data);
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
2544 
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM aperture registers for the compute-only VMIDs.
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
/* Compute queues use VMIDs FIRST_COMPUTE_VMID .. LAST_COMPUTE_VMID - 1
 * (i.e. 8..15); the loop below is inclusive/exclusive on these bounds. */
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* Same base in both halves of SH_MEM_BASES (low/high 16 bits). */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* HSA64 addressing, unaligned access allowed, MTYPE_CC default,
	 * private ATC enabled. */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* Return SRBM to VMID 0 so later accesses hit the default context. */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
2589 
/*
 * gfx_v8_0_gpu_init - one-time GFX engine configuration
 *
 * Sets the GRBM read timeout, mirrors the tiling/address configuration
 * into the blocks that must agree with GFX (HDP, DMIF, both SDMA
 * engines, UVD), initializes the tiling tables and render backends,
 * programs per-VMID SH_MEM defaults, and sizes the PA_SC fifos.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	/* Bound GRBM register reads with a timeout instead of hanging. */
	tmp = RREG32(mmGRBM_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
	WREG32(mmGRBM_CNTL, tmp);

	/* Propagate gb_addr_config; SDMA only takes the low pipe-config
	 * bits (mask 0x70). */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
	WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);

	gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
				 adev->gfx.config.max_sh_per_se,
				 adev->gfx.config.max_backends_per_se);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: MTYPE_UC for default and APE1. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* VMIDs 1..15: MTYPE_NC for default and APE1. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Compute VMIDs (8..15) get HSA-style apertures on top. */
	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
2664 
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for RLC serdes masters to go idle
 *
 * Polls the per-CU master busy register on every SE/SH, then the non-CU
 * (SE/GC/TC0/TC1) master busy bits.  Each poll gives up after
 * adev->usec_timeout microseconds; no error is reported on timeout.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* Per-CU masters: check each SE/SH individually. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Non-CU masters: single global register, poll the four busy bits. */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
2694 
2695 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2696 					       bool enable)
2697 {
2698 	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2699 
2700 	if (enable) {
2701 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 1);
2702 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 1);
2703 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 1);
2704 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 1);
2705 	} else {
2706 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 0);
2707 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 0);
2708 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 0);
2709 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 0);
2710 	}
2711 	WREG32(mmCP_INT_CNTL_RING0, tmp);
2712 }
2713 
/*
 * gfx_v8_0_rlc_stop - halt the RLC micro engine
 *
 * Clears RLC_ENABLE_F32 to stop the RLC, masks the CP GUI idle
 * interrupts, and waits for in-flight RLC serdes transactions to drain.
 * Non-static: callable from other files (e.g. around resets).
 */
void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32(mmRLC_CNTL, tmp);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	gfx_v8_0_wait_for_rlc_serdes(adev);
}
2725 
/*
 * gfx_v8_0_rlc_reset - pulse the RLC soft reset
 *
 * Asserts SOFT_RESET_RLC in GRBM_SOFT_RESET for 50us, releases it, and
 * waits another 50us for the block to settle.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmGRBM_SOFT_RESET);

	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
}
2737 
/*
 * gfx_v8_0_rlc_start - start the RLC micro engine
 *
 * Sets RLC_ENABLE_F32 and, on discrete parts, re-enables the CP GUI
 * idle interrupts.  APUs (e.g. Carrizo) defer the interrupt enable until
 * after CP init (see comment below).
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
	WREG32(mmRLC_CNTL, tmp);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
2751 
/*
 * gfx_v8_0_rlc_load_microcode - load RLC firmware into ucode RAM
 *
 * Streams the RLC microcode from adev->gfx.rlc_fw through the
 * RLC_GPM_UCODE_ADDR/DATA port and leaves the firmware version in the
 * address register afterwards.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware has been fetched.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* Payload offset and size (bytes -> 32-bit words) from the header. */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	/* Reset the write address, stream the words through the DATA
	 * register (the port presumably auto-increments), then record the
	 * firmware version in the ADDR register. */
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
2775 
/*
 * gfx_v8_0_rlc_resume - bring up the RLC from scratch
 *
 * Stops the RLC, disables clock and power gating, soft-resets the
 * block, loads its microcode (directly, or verifies SMU-managed
 * loading) and starts it again.
 *
 * Returns 0 on success, a negative error code if microcode loading or
 * the SMU load check fails.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	if (!adev->firmware.smu_load) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	} else {
		/* SMU loads the ucode; just confirm it finished. */
		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
						AMDGPU_UCODE_ID_RLC_G);
		if (r)
			return -EINVAL;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
2806 
2807 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2808 {
2809 	int i;
2810 	u32 tmp = RREG32(mmCP_ME_CNTL);
2811 
2812 	if (enable) {
2813 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2814 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2815 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2816 	} else {
2817 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2818 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2819 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2820 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2821 			adev->gfx.gfx_ring[i].ready = false;
2822 	}
2823 	WREG32(mmCP_ME_CNTL, tmp);
2824 	udelay(50);
2825 }
2826 
/*
 * gfx_v8_0_cp_gfx_load_microcode - load PFP, CE and ME firmware
 *
 * Halts the gfx CP, then for each engine resets its ucode write
 * address, streams the firmware words through the engine's DATA
 * register, and finally records the firmware version in the address
 * register.
 *
 * Returns 0 on success, -EINVAL if any of the three firmwares is
 * missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* Engines must be halted while their ucode RAM is rewritten. */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME (uses the RAM_WADDR/RAM_DATA pair rather than UCODE_ADDR) */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
2883 
2884 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
2885 {
2886 	u32 count = 0;
2887 	const struct cs_section_def *sect = NULL;
2888 	const struct cs_extent_def *ext = NULL;
2889 
2890 	/* begin clear state */
2891 	count += 2;
2892 	/* context control state */
2893 	count += 3;
2894 
2895 	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
2896 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2897 			if (sect->id == SECT_CONTEXT)
2898 				count += 2 + ext->reg_count;
2899 			else
2900 				return 0;
2901 		}
2902 	}
2903 	/* pa_sc_raster_config/pa_sc_raster_config1 */
2904 	count += 4;
2905 	/* end clear state */
2906 	count += 2;
2907 	/* clear state */
2908 	count += 2;
2909 
2910 	return count;
2911 }
2912 
/*
 * gfx_v8_0_cp_gfx_start - initialize the gfx CP and emit clear state
 *
 * Programs the CP context limits, un-halts the gfx engines, and emits
 * the clear-state buffer (golden context registers from vi_cs_data plus
 * per-ASIC raster config) followed by CE partition setup on ring 0.
 *
 * Returns 0 on success or the error from amdgpu_ring_lock().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords for the SET_BASE packet appended after the CSB. */
	r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* Emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet. */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* Per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG1 values. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		/* unsupported ASIC for this gfx v8 code path */
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_unlock_commit(ring);

	return 0;
}
2992 
2993 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
2994 {
2995 	struct amdgpu_ring *ring;
2996 	u32 tmp;
2997 	u32 rb_bufsz;
2998 	u64 rb_addr, rptr_addr;
2999 	int r;
3000 
3001 	/* Set the write pointer delay */
3002 	WREG32(mmCP_RB_WPTR_DELAY, 0);
3003 
3004 	/* set the RB to use vmid 0 */
3005 	WREG32(mmCP_RB_VMID, 0);
3006 
3007 	/* Set ring buffer size */
3008 	ring = &adev->gfx.gfx_ring[0];
3009 	rb_bufsz = order_base_2(ring->ring_size / 8);
3010 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3011 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3012 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3013 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
3014 #ifdef __BIG_ENDIAN
3015 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3016 #endif
3017 	WREG32(mmCP_RB0_CNTL, tmp);
3018 
3019 	/* Initialize the ring buffer's read and write pointers */
3020 	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3021 	ring->wptr = 0;
3022 	WREG32(mmCP_RB0_WPTR, ring->wptr);
3023 
3024 	/* set the wb address wether it's enabled or not */
3025 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3026 	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3027 	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
3028 
3029 	mdelay(1);
3030 	WREG32(mmCP_RB0_CNTL, tmp);
3031 
3032 	rb_addr = ring->gpu_addr >> 8;
3033 	WREG32(mmCP_RB0_BASE, rb_addr);
3034 	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3035 
3036 	/* no gfx doorbells on iceland */
3037 	if (adev->asic_type != CHIP_TOPAZ) {
3038 		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3039 		if (ring->use_doorbell) {
3040 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3041 					    DOORBELL_OFFSET, ring->doorbell_index);
3042 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3043 					    DOORBELL_EN, 1);
3044 		} else {
3045 			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3046 					    DOORBELL_EN, 0);
3047 		}
3048 		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
3049 
3050 		if (adev->asic_type == CHIP_TONGA) {
3051 			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3052 					    DOORBELL_RANGE_LOWER,
3053 					    AMDGPU_DOORBELL_GFX_RING0);
3054 			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3055 
3056 			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3057 			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3058 		}
3059 
3060 	}
3061 
3062 	/* start the ring */
3063 	gfx_v8_0_cp_gfx_start(adev);
3064 	ring->ready = true;
3065 	r = amdgpu_ring_test_ring(ring);
3066 	if (r) {
3067 		ring->ready = false;
3068 		return r;
3069 	}
3070 
3071 	return 0;
3072 }
3073 
3074 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3075 {
3076 	int i;
3077 
3078 	if (enable) {
3079 		WREG32(mmCP_MEC_CNTL, 0);
3080 	} else {
3081 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3082 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3083 			adev->gfx.compute_ring[i].ready = false;
3084 	}
3085 	udelay(50);
3086 }
3087 
/*
 * gfx_v8_0_cp_compute_start - start the compute CP
 *
 * Simply takes the MEC micro engines out of halt.  Always returns 0.
 */
static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
{
	gfx_v8_0_cp_compute_enable(adev, true);

	return 0;
}
3094 
/*
 * gfx_v8_0_cp_compute_load_microcode - load MEC (compute CP) firmware
 *
 * Halts the compute CP, then streams the MEC1 microcode into its ucode
 * RAM; MEC2 firmware is loaded only when a separate image is present.
 * Each engine's address register is reset before the words are written
 * and left holding the firmware version afterwards.
 *
 * Returns 0 on success, -EINVAL if the (mandatory) MEC1 firmware is
 * missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* Engines must be halted while their ucode RAM is rewritten. */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
3140 
3141 struct vi_mqd {
3142 	uint32_t header;  /* ordinal0 */
3143 	uint32_t compute_dispatch_initiator;  /* ordinal1 */
3144 	uint32_t compute_dim_x;  /* ordinal2 */
3145 	uint32_t compute_dim_y;  /* ordinal3 */
3146 	uint32_t compute_dim_z;  /* ordinal4 */
3147 	uint32_t compute_start_x;  /* ordinal5 */
3148 	uint32_t compute_start_y;  /* ordinal6 */
3149 	uint32_t compute_start_z;  /* ordinal7 */
3150 	uint32_t compute_num_thread_x;  /* ordinal8 */
3151 	uint32_t compute_num_thread_y;  /* ordinal9 */
3152 	uint32_t compute_num_thread_z;  /* ordinal10 */
3153 	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
3154 	uint32_t compute_perfcount_enable;  /* ordinal12 */
3155 	uint32_t compute_pgm_lo;  /* ordinal13 */
3156 	uint32_t compute_pgm_hi;  /* ordinal14 */
3157 	uint32_t compute_tba_lo;  /* ordinal15 */
3158 	uint32_t compute_tba_hi;  /* ordinal16 */
3159 	uint32_t compute_tma_lo;  /* ordinal17 */
3160 	uint32_t compute_tma_hi;  /* ordinal18 */
3161 	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
3162 	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
3163 	uint32_t compute_vmid;  /* ordinal21 */
3164 	uint32_t compute_resource_limits;  /* ordinal22 */
3165 	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
3166 	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
3167 	uint32_t compute_tmpring_size;  /* ordinal25 */
3168 	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
3169 	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
3170 	uint32_t compute_restart_x;  /* ordinal28 */
3171 	uint32_t compute_restart_y;  /* ordinal29 */
3172 	uint32_t compute_restart_z;  /* ordinal30 */
3173 	uint32_t compute_thread_trace_enable;  /* ordinal31 */
3174 	uint32_t compute_misc_reserved;  /* ordinal32 */
3175 	uint32_t compute_dispatch_id;  /* ordinal33 */
3176 	uint32_t compute_threadgroup_id;  /* ordinal34 */
3177 	uint32_t compute_relaunch;  /* ordinal35 */
3178 	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
3179 	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
3180 	uint32_t compute_wave_restore_control;  /* ordinal38 */
3181 	uint32_t reserved9;  /* ordinal39 */
3182 	uint32_t reserved10;  /* ordinal40 */
3183 	uint32_t reserved11;  /* ordinal41 */
3184 	uint32_t reserved12;  /* ordinal42 */
3185 	uint32_t reserved13;  /* ordinal43 */
3186 	uint32_t reserved14;  /* ordinal44 */
3187 	uint32_t reserved15;  /* ordinal45 */
3188 	uint32_t reserved16;  /* ordinal46 */
3189 	uint32_t reserved17;  /* ordinal47 */
3190 	uint32_t reserved18;  /* ordinal48 */
3191 	uint32_t reserved19;  /* ordinal49 */
3192 	uint32_t reserved20;  /* ordinal50 */
3193 	uint32_t reserved21;  /* ordinal51 */
3194 	uint32_t reserved22;  /* ordinal52 */
3195 	uint32_t reserved23;  /* ordinal53 */
3196 	uint32_t reserved24;  /* ordinal54 */
3197 	uint32_t reserved25;  /* ordinal55 */
3198 	uint32_t reserved26;  /* ordinal56 */
3199 	uint32_t reserved27;  /* ordinal57 */
3200 	uint32_t reserved28;  /* ordinal58 */
3201 	uint32_t reserved29;  /* ordinal59 */
3202 	uint32_t reserved30;  /* ordinal60 */
3203 	uint32_t reserved31;  /* ordinal61 */
3204 	uint32_t reserved32;  /* ordinal62 */
3205 	uint32_t reserved33;  /* ordinal63 */
3206 	uint32_t reserved34;  /* ordinal64 */
3207 	uint32_t compute_user_data_0;  /* ordinal65 */
3208 	uint32_t compute_user_data_1;  /* ordinal66 */
3209 	uint32_t compute_user_data_2;  /* ordinal67 */
3210 	uint32_t compute_user_data_3;  /* ordinal68 */
3211 	uint32_t compute_user_data_4;  /* ordinal69 */
3212 	uint32_t compute_user_data_5;  /* ordinal70 */
3213 	uint32_t compute_user_data_6;  /* ordinal71 */
3214 	uint32_t compute_user_data_7;  /* ordinal72 */
3215 	uint32_t compute_user_data_8;  /* ordinal73 */
3216 	uint32_t compute_user_data_9;  /* ordinal74 */
3217 	uint32_t compute_user_data_10;  /* ordinal75 */
3218 	uint32_t compute_user_data_11;  /* ordinal76 */
3219 	uint32_t compute_user_data_12;  /* ordinal77 */
3220 	uint32_t compute_user_data_13;  /* ordinal78 */
3221 	uint32_t compute_user_data_14;  /* ordinal79 */
3222 	uint32_t compute_user_data_15;  /* ordinal80 */
3223 	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
3224 	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
3225 	uint32_t reserved35;  /* ordinal83 */
3226 	uint32_t reserved36;  /* ordinal84 */
3227 	uint32_t reserved37;  /* ordinal85 */
3228 	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
3229 	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
3230 	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
3231 	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
3232 	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
3233 	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
3234 	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
3235 	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
3236 	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
3237 	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
3238 	uint32_t reserved38;  /* ordinal96 */
3239 	uint32_t reserved39;  /* ordinal97 */
3240 	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
3241 	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
3242 	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
3243 	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
3244 	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
3245 	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
3246 	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
3247 	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
3248 	uint32_t reserved40;  /* ordinal106 */
3249 	uint32_t reserved41;  /* ordinal107 */
3250 	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
3251 	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
3252 	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
3253 	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
3254 	uint32_t reserved42;  /* ordinal112 */
3255 	uint32_t reserved43;  /* ordinal113 */
3256 	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
3257 	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
3258 	uint32_t cp_packet_id_lo;  /* ordinal116 */
3259 	uint32_t cp_packet_id_hi;  /* ordinal117 */
3260 	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
3261 	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
3262 	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
3263 	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
3264 	uint32_t gds_save_mask_lo;  /* ordinal122 */
3265 	uint32_t gds_save_mask_hi;  /* ordinal123 */
3266 	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
3267 	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
3268 	uint32_t reserved44;  /* ordinal126 */
3269 	uint32_t reserved45;  /* ordinal127 */
3270 	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
3271 	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
3272 	uint32_t cp_hqd_active;  /* ordinal130 */
3273 	uint32_t cp_hqd_vmid;  /* ordinal131 */
3274 	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
3275 	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
3276 	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
3277 	uint32_t cp_hqd_quantum;  /* ordinal135 */
3278 	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
3279 	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
3280 	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
3281 	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
3282 	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
3283 	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
3284 	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
3285 	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
3286 	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
3287 	uint32_t cp_hqd_pq_control;  /* ordinal145 */
3288 	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
3289 	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
3290 	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
3291 	uint32_t cp_hqd_ib_control;  /* ordinal149 */
3292 	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
3293 	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
3294 	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
3295 	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
3296 	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
3297 	uint32_t cp_hqd_msg_type;  /* ordinal155 */
3298 	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
3299 	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
3300 	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
3301 	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
3302 	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
3303 	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
3304 	uint32_t cp_mqd_control;  /* ordinal162 */
3305 	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
3306 	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
3307 	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
3308 	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
3309 	uint32_t cp_hqd_eop_control;  /* ordinal167 */
3310 	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
3311 	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
3312 	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
3313 	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
3314 	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
3315 	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
3316 	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
3317 	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
3318 	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
3319 	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
3320 	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
3321 	uint32_t cp_hqd_error;  /* ordinal179 */
3322 	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
3323 	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
3324 	uint32_t reserved46;  /* ordinal182 */
3325 	uint32_t reserved47;  /* ordinal183 */
3326 	uint32_t reserved48;  /* ordinal184 */
3327 	uint32_t reserved49;  /* ordinal185 */
3328 	uint32_t reserved50;  /* ordinal186 */
3329 	uint32_t reserved51;  /* ordinal187 */
3330 	uint32_t reserved52;  /* ordinal188 */
3331 	uint32_t reserved53;  /* ordinal189 */
3332 	uint32_t reserved54;  /* ordinal190 */
3333 	uint32_t reserved55;  /* ordinal191 */
3334 	uint32_t iqtimer_pkt_header;  /* ordinal192 */
3335 	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
3336 	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
3337 	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
3338 	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
3339 	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
3340 	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
3341 	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
3342 	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
3343 	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
3344 	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
3345 	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
3346 	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
3347 	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
3348 	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
3349 	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
3350 	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
3351 	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
3352 	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
3353 	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
3354 	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
3355 	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
3356 	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
3357 	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
3358 	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
3359 	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
3360 	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
3361 	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
3362 	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
3363 	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
3364 	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
3365 	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
3366 	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
3367 	uint32_t reserved56;  /* ordinal225 */
3368 	uint32_t reserved57;  /* ordinal226 */
3369 	uint32_t reserved58;  /* ordinal227 */
3370 	uint32_t set_resources_header;  /* ordinal228 */
3371 	uint32_t set_resources_dw1;  /* ordinal229 */
3372 	uint32_t set_resources_dw2;  /* ordinal230 */
3373 	uint32_t set_resources_dw3;  /* ordinal231 */
3374 	uint32_t set_resources_dw4;  /* ordinal232 */
3375 	uint32_t set_resources_dw5;  /* ordinal233 */
3376 	uint32_t set_resources_dw6;  /* ordinal234 */
3377 	uint32_t set_resources_dw7;  /* ordinal235 */
3378 	uint32_t reserved59;  /* ordinal236 */
3379 	uint32_t reserved60;  /* ordinal237 */
3380 	uint32_t reserved61;  /* ordinal238 */
3381 	uint32_t reserved62;  /* ordinal239 */
3382 	uint32_t reserved63;  /* ordinal240 */
3383 	uint32_t reserved64;  /* ordinal241 */
3384 	uint32_t reserved65;  /* ordinal242 */
3385 	uint32_t reserved66;  /* ordinal243 */
3386 	uint32_t reserved67;  /* ordinal244 */
3387 	uint32_t reserved68;  /* ordinal245 */
3388 	uint32_t reserved69;  /* ordinal246 */
3389 	uint32_t reserved70;  /* ordinal247 */
3390 	uint32_t reserved71;  /* ordinal248 */
3391 	uint32_t reserved72;  /* ordinal249 */
3392 	uint32_t reserved73;  /* ordinal250 */
3393 	uint32_t reserved74;  /* ordinal251 */
3394 	uint32_t reserved75;  /* ordinal252 */
3395 	uint32_t reserved76;  /* ordinal253 */
3396 	uint32_t reserved77;  /* ordinal254 */
3397 	uint32_t reserved78;  /* ordinal255 */
3398 
3399 	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
3400 };
3401 
3402 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3403 {
3404 	int i, r;
3405 
3406 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3407 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3408 
3409 		if (ring->mqd_obj) {
3410 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
3411 			if (unlikely(r != 0))
3412 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3413 
3414 			amdgpu_bo_unpin(ring->mqd_obj);
3415 			amdgpu_bo_unreserve(ring->mqd_obj);
3416 
3417 			amdgpu_bo_unref(&ring->mqd_obj);
3418 			ring->mqd_obj = NULL;
3419 		}
3420 	}
3421 }
3422 
/*
 * gfx_v8_0_cp_compute_resume - bring up the MEC compute queues
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the per-pipe EOP buffers, then for every compute ring builds
 * a Memory Queue Descriptor (MQD) in a GTT BO and mirrors each field into
 * the corresponding CP_HQD_* / CP_MQD_* registers via SRBM-indexed access,
 * activates the queues, starts the MEC and ring-tests each ring.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes: indices 0-3 map to MEC1 (me=1), 4-7 to MEC2 (me=2) */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* each pipe gets its own MEC_HPD_SIZE slice of the EOP BO;
		 * the register takes a 256-byte-aligned address (>> 8) */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* lazily allocate the MQD BO the first time through */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all SEs/CUs for static thread management */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* route register access to this ring's me/pipe/queue */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP base programmed in the pipe-init loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait (up to usec_timeout) for the HQD to drain */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* APU-class parts program the MEC doorbell window here */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	/* globally enable doorbell-triggered queue scheduling */
	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	r = gfx_v8_0_cp_compute_start(adev);
	if (r)
		return r;

	/* smoke-test every compute ring; mark failures not-ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
3674 
3675 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3676 {
3677 	int r;
3678 
3679 	if (!(adev->flags & AMD_IS_APU))
3680 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3681 
3682 	if (!adev->firmware.smu_load) {
3683 		/* legacy firmware loading */
3684 		r = gfx_v8_0_cp_gfx_load_microcode(adev);
3685 		if (r)
3686 			return r;
3687 
3688 		r = gfx_v8_0_cp_compute_load_microcode(adev);
3689 		if (r)
3690 			return r;
3691 	} else {
3692 		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3693 						AMDGPU_UCODE_ID_CP_CE);
3694 		if (r)
3695 			return -EINVAL;
3696 
3697 		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3698 						AMDGPU_UCODE_ID_CP_PFP);
3699 		if (r)
3700 			return -EINVAL;
3701 
3702 		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3703 						AMDGPU_UCODE_ID_CP_ME);
3704 		if (r)
3705 			return -EINVAL;
3706 
3707 		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3708 						AMDGPU_UCODE_ID_CP_MEC1);
3709 		if (r)
3710 			return -EINVAL;
3711 	}
3712 
3713 	r = gfx_v8_0_cp_gfx_resume(adev);
3714 	if (r)
3715 		return r;
3716 
3717 	r = gfx_v8_0_cp_compute_resume(adev);
3718 	if (r)
3719 		return r;
3720 
3721 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3722 
3723 	return 0;
3724 }
3725 
/* Enable or disable both command processors (gfx first, then compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
3731 
/*
 * gfx_v8_0_hw_init - IP-block hw_init hook for GFX 8
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Applies the golden register settings, performs the core GPU init,
 * then brings up the RLC followed by the command processors.
 * Returns 0 on success or the first error encountered.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* RLC must be running before the CP is started */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
3751 
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini hook: halt CP/RLC, free MQDs.
 * Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* stop the command processors before tearing down their queues */
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	return 0;
}
3762 
/* Suspend is just a full hw teardown for this IP block. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
}
3769 
/* Resume is just a full hw re-init for this IP block. */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
}
3776 
3777 static bool gfx_v8_0_is_idle(void *handle)
3778 {
3779 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3780 
3781 	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3782 		return false;
3783 	else
3784 		return true;
3785 }
3786 
3787 static int gfx_v8_0_wait_for_idle(void *handle)
3788 {
3789 	unsigned i;
3790 	u32 tmp;
3791 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3792 
3793 	for (i = 0; i < adev->usec_timeout; i++) {
3794 		/* read MC_STATUS */
3795 		tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3796 
3797 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3798 			return 0;
3799 		udelay(1);
3800 	}
3801 	return -ETIMEDOUT;
3802 }
3803 
/*
 * gfx_v8_0_print_status - dump GFX 8 status/config registers to dmesg
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Debug aid used around soft reset: prints GRBM/CP status, tiling
 * tables, per-SE raster config, address config, CP ring state, RLC
 * state and the per-VMID SH_MEM_* setup.  Read-only apart from the
 * SE/SH and SRBM selection it performs (both restored before return).
 */
static void gfx_v8_0_print_status(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	dev_info(adev->dev, "GFX 8.x registers\n");
	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
		 RREG32(mmGRBM_STATUS));
	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
		 RREG32(mmGRBM_STATUS2));
	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE0));
	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE1));
	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE2));
	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		 RREG32(mmGRBM_STATUS_SE3));
	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT1));
	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT2));
	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT3));
	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(mmCP_CPF_BUSY_STAT));
	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPF_STALLED_STAT1));
	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPC_STALLED_STAT1));
	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));

	/* NOTE(review): mm* offsets are dword-indexed and the GB_TILE_MODE /
	 * GB_MACROTILE_MODE registers are consecutive, so the (i * 4) stride
	 * below looks like it reads every 4th register rather than each one —
	 * confirm against the init path, which programs them with + i. */
	for (i = 0; i < 32; i++) {
		dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
	}
	for (i = 0; i < 16; i++) {
		dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
	}
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		dev_info(adev->dev, "  se: %d\n", i);
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG));
		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG_1));
	}
	/* restore broadcast SE/SH selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmGB_ADDR_CONFIG));
	dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmHDP_ADDR_CONFIG));
	dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
		 RREG32(mmDMIF_ADDR_CALC));
	dev_info(adev->dev, "  SDMA0_TILING_CONFIG=0x%08X\n",
		 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
	dev_info(adev->dev, "  SDMA1_TILING_CONFIG=0x%08X\n",
		 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
	dev_info(adev->dev, "  UVD_UDEC_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_ADDR_CONFIG));
	dev_info(adev->dev, "  UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
	dev_info(adev->dev, "  UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));

	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
		 RREG32(mmCP_MEQ_THRESHOLDS));
	dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
		 RREG32(mmSX_DEBUG_1));
	dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
		 RREG32(mmTA_CNTL_AUX));
	dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL));
	dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
		 RREG32(mmSQ_CONFIG));
	dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
		 RREG32(mmDB_DEBUG));
	dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
		 RREG32(mmDB_DEBUG2));
	dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
		 RREG32(mmDB_DEBUG3));
	dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
		 RREG32(mmCB_HW_CONTROL));
	dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL_1));
	dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
		 RREG32(mmPA_SC_FIFO_SIZE));
	dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
		 RREG32(mmVGT_NUM_INSTANCES));
	dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
		 RREG32(mmCP_PERFMON_CNTL));
	dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
		 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
	dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
		 RREG32(mmVGT_CACHE_INVALIDATION));
	dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
		 RREG32(mmVGT_GS_VERTEX_REUSE));
	dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
		 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
	dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
		 RREG32(mmPA_CL_ENHANCE));
	dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
		 RREG32(mmPA_SC_ENHANCE));

	dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
		 RREG32(mmCP_ME_CNTL));
	dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
		 RREG32(mmCP_MAX_CONTEXT));
	dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
		 RREG32(mmCP_ENDIAN_SWAP));
	dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
		 RREG32(mmCP_DEVICE_ID));

	dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
		 RREG32(mmCP_SEM_WAIT_TIMER));

	dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
		 RREG32(mmCP_RB_WPTR_DELAY));
	dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
		 RREG32(mmCP_RB_VMID));
	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
		 RREG32(mmCP_RB0_WPTR));
	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR));
	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR_HI));
	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
		 RREG32(mmCP_RB0_BASE));
	dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
		 RREG32(mmCP_RB0_BASE_HI));
	dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
		 RREG32(mmCP_MEC_CNTL));
	dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
		 RREG32(mmCP_CPF_DEBUG));

	dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
		 RREG32(mmSCRATCH_ADDR));
	dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
		 RREG32(mmSCRATCH_UMSK));

	dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
		 RREG32(mmCP_INT_CNTL_RING0));
	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
		 RREG32(mmRLC_CNTL));
	dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
		 RREG32(mmRLC_CGCG_CGLS_CTRL));
	dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_INIT));
	dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_MAX));
	dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
		 RREG32(mmRLC_LB_INIT_CU_MASK));
	dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
		 RREG32(mmRLC_LB_PARAMS));
	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
		 RREG32(mmRLC_MC_CNTL));
	dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
		 RREG32(mmRLC_UCODE_CNTL));

	/* dump per-VMID shader memory setup via SRBM indexing */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		dev_info(adev->dev, "  VM %d:\n", i);
		dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
			 RREG32(mmSH_MEM_CONFIG));
		dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_BASE));
		dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_LIMIT));
		dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
			 RREG32(mmSH_MEM_BASES));
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3992 
/*
 * gfx_v8_0_soft_reset - soft-reset the GFX block when it appears hung
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Inspects GRBM/SRBM status registers to decide which reset bits are
 * needed, then stops the RLC and CP and pulses the selected bits in
 * GRBM_SOFT_RESET / SRBM_SOFT_RESET (write, read back to post, delay,
 * clear).  Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		/* any busy gfx sub-block -> reset both CP and GFX */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		/* dump state before the reset for debugging */
		gfx_v8_0_print_status((void *)adev);
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		/* XXX todo */

		if (grbm_soft_reset) {
			/* assert, post (read back), hold 50us, then deassert */
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			/* same assert/deassert dance on the SRBM side */
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}
		/* Wait a little for things to settle down */
		udelay(50);
		gfx_v8_0_print_status((void *)adev);
	}
	return 0;
}
4076 
4077 /**
4078  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4079  *
4080  * @adev: amdgpu_device pointer
4081  *
4082  * Fetches a GPU clock counter snapshot.
4083  * Returns the 64 bit clock counter snapshot.
4084  */
4085 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4086 {
4087 	uint64_t clock;
4088 
4089 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4090 	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4091 	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4092 		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4093 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4094 	return clock;
4095 }
4096 
/*
 * gfx_v8_0_ring_emit_gds_switch - program the per-VMID GDS partition registers
 *
 * Emits four PM4 WRITE_DATA packets that set the GDS memory base/size, the
 * GWS allocation and the OA allocation registers for @vmid, selecting the
 * register offsets from the amdgpu_gds_reg_offset table.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* scale byte-based values down to the granularity the registers use */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size in the upper field, base in the lower */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at bit oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
4144 
/*
 * gfx_v8_0_early_init - early IP-block init hook
 *
 * Sets the gfx/compute ring counts and installs the GFX8 ring, interrupt
 * and GDS callback tables on @adev.  Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);

	return 0;
}
4157 
/* Powergating is not implemented for GFX8 here; accept the request as a no-op. */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}
4163 
/* Clockgating is not implemented for GFX8 here; accept the request as a no-op. */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	return 0;
}
4169 
4170 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4171 {
4172 	u32 rptr;
4173 
4174 	rptr = ring->adev->wb.wb[ring->rptr_offs];
4175 
4176 	return rptr;
4177 }
4178 
4179 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4180 {
4181 	struct amdgpu_device *adev = ring->adev;
4182 	u32 wptr;
4183 
4184 	if (ring->use_doorbell)
4185 		/* XXX check if swapping is necessary on BE */
4186 		wptr = ring->adev->wb.wb[ring->wptr_offs];
4187 	else
4188 		wptr = RREG32(mmCP_RB0_WPTR);
4189 
4190 	return wptr;
4191 }
4192 
4193 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4194 {
4195 	struct amdgpu_device *adev = ring->adev;
4196 
4197 	if (ring->use_doorbell) {
4198 		/* XXX check if swapping is necessary on BE */
4199 		adev->wb.wb[ring->wptr_offs] = ring->wptr;
4200 		WDOORBELL32(ring->doorbell_index, ring->wptr);
4201 	} else {
4202 		WREG32(mmCP_RB0_WPTR, ring->wptr);
4203 		(void)RREG32(mmCP_RB0_WPTR);
4204 	}
4205 }
4206 
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 *
 * Emits a WAIT_REG_MEM packet in write/wait/write mode that requests an HDP
 * flush via GPU_HDP_FLUSH_REQ and polls GPU_HDP_FLUSH_DONE until the
 * engine-specific done bit (ref_and_mask) is set.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		/* each MEC pipe has its own done bit, starting at CP2 (MEC1)
		 * or CP6 (MEC2)
		 */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
4238 
/*
 * gfx_v8_0_ring_emit_ib_gfx - schedule an indirect buffer on the gfx ring
 *
 * Writes the predicted next read pointer to the ring's next_rptr slot,
 * emits a SWITCH_BUFFER when the submission belongs to a new context, and
 * then emits the INDIRECT_BUFFER packet (the _CONST variant for CE IBs).
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				  struct amdgpu_ib *ib)
{
	bool need_ctx_switch = ring->current_ctx != ib->ctx;
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	/* drop the CE preamble IB for the same context */
	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
		return;

	/* account for the 2-dword SWITCH_BUFFER emitted below */
	if (need_ctx_switch)
		next_rptr += 2;

	/* account for the 4-dword INDIRECT_BUFFER packet itself */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (need_ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length in dwords plus the VM id in bits 31:24 */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* byte-swap enable */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4283 
/*
 * gfx_v8_0_ring_emit_ib_compute - schedule an indirect buffer on a compute ring
 *
 * Same as the gfx variant minus the context-switch/preamble handling, and
 * with INDIRECT_BUFFER_VALID set in the control word.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
				  struct amdgpu_ib *ib)
{
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	/* account for the 4-dword INDIRECT_BUFFER packet itself */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length in dwords plus the VM id in bits 31:24 */
	control |= ib->length_dw |
			   (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  (2 << 0) |	/* byte-swap enable */
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4313 
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the gfx ring
 *
 * Emits an EVENT_WRITE_EOP packet that flushes TC/TCL1 caches and writes
 * @seq to @addr (64-bit when AMDGPU_FENCE_FLAG_64BIT is set), optionally
 * raising an interrupt when AMDGPU_FENCE_FLAG_INT is set.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
4333 
4334 /**
4335  * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
4336  *
4337  * @ring: amdgpu ring buffer object
4338  * @semaphore: amdgpu semaphore object
4339  * @emit_wait: Is this a sempahore wait?
4340  *
4341  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4342  * from running ahead of semaphore waits.
4343  */
4344 static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
4345 					 struct amdgpu_semaphore *semaphore,
4346 					 bool emit_wait)
4347 {
4348 	uint64_t addr = semaphore->gpu_addr;
4349 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4350 
4351 	if (ring->adev->asic_type == CHIP_TOPAZ ||
4352 	    ring->adev->asic_type == CHIP_TONGA ||
4353 	    ring->adev->asic_type == CHIP_FIJI)
4354 		/* we got a hw semaphore bug in VI TONGA, return false to switch back to sw fence wait */
4355 		return false;
4356 	else {
4357 		amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
4358 		amdgpu_ring_write(ring, lower_32_bits(addr));
4359 		amdgpu_ring_write(ring, upper_32_bits(addr));
4360 		amdgpu_ring_write(ring, sel);
4361 	}
4362 
4363 	if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
4364 		/* Prevent the PFP from running ahead of the semaphore wait */
4365 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4366 		amdgpu_ring_write(ring, 0x0);
4367 	}
4368 
4369 	return true;
4370 }
4371 
/*
 * gfx_v8_0_ring_emit_vm_flush - flush the TLB for a VM id
 *
 * Waits for the ring's last fence to signal, reprograms the page table base
 * address register for @vm_id, requests a VM cache invalidate and waits for
 * it to complete.  On the gfx ring it additionally syncs CE/PFP with ME
 * around the update.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait until the last emitted fence has signaled */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
		 WAIT_REG_MEM_FUNCTION(3))); /* equal */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* update the page table base address for this VM id */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		/* VM contexts 0-7 use the first register bank */
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		/* VM contexts 8-15 use the second register bank */
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
4441 
4442 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4443 {
4444 	return ring->adev->wb.wb[ring->rptr_offs];
4445 }
4446 
4447 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4448 {
4449 	return ring->adev->wb.wb[ring->wptr_offs];
4450 }
4451 
4452 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4453 {
4454 	struct amdgpu_device *adev = ring->adev;
4455 
4456 	/* XXX check if swapping is necessary on BE */
4457 	adev->wb.wb[ring->wptr_offs] = ring->wptr;
4458 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4459 }
4460 
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 *
 * Emits a RELEASE_MEM packet (the compute-queue counterpart of
 * EVENT_WRITE_EOP) that flushes caches and writes @seq to @addr, optionally
 * raising an interrupt, per @flags.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
4481 
4482 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4483 						 enum amdgpu_interrupt_state state)
4484 {
4485 	u32 cp_int_cntl;
4486 
4487 	switch (state) {
4488 	case AMDGPU_IRQ_STATE_DISABLE:
4489 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4490 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4491 					    TIME_STAMP_INT_ENABLE, 0);
4492 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4493 		break;
4494 	case AMDGPU_IRQ_STATE_ENABLE:
4495 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4496 		cp_int_cntl =
4497 			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4498 				      TIME_STAMP_INT_ENABLE, 1);
4499 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4500 		break;
4501 	default:
4502 		break;
4503 	}
4504 }
4505 
4506 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4507 						     int me, int pipe,
4508 						     enum amdgpu_interrupt_state state)
4509 {
4510 	u32 mec_int_cntl, mec_int_cntl_reg;
4511 
4512 	/*
4513 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
4514 	 * handles the setting of interrupts for this specific pipe. All other
4515 	 * pipes' interrupts are set by amdkfd.
4516 	 */
4517 
4518 	if (me == 1) {
4519 		switch (pipe) {
4520 		case 0:
4521 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
4522 			break;
4523 		default:
4524 			DRM_DEBUG("invalid pipe %d\n", pipe);
4525 			return;
4526 		}
4527 	} else {
4528 		DRM_DEBUG("invalid me %d\n", me);
4529 		return;
4530 	}
4531 
4532 	switch (state) {
4533 	case AMDGPU_IRQ_STATE_DISABLE:
4534 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4535 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4536 					     TIME_STAMP_INT_ENABLE, 0);
4537 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4538 		break;
4539 	case AMDGPU_IRQ_STATE_ENABLE:
4540 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4541 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4542 					     TIME_STAMP_INT_ENABLE, 1);
4543 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4544 		break;
4545 	default:
4546 		break;
4547 	}
4548 }
4549 
4550 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4551 					     struct amdgpu_irq_src *source,
4552 					     unsigned type,
4553 					     enum amdgpu_interrupt_state state)
4554 {
4555 	u32 cp_int_cntl;
4556 
4557 	switch (state) {
4558 	case AMDGPU_IRQ_STATE_DISABLE:
4559 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4560 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4561 					    PRIV_REG_INT_ENABLE, 0);
4562 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4563 		break;
4564 	case AMDGPU_IRQ_STATE_ENABLE:
4565 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4566 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4567 					    PRIV_REG_INT_ENABLE, 0);
4568 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4569 		break;
4570 	default:
4571 		break;
4572 	}
4573 
4574 	return 0;
4575 }
4576 
4577 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4578 					      struct amdgpu_irq_src *source,
4579 					      unsigned type,
4580 					      enum amdgpu_interrupt_state state)
4581 {
4582 	u32 cp_int_cntl;
4583 
4584 	switch (state) {
4585 	case AMDGPU_IRQ_STATE_DISABLE:
4586 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4587 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4588 					    PRIV_INSTR_INT_ENABLE, 0);
4589 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4590 		break;
4591 	case AMDGPU_IRQ_STATE_ENABLE:
4592 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4593 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4594 					    PRIV_INSTR_INT_ENABLE, 1);
4595 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4596 		break;
4597 	default:
4598 		break;
4599 	}
4600 
4601 	return 0;
4602 }
4603 
/*
 * gfx_v8_0_set_eop_interrupt_state - dispatch an EOP interrupt state change
 *
 * Routes the request to the gfx ring handler or to the matching
 * MEC (me) / pipe compute handler based on the interrupt @type.
 * Unknown types are silently ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
4642 
/*
 * gfx_v8_0_eop_irq - CP end-of-pipe interrupt handler
 *
 * Decodes me/pipe/queue from the IV entry's ring_id and runs fence
 * processing on the ring that generated the interrupt (the single gfx
 * ring for me 0, the matching compute ring for me 1/2).
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id layout: bits 3:2 = me, bits 1:0 = pipe, bits 6:4 = queue */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
4674 
/*
 * Privileged-register fault handler: log the violation and schedule a GPU
 * reset via the device's reset work item.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
4683 
/*
 * Privileged-instruction fault handler: log the violation and schedule a
 * GPU reset via the device's reset work item.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
4692 
/* IP-block lifecycle callbacks for the GFX8 block. */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.early_init = gfx_v8_0_early_init,
	.late_init = NULL,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.print_status = gfx_v8_0_print_status,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
4709 
/* Ring callbacks for the gfx ring (userspace CS is handled elsewhere, so no parse_cs). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_semaphore = gfx_v8_0_ring_emit_semaphore,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
};
4725 
/* Ring callbacks for the compute rings; only ptr access, ib and fence emission differ from gfx. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_semaphore = gfx_v8_0_ring_emit_semaphore,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
};
4741 
4742 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
4743 {
4744 	int i;
4745 
4746 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4747 		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
4748 
4749 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
4750 		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
4751 }
4752 
/* Interrupt source callbacks: EOP fence interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

/* Interrupt source callbacks: privileged register faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

/* Interrupt source callbacks: privileged instruction faults. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
4767 
4768 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
4769 {
4770 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
4771 	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
4772 
4773 	adev->gfx.priv_reg_irq.num_types = 1;
4774 	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
4775 
4776 	adev->gfx.priv_inst_irq.num_types = 1;
4777 	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
4778 }
4779 
/*
 * gfx_v8_0_set_gds_init - initialize the GDS/GWS/OA pool bookkeeping
 *
 * Reads the total GDS size from hardware and picks per-partition sizes for
 * the gfx and CS users; the 64KB layout gets finer-grained GWS partitions.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
4807 
4808 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
4809 		u32 se, u32 sh)
4810 {
4811 	u32 mask = 0, tmp, tmp1;
4812 	int i;
4813 
4814 	gfx_v8_0_select_se_sh(adev, se, sh);
4815 	tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
4816 	tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
4817 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4818 
4819 	tmp &= 0xffff0000;
4820 
4821 	tmp |= tmp1;
4822 	tmp >>= 16;
4823 
4824 	for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
4825 		mask <<= 1;
4826 		mask |= 1;
4827 	}
4828 
4829 	return (~tmp) & mask;
4830 }
4831 
/*
 * gfx_v8_0_get_cu_info - gather compute-unit topology information
 *
 * Fills @cu_info with the per-SE/SH active-CU bitmaps, the total number of
 * active CUs and the "always on" CU mask (up to 2 CUs per shader array are
 * marked always-on, 8 bits reserved per SH in the mask).
 *
 * Returns 0 on success, -EINVAL when either pointer is NULL.
 */
int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
						 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;

	if (!adev || !cu_info)
		return -EINVAL;

	/* hold grbm_idx_mutex across the per-SE/SH register selection */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first two per SH become always-on */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	mutex_unlock(&adev->grbm_idx_mutex);
	return 0;
}
4868