/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_ring.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS 1
#define GFX8_MEC_HPD_SIZE 4096

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD 1
#define CLE_BPM_SERDES_CMD 0

/* BPM Register Address */
enum {
        BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength 14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static const char * const sq_edc_source_names[] = {
        "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
        "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
        "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
        "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
        "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
        "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
        "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK 0x0000007fL
#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT 0x00000000L

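/*
 * Program the per-ASIC "golden" register settings: the mgcg/cgcg clock
 * gating init sequence, the a10/a11 tuning values and the common GRBM/SPI
 * setup captured in the tables above. Polaris10 additionally fixes up the
 * ACLK divider and, on a few known board SKUs, reprograms a clock chip
 * over the ATOM i2c channel.
 */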
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        uint32_t data;

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_mgcg_cgcg_init,
                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_iceland_a11,
                                                        ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_golden_common_all,
                                                        ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_mgcg_cgcg_init,
                                                        ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_fiji_a10,
                                                        ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_golden_common_all,
                                                        ARRAY_SIZE(fiji_golden_common_all));
                break;

        case CHIP_TONGA:
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_mgcg_cgcg_init,
                                                        ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_tonga_a11,
                                                        ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_golden_common_all,
                                                        ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_VEGAM:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_vegam_a11,
                                                        ARRAY_SIZE(golden_settings_vegam_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        vegam_golden_common_all,
                                                        ARRAY_SIZE(vegam_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris11_a11,
                                                        ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris11_golden_common_all,
                                                        ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris10_a11,
                                                        ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris10_golden_common_all,
                                                        ARRAY_SIZE(polaris10_golden_common_all));
                data = RREG32_SMC(ixCG_ACLK_CNTL);
                data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
                data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
                WREG32_SMC(ixCG_ACLK_CNTL, data);
                if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_device_program_register_sequence(adev,
                                                        cz_mgcg_cgcg_init,
                                                        ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_settings_a11,
                                                        ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_common_all,
                                                        ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_mgcg_cgcg_init,
                                                        ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_settings_a11,
                                                        ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_common_all,
                                                        ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}

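/*
 * Basic ring sanity test: write 0xDEADBEEF to SCRATCH_REG0 through a
 * SET_UCONFIG_REG packet and poll the register until the value lands
 * or adev->usec_timeout microseconds elapse.
 */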
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r)
                return r;

        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(mmSCRATCH_REG0);
                if (tmp == 0xDEADBEEF)
                        break;
                udelay(1);
        }

        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;

        return r;
}

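/*
 * Indirect buffer test: seed a writeback slot with 0xCAFEDEAD, then
 * submit a small IB whose WRITE_DATA packet stores 0xDEADBEEF to that
 * slot. Success means the fence signalled within the timeout and the
 * memory write from the IB is visible.
 */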
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;

        unsigned int index;
        uint64_t gpu_addr;
        uint32_t tmp;
        long r;

        r = amdgpu_device_wb_get(adev, &index);
        if (r)
                return r;

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));

        r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
        if (r)
                goto err1;

        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ib.ptr[2] = lower_32_bits(gpu_addr);
        ib.ptr[3] = upper_32_bits(gpu_addr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                goto err2;
        }

        tmp = adev->wb.wb[index];
        if (tmp == 0xDEADBEEF)
                r = 0;
        else
                r = -EINVAL;

err2:
        amdgpu_ib_free(&ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_device_wb_free(adev, index);
        return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
        amdgpu_ucode_release(&adev->gfx.pfp_fw);
        amdgpu_ucode_release(&adev->gfx.me_fw);
        amdgpu_ucode_release(&adev->gfx.ce_fw);
        amdgpu_ucode_release(&adev->gfx.rlc_fw);
        amdgpu_ucode_release(&adev->gfx.mec_fw);
        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ))
                amdgpu_ucode_release(&adev->gfx.mec2_fw);

        kfree(adev->gfx.rlc.register_list_format);
}

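/*
 * Fetch and parse all gfx microcode images (PFP, ME, CE, RLC, MEC and,
 * where present, MEC2). Polaris parts first try the updated "_2.bin"
 * images and fall back to the original names when those are absent.
 * Header fields are byte-swapped from little endian as they are read.
 */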
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
        const struct gfx_firmware_header_v1_0 *cp_hdr;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        unsigned int *tmp = NULL, i;

        DRM_DEBUG("\n");

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                chip_name = "topaz";
                break;
        case CHIP_TONGA:
                chip_name = "tonga";
                break;
        case CHIP_CARRIZO:
                chip_name = "carrizo";
                break;
        case CHIP_FIJI:
                chip_name = "fiji";
                break;
        case CHIP_STONEY:
                chip_name = "stoney";
                break;
        case CHIP_POLARIS10:
                chip_name = "polaris10";
                break;
        case CHIP_POLARIS11:
                chip_name = "polaris11";
                break;
        case CHIP_POLARIS12:
                chip_name = "polaris12";
                break;
        case CHIP_VEGAM:
                chip_name = "vegam";
                break;
        default:
                BUG();
        }

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
                                           AMDGPU_UCODE_OPTIONAL,
                                           "amdgpu/%s_pfp_2.bin", chip_name);
                if (err == -ENODEV) {
                        err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
                                                   AMDGPU_UCODE_REQUIRED,
                                                   "amdgpu/%s_pfp.bin", chip_name);
                }
        } else {
                err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
                                           AMDGPU_UCODE_REQUIRED,
                                           "amdgpu/%s_pfp.bin", chip_name);
        }
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
                                           AMDGPU_UCODE_OPTIONAL,
                                           "amdgpu/%s_me_2.bin", chip_name);
                if (err == -ENODEV) {
                        err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
                                                   AMDGPU_UCODE_REQUIRED,
                                                   "amdgpu/%s_me.bin", chip_name);
                }
        } else {
                err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
                                           AMDGPU_UCODE_REQUIRED,
                                           "amdgpu/%s_me.bin", chip_name);
        }
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
                                           AMDGPU_UCODE_OPTIONAL,
                                           "amdgpu/%s_ce_2.bin", chip_name);
                if (err == -ENODEV) {
                        err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
                                                   AMDGPU_UCODE_REQUIRED,
                                                   "amdgpu/%s_ce.bin", chip_name);
                }
        } else {
                err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
                                           AMDGPU_UCODE_REQUIRED,
                                           "amdgpu/%s_ce.bin", chip_name);
        }
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        /*
         * Support for MCBP/Virtualization in combination with chained IBs
         * was formally released with feature version #46.
         */
        if (adev->gfx.ce_feature_version >= 46 &&
            adev->gfx.pfp_feature_version >= 46) {
                adev->virt.chained_ib_support = true;
                DRM_INFO("Chained IB support enabled!\n");
        } else
                adev->virt.chained_ib_support = false;
1054
1055 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1056 AMDGPU_UCODE_REQUIRED,
1057 "amdgpu/%s_rlc.bin", chip_name);
1058 if (err)
1059 goto out;
1060 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1061 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1062 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1063
1064 adev->gfx.rlc.save_and_restore_offset =
1065 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1066 adev->gfx.rlc.clear_state_descriptor_offset =
1067 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1068 adev->gfx.rlc.avail_scratch_ram_locations =
1069 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1070 adev->gfx.rlc.reg_restore_list_size =
1071 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1072 adev->gfx.rlc.reg_list_format_start =
1073 le32_to_cpu(rlc_hdr->reg_list_format_start);
1074 adev->gfx.rlc.reg_list_format_separate_start =
1075 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1076 adev->gfx.rlc.starting_offsets_start =
1077 le32_to_cpu(rlc_hdr->starting_offsets_start);
1078 adev->gfx.rlc.reg_list_format_size_bytes =
1079 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1080 adev->gfx.rlc.reg_list_size_bytes =
1081 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1082
1083 adev->gfx.rlc.register_list_format =
1084 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1085 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1086
1087 if (!adev->gfx.rlc.register_list_format) {
1088 err = -ENOMEM;
1089 goto out;
1090 }
1091
1092 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1093 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1094 for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1095 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1096
1097 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1098
1099 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1100 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1101 for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1102 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1103
1104 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1105 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1106 AMDGPU_UCODE_OPTIONAL,
1107 "amdgpu/%s_mec_2.bin", chip_name);
1108 if (err == -ENODEV) {
1109 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1110 AMDGPU_UCODE_REQUIRED,
1111 "amdgpu/%s_mec.bin", chip_name);
1112 }
1113 } else {
1114 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1115 AMDGPU_UCODE_REQUIRED,
1116 "amdgpu/%s_mec.bin", chip_name);
1117 }
1118 if (err)
1119 goto out;
1120 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1121 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1122 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1123
1124 if ((adev->asic_type != CHIP_STONEY) &&
1125 (adev->asic_type != CHIP_TOPAZ)) {
1126 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1127 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1128 AMDGPU_UCODE_OPTIONAL,
1129 "amdgpu/%s_mec2_2.bin", chip_name);
1130 if (err == -ENODEV) {
1131 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1132 AMDGPU_UCODE_REQUIRED,
1133 "amdgpu/%s_mec2.bin", chip_name);
1134 }
1135 } else {
1136 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1137 AMDGPU_UCODE_REQUIRED,
1138 "amdgpu/%s_mec2.bin", chip_name);
1139 }
1140 if (!err) {
1141 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1142 adev->gfx.mec2_fw->data;
1143 adev->gfx.mec2_fw_version =
1144 le32_to_cpu(cp_hdr->header.ucode_version);
1145 adev->gfx.mec2_feature_version =
1146 le32_to_cpu(cp_hdr->ucode_feature_version);
1147 } else {
1148 err = 0;
1149 adev->gfx.mec2_fw = NULL;
1150 }
1151 }
1152
1153 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1154 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1155 info->fw = adev->gfx.pfp_fw;
1156 header = (const struct common_firmware_header *)info->fw->data;
1157 adev->firmware.fw_size +=
1158 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1159
1160 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1161 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1162 info->fw = adev->gfx.me_fw;
1163 header = (const struct common_firmware_header *)info->fw->data;
1164 adev->firmware.fw_size +=
1165 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1166
1167 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1168 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1169 info->fw = adev->gfx.ce_fw;
1170 header = (const struct common_firmware_header *)info->fw->data;
1171 adev->firmware.fw_size +=
1172 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1173
1174 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1175 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1176 info->fw = adev->gfx.rlc_fw;
1177 header = (const struct common_firmware_header *)info->fw->data;
1178 adev->firmware.fw_size +=
1179 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1180
1181 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1182 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1183 info->fw = adev->gfx.mec_fw;
1184 header = (const struct common_firmware_header *)info->fw->data;
1185 adev->firmware.fw_size +=
1186 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1187
        /* we also need to account for the JT (jump table) */
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
        adev->firmware.fw_size +=
                ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

        if (amdgpu_sriov_vf(adev)) {
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
                info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
                info->fw = adev->gfx.mec_fw;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
        }

        if (adev->gfx.mec2_fw) {
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
                info->fw = adev->gfx.mec2_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
        }

out:
        if (err) {
                dev_err(adev->dev, "gfx8: Failed to load gfx firmware for %s\n", chip_name);
                amdgpu_ucode_release(&adev->gfx.pfp_fw);
                amdgpu_ucode_release(&adev->gfx.me_fw);
                amdgpu_ucode_release(&adev->gfx.ce_fw);
                amdgpu_ucode_release(&adev->gfx.rlc_fw);
                amdgpu_ucode_release(&adev->gfx.mec_fw);
                amdgpu_ucode_release(&adev->gfx.mec2_fw);
        }
        return err;
}

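/*
 * Build the clear state buffer (CSB): emit the common preamble and the
 * parsed clear state data, then the saved PA_SC_RASTER_CONFIG values
 * for this chip, and close with the preamble end packets.
 */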
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
        u32 count = 0;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        count = amdgpu_gfx_csb_preamble_start(buffer);
        count = amdgpu_gfx_csb_data_parser(adev, buffer, count);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

        amdgpu_gfx_csb_preamble_end(buffer, count);
}

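/* number of CP jump table entries; Carrizo carries one extra */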
static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
{
        if (adev->asic_type == CHIP_CARRIZO)
                return 5;
        else
                return 4;
}

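/*
 * RLC setup: create the clear state BO from vi_cs_data and, on the APUs
 * (Carrizo/Stoney), the CP jump table + GDS backup buffer, then seed
 * the SPM VMID with 0xf.
 */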
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
        const struct cs_section_def *cs_data;
        int r;

        adev->gfx.rlc.cs_data = vi_cs_data;

        cs_data = adev->gfx.rlc.cs_data;

        if (cs_data) {
                /* init clear state block */
                r = amdgpu_gfx_rlc_init_csb(adev);
                if (r)
                        return r;
        }

        if ((adev->asic_type == CHIP_CARRIZO) ||
            (adev->asic_type == CHIP_STONEY)) {
                adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
                r = amdgpu_gfx_rlc_init_cpt(adev);
                if (r)
                        return r;
        }

        /* init spm vmid with 0xf */
        if (adev->gfx.rlc.funcs->update_spm_vmid)
                adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);

        return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
        amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

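/*
 * Allocate and zero the MEC HPD EOP buffer: GFX8_MEC_HPD_SIZE bytes for
 * each compute ring this driver owns, pinned in VRAM or GTT.
 */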
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
        int r;
        u32 *hpd;
        size_t mec_hpd_size;

        bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

        /* take ownership of the relevant compute queues */
        amdgpu_gfx_compute_queue_acquire(adev);

        mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
        if (mec_hpd_size) {
                r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
                                              AMDGPU_GEM_DOMAIN_VRAM |
                                              AMDGPU_GEM_DOMAIN_GTT,
                                              &adev->gfx.mec.hpd_eop_obj,
                                              &adev->gfx.mec.hpd_eop_gpu_addr,
                                              (void **)&hpd);
                if (r) {
                        dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
                        return r;
                }

                memset(hpd, 0, mec_hpd_size);

                amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
                amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
        }

        return 0;
}

static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};

gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device * adev)1474 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1475 {
1476 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1477 struct amdgpu_ib ib;
1478 struct dma_fence *f = NULL;
1479 int r, i;
1480 u32 tmp;
1481 unsigned total_size, vgpr_offset, sgpr_offset;
1482 u64 gpu_addr;
1483
1484 /* only supported on CZ */
1485 if (adev->asic_type != CHIP_CARRIZO)
1486 return 0;
1487
1488 /* bail if the compute ring is not ready */
1489 if (!ring->sched.ready)
1490 return 0;
1491
1492 tmp = RREG32(mmGB_EDC_MODE);
1493 WREG32(mmGB_EDC_MODE, 0);
1494
1495 total_size =
1496 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1497 total_size +=
1498 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1499 total_size +=
1500 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1501 total_size = ALIGN(total_size, 256);
1502 vgpr_offset = total_size;
1503 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1504 sgpr_offset = total_size;
1505 total_size += sizeof(sgpr_init_compute_shader);
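
	/*
	 * Dword accounting for the terms above: each register write costs
	 * three dwords (SET_SH_REG header, offset, value), the shader
	 * address write costs four, the dispatch packet five and the
	 * partial flush two, all multiplied by four to convert to bytes.
	 */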

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
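	/* COMPUTE_PGM_LO/HI take the shader address in 256-byte units, hence the shift */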
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
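	/* event 7 is CS_PARTIAL_FLUSH: wait for the dispatch above to drain */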

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);

	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);

	return r;
}

static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
	case CHIP_VEGAM:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
						   MC_ARB_RAMCFG, NOOFBANK);
	adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
						   MC_ARB_RAMCFG, NOOFRANKS);

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM is installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If the DIMM address map is 8GB, the row size should be 2KB; otherwise 1KB. */
		/* If row size(DIMM1) != row size(DIMM0), the row size should be the larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}
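
	/*
	 * In the non-APU branch above, mem_row_size_in_kb works out to
	 * 4 * 2^(8 + NOOFCOLS) / 1024: NOOFCOLS values 0, 1 and 2 give
	 * 1 KB, 2 KB and 4 KB rows, and anything larger is clamped to 4 KB.
	 */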

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}

static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				      int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
	unsigned int hw_prio;

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
			     + (ring_id * GFX8_MEC_HPD_SIZE);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		   + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		   + ring->pipe;
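	/*
	 * Each MEC pipe has its own EOP interrupt source: base id plus
	 * (me - 1) * num_pipe_per_mec + pipe (me is 1-based since mec0
	 * is me1).
	 */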

	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
			     hw_prio, NULL);
	if (r)
		return r;

	return 0;
}

static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);

static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block)
{
	int i, j, k, r, ring_id;
	int xcc_id = 0;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = ip_block->adev;

	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;
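
	/* At most 2 MECs * 4 pipes * 8 queues = 64 compute queues. */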

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = adev->gfx.rlc.funcs->init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = adev->doorbell_index.gfx_ring0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
				     AMDGPU_RING_PRIO_DEFAULT, NULL);
		if (r)
			return r;
	}

	/* set up the compute queues - allocate horizontally across pipes */
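	/*
	 * The pipe index (k) is the innermost loop below, so consecutive
	 * ring IDs land on different pipes before any pipe's deeper queue
	 * slots are used.
	 */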
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
								     k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE, 0);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation), 0);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;
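	/* 0x8000 bytes: 32 KB of constant engine (CE) RAM */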

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	adev->gfx.gfx_supported_reset =
		amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
	adev->gfx.compute_supported_reset =
		amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);

	return 0;
}

static int gfx_v8_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int i;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_mqd_sw_fini(adev, 0);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
	amdgpu_gfx_kiq_fini(adev, 0);

	gfx_v8_0_mec_fini(adev);
	amdgpu_gfx_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				      &adev->gfx.rlc.cp_table_gpu_addr,
				      (void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}

static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
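		/*
		 * Indices 7, 12, 17 and 23 were never assigned above, so
		 * they are skipped below rather than being written back as
		 * the zeroes left by the clearing loops at the top.
		 */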
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_FIJI:
	case CHIP_VEGAM:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_TONGA:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2884 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2885 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2886 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2887 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2888 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2889 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2891 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2892 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2894 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2895 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2896 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2897 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2898 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2900 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2901 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2902 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2904 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2905 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2906 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2909 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2910 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2912 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2913 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2914 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2916 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2917 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2918 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2920 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2921 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2922 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2924 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2925 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2926 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2927 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2928 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2929 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2930 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2932 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2933 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2934 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2936 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2937 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2938 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2940 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2941 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2942 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2944 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2946 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2948 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2952 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2956 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2957
2958 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2959 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2960 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2961 NUM_BANKS(ADDR_SURF_16_BANK));
2962
2963 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2965 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966 NUM_BANKS(ADDR_SURF_16_BANK));
2967
2968 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2970 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 NUM_BANKS(ADDR_SURF_16_BANK));
2972
2973 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2974 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2975 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2976 NUM_BANKS(ADDR_SURF_16_BANK));
2977
2978 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2979 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2980 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2981 NUM_BANKS(ADDR_SURF_16_BANK));
2982
2983 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2985 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2986 NUM_BANKS(ADDR_SURF_16_BANK));
2987
2988 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2989 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2990 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2991 NUM_BANKS(ADDR_SURF_16_BANK));
2992
2993 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2994 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2995 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2996 NUM_BANKS(ADDR_SURF_16_BANK));
2997
2998 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2999 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3000 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3001 NUM_BANKS(ADDR_SURF_16_BANK));
3002
3003 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3004 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3005 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3006 NUM_BANKS(ADDR_SURF_16_BANK));
3007
3008 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3011 NUM_BANKS(ADDR_SURF_16_BANK));
3012
3013 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3014 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3015 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3016 NUM_BANKS(ADDR_SURF_8_BANK));
3017
3018 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3019 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3020 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3021 NUM_BANKS(ADDR_SURF_4_BANK));
3022
3023 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3025 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3026 NUM_BANKS(ADDR_SURF_4_BANK));
3027
3028 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3029 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3030
3031 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3032 if (reg_offset != 7)
3033 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3034
3035 break;
3036 case CHIP_STONEY:
3037 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3038 PIPE_CONFIG(ADDR_SURF_P2) |
3039 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3040 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3041 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3042 PIPE_CONFIG(ADDR_SURF_P2) |
3043 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3044 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3045 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3046 PIPE_CONFIG(ADDR_SURF_P2) |
3047 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3048 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3049 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3050 PIPE_CONFIG(ADDR_SURF_P2) |
3051 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3052 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3053 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3054 PIPE_CONFIG(ADDR_SURF_P2) |
3055 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3056 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3057 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3058 PIPE_CONFIG(ADDR_SURF_P2) |
3059 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3060 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3061 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3062 PIPE_CONFIG(ADDR_SURF_P2) |
3063 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3064 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3065 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3066 PIPE_CONFIG(ADDR_SURF_P2));
3067 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3068 PIPE_CONFIG(ADDR_SURF_P2) |
3069 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3071 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3072 PIPE_CONFIG(ADDR_SURF_P2) |
3073 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3074 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3075 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3076 PIPE_CONFIG(ADDR_SURF_P2) |
3077 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3079 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3080 PIPE_CONFIG(ADDR_SURF_P2) |
3081 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3083 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3084 PIPE_CONFIG(ADDR_SURF_P2) |
3085 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3087 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3088 PIPE_CONFIG(ADDR_SURF_P2) |
3089 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3091 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3092 PIPE_CONFIG(ADDR_SURF_P2) |
3093 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3095 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3096 PIPE_CONFIG(ADDR_SURF_P2) |
3097 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3099 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3100 PIPE_CONFIG(ADDR_SURF_P2) |
3101 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3103 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3104 PIPE_CONFIG(ADDR_SURF_P2) |
3105 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3107 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3108 PIPE_CONFIG(ADDR_SURF_P2) |
3109 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3111 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3112 PIPE_CONFIG(ADDR_SURF_P2) |
3113 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3115 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3116 PIPE_CONFIG(ADDR_SURF_P2) |
3117 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3119 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3120 PIPE_CONFIG(ADDR_SURF_P2) |
3121 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3123 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3124 PIPE_CONFIG(ADDR_SURF_P2) |
3125 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3127 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3128 PIPE_CONFIG(ADDR_SURF_P2) |
3129 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3130 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3131 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3132 PIPE_CONFIG(ADDR_SURF_P2) |
3133 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3134 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3135 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3136 PIPE_CONFIG(ADDR_SURF_P2) |
3137 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3138 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3139
3140 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3143 NUM_BANKS(ADDR_SURF_8_BANK));
3144 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3145 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3146 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3147 NUM_BANKS(ADDR_SURF_8_BANK));
3148 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3149 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3150 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3151 NUM_BANKS(ADDR_SURF_8_BANK));
3152 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3153 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3154 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3155 NUM_BANKS(ADDR_SURF_8_BANK));
3156 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3157 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3158 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3159 NUM_BANKS(ADDR_SURF_8_BANK));
3160 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3163 NUM_BANKS(ADDR_SURF_8_BANK));
3164 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3167 NUM_BANKS(ADDR_SURF_8_BANK));
3168 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3169 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3170 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3171 NUM_BANKS(ADDR_SURF_16_BANK));
3172 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3175 NUM_BANKS(ADDR_SURF_16_BANK));
3176 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3179 NUM_BANKS(ADDR_SURF_16_BANK));
3180 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3183 NUM_BANKS(ADDR_SURF_16_BANK));
3184 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3187 NUM_BANKS(ADDR_SURF_16_BANK));
3188 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3191 NUM_BANKS(ADDR_SURF_16_BANK));
3192 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3195 NUM_BANKS(ADDR_SURF_8_BANK));
3196
3197 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3198 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3199 reg_offset != 23)
3200 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3201
3202 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3203 if (reg_offset != 7)
3204 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3205
3206 break;
3207 default:
3208 dev_warn(adev->dev,
3209 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3210 adev->asic_type);
3211 fallthrough;
3212
3213 case CHIP_CARRIZO:
3214 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3215 PIPE_CONFIG(ADDR_SURF_P2) |
3216 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3217 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3218 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3219 PIPE_CONFIG(ADDR_SURF_P2) |
3220 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3221 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3222 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3223 PIPE_CONFIG(ADDR_SURF_P2) |
3224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3226 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3227 PIPE_CONFIG(ADDR_SURF_P2) |
3228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3230 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3231 PIPE_CONFIG(ADDR_SURF_P2) |
3232 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3233 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3234 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3235 PIPE_CONFIG(ADDR_SURF_P2) |
3236 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3237 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3238 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3239 PIPE_CONFIG(ADDR_SURF_P2) |
3240 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3241 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3242 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3243 PIPE_CONFIG(ADDR_SURF_P2));
3244 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3245 PIPE_CONFIG(ADDR_SURF_P2) |
3246 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3248 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3249 PIPE_CONFIG(ADDR_SURF_P2) |
3250 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3251 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3252 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3253 PIPE_CONFIG(ADDR_SURF_P2) |
3254 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3255 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3256 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3257 PIPE_CONFIG(ADDR_SURF_P2) |
3258 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3260 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3261 PIPE_CONFIG(ADDR_SURF_P2) |
3262 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3265 PIPE_CONFIG(ADDR_SURF_P2) |
3266 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3268 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3269 PIPE_CONFIG(ADDR_SURF_P2) |
3270 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3272 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3273 PIPE_CONFIG(ADDR_SURF_P2) |
3274 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3276 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3277 PIPE_CONFIG(ADDR_SURF_P2) |
3278 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3280 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3281 PIPE_CONFIG(ADDR_SURF_P2) |
3282 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3284 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3285 PIPE_CONFIG(ADDR_SURF_P2) |
3286 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3288 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3289 PIPE_CONFIG(ADDR_SURF_P2) |
3290 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3291 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3292 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3293 PIPE_CONFIG(ADDR_SURF_P2) |
3294 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3296 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3297 PIPE_CONFIG(ADDR_SURF_P2) |
3298 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3300 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3301 PIPE_CONFIG(ADDR_SURF_P2) |
3302 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3304 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3305 PIPE_CONFIG(ADDR_SURF_P2) |
3306 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3308 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3309 PIPE_CONFIG(ADDR_SURF_P2) |
3310 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3313 PIPE_CONFIG(ADDR_SURF_P2) |
3314 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3315 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3316
3317 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3318 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3319 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3320 NUM_BANKS(ADDR_SURF_8_BANK));
3321 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3324 NUM_BANKS(ADDR_SURF_8_BANK));
3325 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3326 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3327 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3328 NUM_BANKS(ADDR_SURF_8_BANK));
3329 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3330 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3331 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3332 NUM_BANKS(ADDR_SURF_8_BANK));
3333 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3334 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3335 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3336 NUM_BANKS(ADDR_SURF_8_BANK));
3337 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3338 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3339 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3340 NUM_BANKS(ADDR_SURF_8_BANK));
3341 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3344 NUM_BANKS(ADDR_SURF_8_BANK));
3345 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348 NUM_BANKS(ADDR_SURF_16_BANK));
3349 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3352 NUM_BANKS(ADDR_SURF_16_BANK));
3353 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3356 NUM_BANKS(ADDR_SURF_16_BANK));
3357 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3360 NUM_BANKS(ADDR_SURF_16_BANK));
3361 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3364 NUM_BANKS(ADDR_SURF_16_BANK));
3365 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3366 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3367 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3368 NUM_BANKS(ADDR_SURF_16_BANK));
3369 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3371 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3372 NUM_BANKS(ADDR_SURF_8_BANK));
3373
3374 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3375 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3376 reg_offset != 23)
3377 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3378
3379 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3380 if (reg_offset != 7)
3381 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3382
3383 break;
3384 }
3385 }
3386
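/*
 * Steer subsequent GRBM register accesses to a specific SE/SH/instance
 * via GRBM_GFX_INDEX.  Passing 0xffffffff for a field selects broadcast
 * writes to all units of that type.  Callers take adev->grbm_idx_mutex
 * around select/access/deselect sequences.
 */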
3387 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3388 u32 se_num, u32 sh_num, u32 instance,
3389 int xcc_id)
3390 {
3391 u32 data;
3392
3393 if (instance == 0xffffffff)
3394 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3395 else
3396 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3397
3398 if (se_num == 0xffffffff)
3399 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3400 else
3401 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3402
3403 if (sh_num == 0xffffffff)
3404 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3405 else
3406 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3407
3408 WREG32(mmGRBM_GFX_INDEX, data);
3409 }
3410
3411 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3412 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
3413 {
3414 vi_srbm_select(adev, me, pipe, q, vm);
3415 }
3416
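/*
 * Return the bitmap of render backends (RBs) active in the currently
 * selected SE/SH: the fuse-level (CC_*) and driver-level (GC_USER_*)
 * disable masks are OR'd together, then inverted against a mask sized
 * to the number of RBs per SH.
 */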
3417 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3418 {
3419 u32 data, mask;
3420
3421 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3422 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3423
3424 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3425
3426 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3427 adev->gfx.config.max_sh_per_se);
3428
3429 return (~data) & mask;
3430 }
3431
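/*
 * Provide the per-ASIC golden PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1
 * values.  The RB/packer/SE mapping fields depend on how many shader
 * engines and backends each chip has, so every family gets its own set.
 */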
3432 static void
3433 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3434 {
3435 switch (adev->asic_type) {
3436 case CHIP_FIJI:
3437 case CHIP_VEGAM:
3438 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3439 RB_XSEL2(1) | PKR_MAP(2) |
3440 PKR_XSEL(1) | PKR_YSEL(1) |
3441 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3442 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3443 SE_PAIR_YSEL(2);
3444 break;
3445 case CHIP_TONGA:
3446 case CHIP_POLARIS10:
3447 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3448 SE_XSEL(1) | SE_YSEL(1);
3449 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3450 SE_PAIR_YSEL(2);
3451 break;
3452 case CHIP_TOPAZ:
3453 case CHIP_CARRIZO:
3454 *rconf |= RB_MAP_PKR0(2);
3455 *rconf1 |= 0x0;
3456 break;
3457 case CHIP_POLARIS11:
3458 case CHIP_POLARIS12:
3459 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3460 SE_XSEL(1) | SE_YSEL(1);
3461 *rconf1 |= 0x0;
3462 break;
3463 case CHIP_STONEY:
3464 *rconf |= 0x0;
3465 *rconf1 |= 0x0;
3466 break;
3467 default:
3468 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3469 break;
3470 }
3471 }
3472
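/*
 * With harvested RBs, the golden raster config would route work to
 * disabled backends.  Rewrite the RB_MAP/PKR_MAP/SE_MAP fields per SE
 * so every mapping points at a backend that is actually present, then
 * restore broadcast mode.
 */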
3473 static void
3474 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3475 u32 raster_config, u32 raster_config_1,
3476 unsigned rb_mask, unsigned num_rb)
3477 {
3478 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3479 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3480 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3481 unsigned rb_per_se = num_rb / num_se;
3482 unsigned se_mask[4];
3483 unsigned se;
3484
3485 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3486 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3487 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3488 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3489
3490 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3491 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3492 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3493
3494 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3495 (!se_mask[2] && !se_mask[3]))) {
3496 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3497
3498 if (!se_mask[0] && !se_mask[1]) {
3499 raster_config_1 |=
3500 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3501 } else {
3502 raster_config_1 |=
3503 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3504 }
3505 }
3506
3507 for (se = 0; se < num_se; se++) {
3508 unsigned raster_config_se = raster_config;
3509 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3510 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3511 int idx = (se / 2) * 2;
3512
3513 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3514 raster_config_se &= ~SE_MAP_MASK;
3515
3516 if (!se_mask[idx]) {
3517 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3518 } else {
3519 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3520 }
3521 }
3522
3523 pkr0_mask &= rb_mask;
3524 pkr1_mask &= rb_mask;
3525 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3526 raster_config_se &= ~PKR_MAP_MASK;
3527
3528 if (!pkr0_mask) {
3529 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3530 } else {
3531 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3532 }
3533 }
3534
3535 if (rb_per_se >= 2) {
3536 unsigned rb0_mask = 1 << (se * rb_per_se);
3537 unsigned rb1_mask = rb0_mask << 1;
3538
3539 rb0_mask &= rb_mask;
3540 rb1_mask &= rb_mask;
3541 if (!rb0_mask || !rb1_mask) {
3542 raster_config_se &= ~RB_MAP_PKR0_MASK;
3543
3544 if (!rb0_mask) {
3545 raster_config_se |=
3546 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3547 } else {
3548 raster_config_se |=
3549 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3550 }
3551 }
3552
3553 if (rb_per_se > 2) {
3554 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3555 rb1_mask = rb0_mask << 1;
3556 rb0_mask &= rb_mask;
3557 rb1_mask &= rb_mask;
3558 if (!rb0_mask || !rb1_mask) {
3559 raster_config_se &= ~RB_MAP_PKR1_MASK;
3560
3561 if (!rb0_mask) {
3562 raster_config_se |=
3563 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3564 } else {
3565 raster_config_se |=
3566 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3567 }
3568 }
3569 }
3570 }
3571
3572 /* GRBM_GFX_INDEX has a different offset on VI */
3573 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
3574 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3575 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3576 }
3577
3578 /* GRBM_GFX_INDEX has a different offset on VI */
3579 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3580 }
3581
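/*
 * Walk all SE/SH pairs to build the active-RB bitmap, program the
 * (possibly harvest-adjusted) raster configs, and cache the per-SE
 * register values in adev->gfx.config for userspace queries.
 */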
3582 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3583 {
3584 int i, j;
3585 u32 data;
3586 u32 raster_config = 0, raster_config_1 = 0;
3587 u32 active_rbs = 0;
3588 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3589 adev->gfx.config.max_sh_per_se;
3590 unsigned num_rb_pipes;
3591
3592 mutex_lock(&adev->grbm_idx_mutex);
3593 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3594 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3595 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3596 data = gfx_v8_0_get_rb_active_bitmap(adev);
3597 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3598 rb_bitmap_width_per_sh);
3599 }
3600 }
3601 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3602
3603 adev->gfx.config.backend_enable_mask = active_rbs;
3604 adev->gfx.config.num_rbs = hweight32(active_rbs);
3605
3606 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3607 adev->gfx.config.max_shader_engines, 16);
3608
3609 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3610
3611 if (!adev->gfx.config.backend_enable_mask ||
3612 adev->gfx.config.num_rbs >= num_rb_pipes) {
3613 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3614 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3615 } else {
3616 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3617 adev->gfx.config.backend_enable_mask,
3618 num_rb_pipes);
3619 }
3620
3621 /* cache the values for userspace */
3622 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3623 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3624 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3625 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3626 RREG32(mmCC_RB_BACKEND_DISABLE);
3627 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3628 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3629 adev->gfx.config.rb_config[i][j].raster_config =
3630 RREG32(mmPA_SC_RASTER_CONFIG);
3631 adev->gfx.config.rb_config[i][j].raster_config_1 =
3632 RREG32(mmPA_SC_RASTER_CONFIG_1);
3633 }
3634 }
3635 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3636 mutex_unlock(&adev->grbm_idx_mutex);
3637 }
3638
3639 #define DEFAULT_SH_MEM_BASES (0x6000)
3640 /**
3641 * gfx_v8_0_init_compute_vmid - init the compute VMID apertures
3642 *
3643 * @adev: amdgpu_device pointer
3644 *
3645 * Initialize compute vmid sh_mem registers
3646 *
3647 */
3648 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3649 {
3650 int i;
3651 uint32_t sh_mem_config;
3652 uint32_t sh_mem_bases;
3653
3654 /*
3655 * Configure apertures:
3656 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3657 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3658 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3659 */
3660 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3661
3662 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3663 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3664 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3665 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3666 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3667 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3668
3669 mutex_lock(&adev->srbm_mutex);
3670 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3671 vi_srbm_select(adev, 0, 0, 0, i);
3672 /* CP and shaders */
3673 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3674 WREG32(mmSH_MEM_APE1_BASE, 1);
3675 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3676 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3677 }
3678 vi_srbm_select(adev, 0, 0, 0, 0);
3679 mutex_unlock(&adev->srbm_mutex);
3680
3681 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3682  * access. These should be enabled by FW for target VMIDs. */
3683 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
3684 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3685 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3686 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3687 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3688 }
3689 }
3690
3691 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3692 {
3693 int vmid;
3694
3695 /*
3696 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3697 * access. Compute VMIDs should be enabled by FW for target VMIDs,
3698 * the driver can enable them for graphics. VMID0 should maintain
3699 * access so that HWS firmware can save/restore entries.
3700 */
3701 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
3702 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3703 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3704 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3705 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3706 }
3707 }
3708
3709 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3710 {
3711 switch (adev->asic_type) {
3712 default:
3713 adev->gfx.config.double_offchip_lds_buf = 1;
3714 break;
3715 case CHIP_CARRIZO:
3716 case CHIP_STONEY:
3717 adev->gfx.config.double_offchip_lds_buf = 0;
3718 break;
3719 }
3720 }
3721
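/*
 * One-time GFX golden setup: address config, tiling tables, RB and CU
 * discovery, SH_MEM aperture programming for every VMID, and the
 * broadcast PA_SC/SPI defaults below.
 */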
3722 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3723 {
3724 u32 tmp, sh_static_mem_cfg;
3725 int i;
3726
3727 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3728 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3729 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3730 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3731
3732 gfx_v8_0_tiling_mode_table_init(adev);
3733 gfx_v8_0_setup_rb(adev);
3734 gfx_v8_0_get_cu_info(adev);
3735 gfx_v8_0_config_init(adev);
3736
3737 /* XXX SH_MEM regs */
3738 /* where to put LDS, scratch, GPUVM in FSA64 space */
3739 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3740 SWIZZLE_ENABLE, 1);
3741 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3742 ELEMENT_SIZE, 1);
3743 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3744 INDEX_STRIDE, 3);
3745 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3746
3747 mutex_lock(&adev->srbm_mutex);
3748 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3749 vi_srbm_select(adev, 0, 0, 0, i);
3750 /* CP and shaders */
3751 if (i == 0) {
3752 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3753 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3754 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3755 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3756 WREG32(mmSH_MEM_CONFIG, tmp);
3757 WREG32(mmSH_MEM_BASES, 0);
3758 } else {
3759 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3760 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3761 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3762 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3763 WREG32(mmSH_MEM_CONFIG, tmp);
3764 tmp = adev->gmc.shared_aperture_start >> 48;
3765 WREG32(mmSH_MEM_BASES, tmp);
3766 }
3767
3768 WREG32(mmSH_MEM_APE1_BASE, 1);
3769 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3770 }
3771 vi_srbm_select(adev, 0, 0, 0, 0);
3772 mutex_unlock(&adev->srbm_mutex);
3773
3774 gfx_v8_0_init_compute_vmid(adev);
3775 gfx_v8_0_init_gds_vmid(adev);
3776
3777 mutex_lock(&adev->grbm_idx_mutex);
3778 /*
3779 * make sure that the following register writes will be broadcast
3780 * to all the shader engines
3781 */
3782 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3783
3784 WREG32(mmPA_SC_FIFO_SIZE,
3785 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3786 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3787 (adev->gfx.config.sc_prim_fifo_size_backend <<
3788 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3789 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3790 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3791 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3792 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3793
3794 tmp = RREG32(mmSPI_ARB_PRIORITY);
3795 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3796 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3797 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3798 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3799 WREG32(mmSPI_ARB_PRIORITY, tmp);
3800
3801 mutex_unlock(&adev->grbm_idx_mutex);
3802
3803 }
3804
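/*
 * Wait for the RLC serdes to go idle: first the per-CU master-busy
 * status for every SE/SH, then the non-CU masters, each polled for up
 * to adev->usec_timeout microseconds.
 */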
3805 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3806 {
3807 u32 i, j, k;
3808 u32 mask;
3809
3810 mutex_lock(&adev->grbm_idx_mutex);
3811 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3812 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3813 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
3814 for (k = 0; k < adev->usec_timeout; k++) {
3815 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3816 break;
3817 udelay(1);
3818 }
3819 if (k == adev->usec_timeout) {
3820 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3821 0xffffffff, 0xffffffff, 0);
3822 mutex_unlock(&adev->grbm_idx_mutex);
3823 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
3824 i, j);
3825 return;
3826 }
3827 }
3828 }
3829 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
3830 mutex_unlock(&adev->grbm_idx_mutex);
3831
3832 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3833 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3834 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3835 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3836 for (k = 0; k < adev->usec_timeout; k++) {
3837 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3838 break;
3839 udelay(1);
3840 }
3841 }
3842
3843 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3844 bool enable)
3845 {
3846 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3847
3848 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3849 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3850 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3851 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3852
3853 WREG32(mmCP_INT_CNTL_RING0, tmp);
3854 }
3855
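/*
 * Point the RLC at the clear state indirect buffer (CSIB) so it can
 * fetch the golden context state generated by get_csb_buffer().
 */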
3856 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3857 {
3858 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3859 /* csib */
3860 WREG32(mmRLC_CSIB_ADDR_HI,
3861 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3862 WREG32(mmRLC_CSIB_ADDR_LO,
3863 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3864 WREG32(mmRLC_CSIB_LENGTH,
3865 adev->gfx.rlc.clear_state_size);
3866 }
3867
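/*
 * Compact the RLC indirect register list in place.  Entries are
 * delimited by 0xFFFFFFFF markers and every third word of an entry is
 * an index register: each distinct index is gathered into
 * unique_indices[] and the list is rewritten to reference the
 * compacted slot number instead of the raw value.
 */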
3868 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3869 int ind_offset,
3870 int list_size,
3871 int *unique_indices,
3872 int *indices_count,
3873 int max_indices,
3874 int *ind_start_offsets,
3875 int *offset_count,
3876 int max_offset)
3877 {
3878 int indices;
3879 bool new_entry = true;
3880
3881 for (; ind_offset < list_size; ind_offset++) {
3882
3883 if (new_entry) {
3884 new_entry = false;
3885 ind_start_offsets[*offset_count] = ind_offset;
3886 *offset_count = *offset_count + 1;
3887 BUG_ON(*offset_count >= max_offset);
3888 }
3889
3890 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3891 new_entry = true;
3892 continue;
3893 }
3894
3895 ind_offset += 2;
3896
3897 /* look for the matching index */
3898 for (indices = 0;
3899 indices < *indices_count;
3900 indices++) {
3901 if (unique_indices[indices] ==
3902 register_list_format[ind_offset])
3903 break;
3904 }
3905
3906 if (indices >= *indices_count) {
3907 unique_indices[*indices_count] =
3908 register_list_format[ind_offset];
3909 indices = *indices_count;
3910 *indices_count = *indices_count + 1;
3911 BUG_ON(*indices_count >= max_indices);
3912 }
3913
3914 register_list_format[ind_offset] = indices;
3915 }
3916 }
3917
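/*
 * Upload the save/restore lists the RLC replays around power gating:
 * the direct register_restore list goes to SRM ARAM, the compacted
 * indirect list to GPM scratch, and the unique index registers to the
 * RLC_SRM_INDEX_CNTL_ADDR/DATA pairs.
 */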
3918 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3919 {
3920 int i, temp, data;
3921 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3922 int indices_count = 0;
3923 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3924 int offset_count = 0;
3925
3926 int list_size;
3927 unsigned int *register_list_format =
3928 kmemdup(adev->gfx.rlc.register_list_format,
3929 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3930 if (!register_list_format)
3931 return -ENOMEM;
3932
3933 gfx_v8_0_parse_ind_reg_list(register_list_format,
3934 RLC_FormatDirectRegListLength,
3935 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3936 unique_indices,
3937 &indices_count,
3938 ARRAY_SIZE(unique_indices),
3939 indirect_start_offsets,
3940 &offset_count,
3941 ARRAY_SIZE(indirect_start_offsets));
3942
3943 /* save and restore list */
3944 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3945
3946 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3947 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3948 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3949
3950 /* indirect list */
3951 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3952 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3953 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3954
3955 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3956 list_size = list_size >> 1;
3957 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3958 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3959
3960 /* starting offsets */
3961 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3962 adev->gfx.rlc.starting_offsets_start);
3963 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3964 WREG32(mmRLC_GPM_SCRATCH_DATA,
3965 indirect_start_offsets[i]);
3966
3967 /* unique indices */
3968 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3969 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3970 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3971 if (unique_indices[i] != 0) {
3972 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
3973 WREG32(data + i, unique_indices[i] >> 20);
3974 }
3975 }
3976 kfree(register_list_format);
3977
3978 return 0;
3979 }
3980
3981 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3982 {
3983 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3984 }
3985
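/*
 * Program the RLC power-gating delays and the GFX-idle threshold for
 * auto power gating; the magic values below are presumably golden
 * settings from bring-up.
 */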
3986 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3987 {
3988 uint32_t data;
3989
3990 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3991
3992 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3993 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3994 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3995 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3996 WREG32(mmRLC_PG_DELAY, data);
3997
3998 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3999 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4000
4001 }
4002
4003 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4004 bool enable)
4005 {
4006 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4007 }
4008
4009 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4010 bool enable)
4011 {
4012 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4013 }
4014
4015 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4016 {
4017 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4018 }
4019
4020 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4021 {
4022 if ((adev->asic_type == CHIP_CARRIZO) ||
4023 (adev->asic_type == CHIP_STONEY)) {
4024 gfx_v8_0_init_csb(adev);
4025 gfx_v8_0_init_save_restore_list(adev);
4026 gfx_v8_0_enable_save_restore_machine(adev);
4027 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4028 gfx_v8_0_init_power_gating(adev);
4029 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4030 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4031 (adev->asic_type == CHIP_POLARIS12) ||
4032 (adev->asic_type == CHIP_VEGAM)) {
4033 gfx_v8_0_init_csb(adev);
4034 gfx_v8_0_init_save_restore_list(adev);
4035 gfx_v8_0_enable_save_restore_machine(adev);
4036 gfx_v8_0_init_power_gating(adev);
4037 }
4038
4039 }
4040
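/*
 * RLC stop/reset/start helpers used by gfx_v8_0_rlc_resume(): halt the
 * F32 core and wait for serdes idle, pulse GRBM soft reset, then
 * re-enable the core.
 */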
4041 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4042 {
4043 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4044
4045 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4046 gfx_v8_0_wait_for_rlc_serdes(adev);
4047 }
4048
4049 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4050 {
4051 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4052 udelay(50);
4053
4054 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4055 udelay(50);
4056 }
4057
4058 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4059 {
4060 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4061
4062 /* on APUs such as Carrizo, the CP interrupt is enabled after the CP is initialized */
4063 if (!(adev->flags & AMD_IS_APU))
4064 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4065
4066 udelay(50);
4067 }
4068
4069 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4070 {
4071 if (amdgpu_sriov_vf(adev)) {
4072 gfx_v8_0_init_csb(adev);
4073 return 0;
4074 }
4075
4076 adev->gfx.rlc.funcs->stop(adev);
4077 adev->gfx.rlc.funcs->reset(adev);
4078 gfx_v8_0_init_pg(adev);
4079 adev->gfx.rlc.funcs->start(adev);
4080
4081 return 0;
4082 }
4083
4084 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4085 {
4086 u32 tmp = RREG32(mmCP_ME_CNTL);
4087
4088 if (enable) {
4089 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4090 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4091 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4092 } else {
4093 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4094 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4095 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4096 }
4097 WREG32(mmCP_ME_CNTL, tmp);
4098 udelay(50);
4099 }
4100
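/*
 * Size, in dwords, of the PM4 stream that gfx_v8_0_cp_gfx_start()
 * emits for the clear state setup: preamble begin/end, context
 * control, the SECT_CONTEXT extents, the raster config pair and the
 * final CLEAR_STATE packet.
 */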
4101 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4102 {
4103 u32 count = 0;
4104 const struct cs_section_def *sect = NULL;
4105 const struct cs_extent_def *ext = NULL;
4106
4107 /* begin clear state */
4108 count += 2;
4109 /* context control state */
4110 count += 3;
4111
4112 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4113 for (ext = sect->section; ext->extent != NULL; ++ext) {
4114 if (sect->id == SECT_CONTEXT)
4115 count += 2 + ext->reg_count;
4116 else
4117 return 0;
4118 }
4119 }
4120 /* pa_sc_raster_config/pa_sc_raster_config1 */
4121 count += 4;
4122 /* end clear state */
4123 count += 2;
4124 /* clear state */
4125 count += 2;
4126
4127 return count;
4128 }
4129
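/*
 * Prime the gfx ring with the clear-state preamble: golden context
 * registers from vi_cs_data, the cached raster config pair, a
 * CLEAR_STATE packet and the CE partition bases.
 */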
4130 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4131 {
4132 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4133 const struct cs_section_def *sect = NULL;
4134 const struct cs_extent_def *ext = NULL;
4135 int r, i;
4136
4137 /* init the CP */
4138 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4139 WREG32(mmCP_ENDIAN_SWAP, 0);
4140 WREG32(mmCP_DEVICE_ID, 1);
4141
4142 gfx_v8_0_cp_gfx_enable(adev, true);
4143
4144 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4145 if (r) {
4146 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4147 return r;
4148 }
4149
4150 /* clear state buffer */
4151 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4152 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4153
4154 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4155 amdgpu_ring_write(ring, 0x80000000);
4156 amdgpu_ring_write(ring, 0x80000000);
4157
4158 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4159 for (ext = sect->section; ext->extent != NULL; ++ext) {
4160 if (sect->id == SECT_CONTEXT) {
4161 amdgpu_ring_write(ring,
4162 PACKET3(PACKET3_SET_CONTEXT_REG,
4163 ext->reg_count));
4164 amdgpu_ring_write(ring,
4165 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4166 for (i = 0; i < ext->reg_count; i++)
4167 amdgpu_ring_write(ring, ext->extent[i]);
4168 }
4169 }
4170 }
4171
4172 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4173 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4174 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4175 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4176
4177 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4178 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4179
4180 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4181 amdgpu_ring_write(ring, 0);
4182
4183 /* init the CE partitions */
4184 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4185 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4186 amdgpu_ring_write(ring, 0x8000);
4187 amdgpu_ring_write(ring, 0x8000);
4188
4189 amdgpu_ring_commit(ring);
4190
4191 return 0;
4192 }
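
/*
 * Route the gfx ring doorbell: enable or disable the CP_RB doorbell
 * and, on dGPUs, program the doorbell aperture range registers.
 */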
4193 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4194 {
4195 u32 tmp;
4196 /* no gfx doorbells on Iceland (Topaz) */
4197 if (adev->asic_type == CHIP_TOPAZ)
4198 return;
4199
4200 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4201
4202 if (ring->use_doorbell) {
4203 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4204 DOORBELL_OFFSET, ring->doorbell_index);
4205 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4206 DOORBELL_HIT, 0);
4207 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4208 DOORBELL_EN, 1);
4209 } else {
4210 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4211 }
4212
4213 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4214
4215 if (adev->flags & AMD_IS_APU)
4216 return;
4217
4218 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4219 DOORBELL_RANGE_LOWER,
4220 adev->doorbell_index.gfx_ring0);
4221 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4222
4223 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4224 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4225 }
4226
4227 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4228 {
4229 struct amdgpu_ring *ring;
4230 u32 tmp;
4231 u32 rb_bufsz;
4232 u64 rb_addr, rptr_addr, wptr_gpu_addr;
4233
4234 /* Set the write pointer delay */
4235 WREG32(mmCP_RB_WPTR_DELAY, 0);
4236
4237 /* set the RB to use vmid 0 */
4238 WREG32(mmCP_RB_VMID, 0);
4239
4240 /* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = ring->rptr_gpu_addr;
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = ring->wptr_gpu_addr;
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

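	/* The ring base is programmed in 256-byte units (the GPU
	 * address shifted down by 8), with the remaining high bits
	 * going to CP_RB0_BASE_HI.
	 */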
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);

	return 0;
}

static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		adev->gfx.kiq[0].ring.sched.ready = false;
	}
	udelay(50);
}

/* KIQ functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
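	/* The low byte of RLC_CP_SCHEDULERS appears to select the
	 * queue as (me << 5) | (pipe << 3) | queue; bit 7 (the 0x80
	 * OR-ed in below) presumably marks the entry as valid.
	 */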
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp | 0x80);
}

static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
	uint64_t queue_mask = 0;
	int r, i;

	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec_bitmap[0].queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = ring->wptr_gpu_addr;

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	amdgpu_ring_commit(kiq_ring);

	return 0;
}

static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}

static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
			mqd->cp_hqd_queue_priority =
				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
		}
	}
}

static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
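	/* With GFX8_MEC_HPD_SIZE = 4096 bytes the buffer holds 1024
	 * dwords, so order_base_2(1024) - 1 = 9 is written and the
	 * hardware decodes it back as 2^(9+1) = 1024 dwords.
	 */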
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			    (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
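	/* QUEUE_SIZE and RPTR_BLOCK_SIZE use the same log2-minus-one
	 * encoding as EOP_SIZE above, here in dword units; e.g. an
	 * AMDGPU_GPU_PAGE_SIZE of 4096 bytes gives RPTR_BLOCK_SIZE = 9.
	 */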
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = ring->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = ring->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* set static priority for a queue/ring */
	gfx_v8_0_mqd_set_priority(ring, mqd);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

	/* The map_queues packet doesn't need to activate the queue,
	 * so only the KIQ needs to set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}

static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			       struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
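	/* The vi_mqd layout mirrors that register file, so
	 * mqd_data[reg - mmCP_MQD_BASE_ADDR] is the MQD word that
	 * backs register "reg"; the loops below rely on this.
	 */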
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}

static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;

	gfx_v8_0_kiq_setting(ring);

	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.kiq[0].mqd_backup)
			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
			amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.kiq[0].mqd_backup)
			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}

static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else {
		/* restore MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}

static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
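		/* Doorbell indices are in dword units while the range
		 * registers appear to take byte offsets, hence the << 2.
		 */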
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}

static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	gfx_v8_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
	return 0;
}

static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
{
	int i, r;

	gfx_v8_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		r = gfx_v8_0_kcq_init_queue(&adev->gfx.compute_ring[i]);
		if (r)
			return r;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	return gfx_v8_0_kiq_kcq_enable(adev);
}

static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	/* collect all the ring_tests here, gfx, kiq, compute */
	ring = &adev->gfx.gfx_ring[0];
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	ring = &adev->gfx.kiq[0].ring;
	r = amdgpu_ring_test_helper(ring);
	if (r)
		return r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		amdgpu_ring_test_helper(ring);
	}

	return 0;
}

static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kcq_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_test_all_rings(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}

static int gfx_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
	int r;
	struct amdgpu_device *adev = ip_block->adev;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_constants_init(adev);

	r = adev->gfx.rlc.funcs->resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}

static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;

	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
	if (r)
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
	/* Submit the unmap queue packets */
	amdgpu_ring_commit(kiq_ring);
	/*
	 * The ring test does a basic scratch register change check.
	 * Run it here to ensure the unmap queues packets submitted
	 * above have been processed successfully before returning.
	 */
	r = amdgpu_ring_test_helper(kiq_ring);
	if (r)
		DRM_ERROR("KCQ disable failed\n");

	return r;
}

static bool gfx_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
		|| RREG32(mmGRBM_STATUS2) != 0x8)
		return false;
	else
		return true;
}

static bool gfx_v8_0_rlc_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (RREG32(mmGRBM_STATUS2) != 0x8)
		return false;
	else
		return true;
}

static int gfx_v8_0_wait_for_rlc_idle(void *handle)
{
	unsigned int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_rlc_is_idle(handle))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}

static int gfx_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	unsigned int i;
	struct amdgpu_device *adev = ip_block->adev;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(ip_block))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}

static int gfx_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ so the CPC doesn't touch memory that may no longer be valid */
	gfx_v8_0_kcq_disable(adev);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}

	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
	if (!gfx_v8_0_wait_for_idle(ip_block))
		gfx_v8_0_cp_enable(adev, false);
	else
		pr_err("cp is busy, skip halt cp\n");
	if (!gfx_v8_0_wait_for_rlc_idle(adev))
		adev->gfx.rlc.funcs->stop(adev);
	else
		pr_err("rlc is busy, skip halt rlc\n");
	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);

	return 0;
}

static int gfx_v8_0_suspend(struct amdgpu_ip_block *ip_block)
{
	return gfx_v8_0_hw_fini(ip_block);
}

static int gfx_v8_0_resume(struct amdgpu_ip_block *ip_block)
{
	return gfx_v8_0_hw_init(ip_block);
}

static bool gfx_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}

static int gfx_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	/* stop the rlc */
	adev->gfx.rlc.funcs->stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}

static int gfx_v8_0_soft_reset(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}

static int gfx_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	u32 grbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
		gfx_v8_0_kcq_resume(adev);
	}

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	gfx_v8_0_cp_test_all_rings(adev);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}

/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	mutex_lock(&adev->gfx.gpu_clock_mutex);
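	/* Writing RLC_CAPTURE_GPU_CLOCK_COUNT presumably latches the
	 * free-running counter into the LSB/MSB pair, so the two
	 * halves read below form one consistent 64-bit sample.
	 */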
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}

static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
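	/* (1 << (oa_size + oa_base)) - (1 << oa_base) builds a
	 * contiguous mask of oa_size bits starting at bit oa_base,
	 * e.g. base 2 with size 3 gives 0x1c.
	 */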
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}

static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}

static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};

static int gfx_v8_0_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	adev->gfx.xcc_mask = 1;
	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
					  AMDGPU_MAX_COMPUTE_RINGS);
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}

static int gfx_v8_0_late_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	return 0;
}

static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12) ||
	    (adev->asic_type == CHIP_VEGAM))
		/* Send msg to SMU via Powerplay */
		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable, 0);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							 bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}

static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}

static int gfx_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
	return 0;
}

static void gfx_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
	struct amdgpu_device *adev = ip_block->adev;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}

static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}

#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e

static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
{
	uint32_t rlc_setting;

	rlc_setting = RREG32(mmRLC_CNTL);
	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return false;

	return true;
}

static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
	uint32_t data;
	unsigned i;
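
	/* In RLC_SAFE_MODE the CMD bit latches the request and the
	 * MESSAGE field carries the payload; MESSAGE = 1 asks the RLC
	 * firmware to enter safe mode and 0 exits, matching the
	 * MSG_ENTER/MSG_EXIT_RLC_SAFE_MODE values defined above.
	 */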
	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
	WREG32(mmRLC_SAFE_MODE, data);

	/* wait for RLC_SAFE_MODE */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPM_STAT) &
		     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
		    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
		     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
			break;
		udelay(1);
	}
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
{
	uint32_t data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	data |= RLC_SAFE_MODE__CMD_MASK;
	data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
	WREG32(mmRLC_SAFE_MODE, data);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
{
	u32 data;

	amdgpu_gfx_off_ctrl(adev, false);

	if (amdgpu_sriov_is_pp_one_vf(adev))
		data = RREG32_NO_KIQ(mmRLC_SPM_VMID);
	else
		data = RREG32(mmRLC_SPM_VMID);

	data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
	data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;

	if (amdgpu_sriov_is_pp_one_vf(adev))
		WREG32_NO_KIQ(mmRLC_SPM_VMID, data);
	else
		WREG32(mmRLC_SPM_VMID, data);

	amdgpu_gfx_off_ctrl(adev, true);
}

static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
	.set_safe_mode = gfx_v8_0_set_safe_mode,
	.unset_safe_mode = gfx_v8_0_unset_safe_mode,
	.init = gfx_v8_0_rlc_init,
	.get_csb_size = gfx_v8_0_get_csb_size,
	.get_csb_buffer = gfx_v8_0_get_csb_buffer,
	.get_cp_table_num = gfx_v8_0_cp_jump_table_num,
	.resume = gfx_v8_0_rlc_resume,
	.stop = gfx_v8_0_rlc_stop,
	.reset = gfx_v8_0_rlc_reset,
	.start = gfx_v8_0_rlc_start,
	.update_spm_vmid = gfx_v8_0_update_spm_vmid
};

static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}
}

static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);

	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * === MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * === CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}

	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
	return 0;
}

static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_CG,
				      pp_support_state,
				      pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_MG,
				      pp_support_state,
				      pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
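
/*
 * For reference: on Tonga (and on Polaris below) gfx clock gating is
 * requested through the SMU rather than by direct register writes. Each
 * block gets a PP_CG_MSG_ID() cookie combining what the driver supports
 * (pp_support_state) with the requested state (pp_state, forced to 0 on
 * ungate). E.g. with both CGCG and CGLS set in cg_flags, gating the CG
 * block sends:
 *
 *   PP_CG_MSG_ID(PP_GROUP_GFX, PP_BLOCK_GFX_CG,
 *                PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
 *                PP_STATE_CG | PP_STATE_LS)
 */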

static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_CG,
				      pp_support_state,
				      pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_3D,
				      pp_support_state,
				      pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_MG,
				      pp_support_state,
				      pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_RLC,
				      pp_support_state,
				      pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				      PP_BLOCK_GFX_CP,
				      pp_support_state,
				      pp_state);
		amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}

static int gfx_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}

static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return *ring->rptr_cpu_addr;
}

static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return *ring->wptr_cpu_addr;
	else
		return RREG32(mmCP_RB0_WPTR);
}

static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}

static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
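
/*
 * Informally, the WAIT_REG_MEM above uses OPERATION(1) ("write, wait,
 * write"): the CP writes ref_and_mask to GPU_HDP_FLUSH_REQ, then polls
 * GPU_HDP_FLUSH_DONE every 0x20 clocks until the masked value equals
 * ref_and_mask, i.e. until the HDP flush bit for this engine (CP0 for
 * gfx, CP2/CP6 shifted by pipe for MEC1/MEC2) reads back as done.
 */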

static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
			  EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
			  EVENT_INDEX(0));
}

static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_job *job,
				      struct amdgpu_ib *ib,
				      uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
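
/*
 * IB packet layout, for reference: header (INDIRECT_BUFFER_CONST for CE
 * IBs, INDIRECT_BUFFER otherwise), the 4-byte-aligned IB address split
 * into a low dword and the upper 16 bits, then a control dword packing
 * the IB length in dwords into the low bits and the VMID into bits 31:24.
 * E.g. a 16-dword IB on vmid 3 carries control = 16 | (3 << 24).
 */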

static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(1) | INT_SEL(0));
	amdgpu_ring_write(ring, lower_32_bits(seq - 1));
	amdgpu_ring_write(ring, upper_32_bits(seq - 1));

	/* Then send the real EOP event down the pipe:
	 * EVENT_WRITE_EOP - flush caches, send int
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5) |
				 (exec ? EOP_EXEC : 0)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
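
/*
 * The seq - 1 trick above: the dummy EOP event still flushes the caches,
 * but publishes a sequence number one below the real one, so no fence
 * waiter can mistake it for completion; only the second EVENT_WRITE_EOP
 * writes the real seq and (optionally) raises the interrupt.
 */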

static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0)));   /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return *ring->wptr_cpu_addr;
}

static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time the preamble
		 * is presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
						  uint64_t addr)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	/* discard following DWs if *cond_exec_gpu_addr == 0 */
	amdgpu_ring_write(ring, 0);
	ret = ring->wptr & ring->buf_mask;
	/* patch dummy value later */
	amdgpu_ring_write(ring, 0);
	return ret;
}
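
/*
 * COND_EXEC patching, informally: the packet names the GPU address of a
 * condition dword and a count of dwords to discard when that dword is 0.
 * The count is emitted as a dummy 0 here; the returned ring offset
 * (wptr & buf_mask) tells the caller where to patch in the real number of
 * dwords to skip once it is known.
 */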

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t reg_val_offs)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |		/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}

static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}

static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI. The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
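
/*
 * ring_id decoding, for reference: pipe_id sits in bits [1:0], me_id in
 * bits [3:2] and queue_id in bits [6:4]. E.g. ring_id 0x25 decodes to
 * me 1, pipe 1, queue 2, so fences are processed on the matching MEC1
 * compute ring.
 */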

static void gfx_v8_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}

static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.");
	return 0;
}

static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
				  bool from_wq)
{
	u32 enc, se_id, sh_id, cu_id;
	char type[20];
	int sq_edc_source = -1;

	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);

	switch (enc) {
	case 0:
		DRM_INFO("SQ general purpose intr detected: "
			 "se_id %d, immed_overflow %d, host_reg_overflow %d, "
			 "host_cmd_overflow %d, cmd_timestamp %d, "
			 "reg_timestamp %d, thread_trace_buff_full %d, "
			 "wlt %d, thread_trace %d.\n",
			 se_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
			 );
		break;
	case 1:
	case 2:

		cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
		sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);

		/*
		 * This function can be called either directly from the ISR
		 * or from the BH (work queue), in which case we can safely
		 * access the SQ_EDC_INFO instance.
		 */
		if (from_wq) {
			mutex_lock(&adev->grbm_idx_mutex);
			gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0);

			sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);

			gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
			mutex_unlock(&adev->grbm_idx_mutex);
		}

		if (enc == 1)
			sprintf(type, "instruction intr");
		else
			sprintf(type, "EDC/ECC error");

		DRM_INFO(
			"SQ %s detected: "
			"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
			"trap %s, sq_edc_info.source %s.\n",
			type, se_id, sh_id, cu_id,
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
			(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
			);
		break;
	default:
		DRM_ERROR("SQ invalid encoding type.\n");
	}
}

static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
	struct sq_work *sq_work = container_of(work, struct sq_work, work);

	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
}

static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
			   struct amdgpu_irq_src *source,
			   struct amdgpu_iv_entry *entry)
{
	unsigned ih_data = entry->src_data[0];

	/*
	 * Try to submit work so SQ_EDC_INFO can be accessed from
	 * BH. If previous work submission hasn't finished yet
	 * just print whatever info is possible directly from the ISR.
	 */
	if (work_pending(&adev->gfx.sq_work.work)) {
		gfx_v8_0_parse_sq_irq(adev, ih_data, false);
	} else {
		adev->gfx.sq_work.ih_data = ih_data;
		schedule_work(&adev->gfx.sq_work.work);
	}

	return 0;
}
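
/*
 * Why the ISR/BH split: reading SQ_EDC_INFO requires selecting the right
 * SE/SH/CU under grbm_idx_mutex, and a mutex cannot be taken from the ISR.
 * The interrupt therefore stashes ih_data and defers to the work item, and
 * only falls back to parsing directly (without the EDC source) when the
 * previous work item is still pending and the data would otherwise be
 * dropped.
 */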

static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA |
			  PACKET3_TC_WB_ACTION_ENA);	/* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);		/* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0);			/* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0x0000000A);		/* poll interval */
}

static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA |
			  PACKET3_TC_WB_ACTION_ENA);	/* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);		/* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xff);			/* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);			/* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);			/* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);		/* poll interval */
}

/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT	0x0000007f
static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
					uint32_t pipe, bool enable)
{
	uint32_t val;
	uint32_t wcl_cs_reg;

	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;

	switch (pipe) {
	case 0:
		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
		break;
	case 1:
		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
		break;
	case 2:
		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
		break;
	case 3:
		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
		break;
	default:
		DRM_DEBUG("invalid pipe %d\n", pipe);
		return;
	}

	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
}

#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT	0x07ffffff
static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	int i;

	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
	 * limit the number of gfx waves. Setting it to 0x1f (5 bits) makes
	 * sure gfx only gets around 25% of the gpu resources.
	 */
	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
	amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);

	/* Restrict waves for normal/low priority compute queues as well
	 * to get the best QoS for high priority compute jobs.
	 *
	 * amdgpu controls only the 1st ME (CS pipes 0-3).
	 */
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		if (i != ring->pipe)
			gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
	}
}
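
/*
 * Rough arithmetic behind the "around 25%" above: the multiplier field's
 * full scale is 0x7f, so programming 0x1f gives 0x1f / 0x7f, i.e. about
 * 24% of the wave slots, while a high-priority compute job runs. The
 * other CS pipes of the first ME are throttled the same way through
 * mmSPI_WCL_PIPE_PERCENT_CS[0-3].
 */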

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		12 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jumps to the place just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		12 + 12 + /* FENCE x2 */
		2 +  /* SWITCH_BUFFER */
		5,   /* SURFACE_SYNC */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
		7 +  /* gfx_v8_0_emit_mem_sync_compute */
		5 +  /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
		15,  /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
	.emit_wave_limit = gfx_v8_0_emit_wave_limit,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 +  /* gfx_v8_0_ring_emit_hdp_flush */
		5 +  /* hdp_invalidate */
		7 +  /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq[0].ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
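
/*
 * Worked example, assuming max_cu_per_sh == 8: the created bitmask is
 * 0xff; if the combined INACTIVE_CUS field reads 0x03 (CUs 0 and 1
 * harvested or user-disabled), the active bitmap returned is
 * ~0x03 & 0xff = 0xfc.
 */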

static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[0][i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
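
/*
 * Packing note: ao_cu_mask accumulates one byte per shader array at bit
 * offset i * 16 + j * 8, so SE0/SH0 lands in bits 7:0, SE0/SH1 in bits
 * 15:8, SE1/SH0 in bits 23:16 and so on; only i < 2, j < 2 are recorded
 * in the mask, while the full per-SH bitmaps go to cu_info->bitmap.
 */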

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}

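/*
 * Count arithmetic in the two emitters above, informally: a WRITE_DATA
 * packet carries one control dword and a two-dword destination address
 * ahead of the payload, and the PACKET3 count field is one less than the
 * number of dwords following the header, hence
 * cnt = (sizeof(payload) >> 2) + 4 - 2, with the payload itself being
 * cnt - 2 dwords when streamed out via amdgpu_ring_write_multiple().
 */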