1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39
40 #include "vega10_enum.h"
41
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47
48 #include "amdgpu_ras.h"
49
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_0_cleaner_shader.h"
54 #include "gfx_v9_4_2.h"
55
56 #include "asic_reg/pwr/pwr_10_0_offset.h"
57 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
58 #include "asic_reg/gc/gc_9_0_default.h"
59
/* One hardware GFX ring, multiplexed into two software rings (see amdgpu_ring_mux.h). */
#define GFX9_NUM_GFX_RINGS 1
#define GFX9_NUM_SW_GFX_RINGS 2
/* Per-queue MEC hardware-queue-descriptor EOP buffer size, in bytes. */
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

/* GCEA probe-map register offset — not present in the gc_9_0 headers, defined locally. */
#define mmGCEA_PROBE_MAP 0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX 0

/*
 * Firmware images requested at runtime by this IP block; MODULE_FIRMWARE()
 * records them in module metadata (presumably so initramfs/packaging tools
 * can bundle them — standard kernel convention).
 */
MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

/* Arcturus is compute-only: no CE/PFP/ME (gfx) front-end firmware. */
MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

/* Aldebaran is compute-only; "sjt" variants are second-jump-table images — TODO confirm. */
MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");

/* Arcturus-specific TCP channel steering register offsets (not in gc_9_0 headers). */
#define mmTCP_CHAN_STEER_0_ARCT 0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_1_ARCT 0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_2_ARCT 0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0

/* Renoir-specific golden time-stamp counter register offsets. */
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1
152
153 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
154 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
155 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
156 SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
157 SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
158 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
159 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
160 SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
161 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
162 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
163 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
164 SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
165 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
166 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
167 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
168 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
169 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
170 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
171 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
172 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
173 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
174 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
175 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
176 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
177 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
178 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
179 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
180 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
181 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
182 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
183 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
184 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
185 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
186 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
187 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
188 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
189 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
190 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
191 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
192 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
193 SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
194 SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
195 SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
196 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
197 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
198 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
199 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
200 SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
201 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
202 SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
203 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
204 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
205 SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
206 SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
207 SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
208 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
209 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
210 SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
211 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
212 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
213 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
214 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
215 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
216 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
217 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
218 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
219 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
220 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
221 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
222 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
223 SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
224 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
225 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
226 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
227 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
228 /* cp header registers */
229 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
230 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
231 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
232 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
233 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
234 /* SE status registers */
235 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
236 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
237 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
238 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
239 };
240
/*
 * Per-queue compute (HQD) register list for GFX9 state capture.
 * Entry order defines the dump order; do not reorder without updating
 * whatever parses the resulting dump.
 */
static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
	/* compute queue registers */
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
};
281
/*
 * GFX sub-block indices as understood by the RAS TA (trusted application)
 * firmware. The numeric values form an ABI with the TA — never reorder or
 * insert entries mid-list. *_INDEX_START/*_INDEX_END aliases bracket each
 * hardware block's range so ranges can be iterated or validated.
 */
enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};
483
/*
 * Per-subblock RAS capability descriptor; see AMDGPU_RAS_SUB_BLOCK() for
 * how the two error-type bitmasks are packed.
 * NOTE(review): name points at stringized macro literals; logically const —
 * consider const char * if all users permit (interface change, not done here).
 */
struct ras_gfx_subblock {
	unsigned char *name;		/* subblock name (stringized enum suffix) */
	int ta_subblock;		/* matching ta_ras_gfx_subblock value */
	int hw_supported_error_type;	/* bitmask built from macro args a..d */
	int sw_supported_error_type;	/* bitmask built from macro args e..h */
};
490
/*
 * Build a designated initializer for ras_gfx_subblocks[], keyed by the
 * AMDGPU_RAS_BLOCK__ enum and paired with the TA_RAS_BLOCK__ value.
 * hw mask packs a..d into bits 0..3 in order; sw mask deliberately packs
 * e->bit1, f->bit3, g->bit0, h->bit2 (argument order does not match bit
 * order — keep as-is, it mirrors the consumer's bit layout).
 */
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
	[AMDGPU_RAS_BLOCK__##subblock] = { \
	#subblock, \
	TA_RAS_BLOCK__##subblock, \
	((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
	(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
	}
498
/*
 * RAS capability table for every GFX subblock, indexed by
 * AMDGPU_RAS_BLOCK__* via the designated initializers in
 * AMDGPU_RAS_SUB_BLOCK(). Args a..d form the hw-supported error-type mask,
 * e..h the sw-supported mask (see the macro for the exact bit packing).
 */
static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};
648
/*
 * Common GFX9 golden register settings, applied as read-modify-write:
 * each entry is (hwip, instance, register, AND mask of bits to change,
 * OR value to set). Values come from AMD hardware bring-up; do not tune.
 */
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};
672
/* Vega10-specific golden settings, applied on top of golden_settings_gc_9_0. */
static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};
694
/*
 * Vega20-specific golden register overrides (GC 9.4.0), applied by
 * gfx_v9_0_init_golden_registers() on top of golden_settings_gc_9_0.
 */
static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};
709
/*
 * Common golden register overrides for the GC 9.1.x / 9.2.2 APU family
 * (Raven/Picasso); applied before the rv1/rv2-specific tables below.
 */
static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};
737
/*
 * Raven1-specific overrides, applied after golden_settings_gc_9_1 when
 * the APU is not a Raven2 (see gfx_v9_0_init_golden_registers()).
 */
static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};
748
/*
 * Raven2-specific overrides, applied after golden_settings_gc_9_1 when
 * AMD_APU_IS_RAVEN2 is set (see gfx_v9_0_init_golden_registers()).
 */
static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};
771
/*
 * Renoir (GC 9.3.0) golden register overrides. Note: for Renoir this is
 * the only table applied -- gfx_v9_0_init_golden_registers() returns
 * early and skips golden_settings_gc_9_x_common.
 */
static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};
787
/*
 * Overrides shared by all GFX v9 parts except Arcturus (9.4.1),
 * Aldebaran (9.4.2) and Renoir (9.3.0), which skip this table in
 * gfx_v9_0_init_golden_registers().
 */
static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};
794
/*
 * Base golden register overrides for GC 9.2.1 (Vega12), applied before
 * the vg12-specific table below.
 */
static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};
814
/*
 * Vega12-specific golden register overrides, applied after
 * golden_settings_gc_9_2_1 in gfx_v9_0_init_golden_registers().
 */
static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};
831
/*
 * Arcturus (GC 9.4.1) golden register overrides; used alone -- the
 * common 9.x table is explicitly skipped for 9.4.1.
 */
static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};
846
/*
 * Registers accessed through the RLCG interface on GC 9.0.
 * NOTE(review): presumably the list of registers that must be written
 * via RLC-gated access under SR-IOV -- confirm against soc15_reg_rlcg users.
 */
static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};
851
/*
 * Register offsets of the eight RLC_SRM_INDEX_CNTL_ADDR_n registers,
 * expressed relative to RLC_SRM_INDEX_CNTL_ADDR_0 so they can be
 * indexed by loop counter (the _n registers are not guaranteed to be
 * evenly spaced, hence the explicit per-entry subtraction).
 */
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};
863
/*
 * Register offsets of the eight RLC_SRM_INDEX_CNTL_DATA_n registers,
 * relative to RLC_SRM_INDEX_CNTL_DATA_0 (companion table to the ADDR
 * offsets above).
 */
static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
875
/*
 * Per-ASIC golden GB_ADDR_CONFIG values; these match the values the
 * golden-register tables above program into mmGB_ADDR_CONFIG.
 */
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

/* Forward declarations for helpers defined later in this file. */
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if, uint32_t instance_mask);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
					      unsigned int vmid);
static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
899
/**
 * gfx_v9_0_kiq_set_resources - emit a PM4 SET_RESOURCES packet on the KIQ
 * @kiq_ring: KIQ ring to write the packet into
 * @queue_mask: 64-bit mask of queues handed to the KIQ
 *
 * Builds the 8-dword SET_RESOURCES packet (header + 7 payload dwords);
 * the dword order below is mandated by the packet format and must not
 * be rearranged. Also passes the cleaner shader's MC address (256-byte
 * aligned, hence the >> 8).
 */
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
				       uint64_t queue_mask)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	u64 shader_mc_addr;

	/* Cleaner shader MC address */
	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  /* vmid_mask:0* queue_type:0 (KIQ) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
	amdgpu_ring_write(kiq_ring,
			lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring,
			upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}
923
/**
 * gfx_v9_0_kiq_map_queues - emit a PM4 MAP_QUEUES packet for @ring
 * @kiq_ring: KIQ ring the packet is written into
 * @ring: the ring whose hardware queue should be mapped
 *
 * 7-dword packet (header + 6). eng_sel is 4 for GFX rings, 0 for
 * compute. The ME field maps compute ME 1 -> 0 and everything else
 * (ME 2) -> 1, i.e. it encodes MEC index rather than ME number.
 */
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				    struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
			 /*queue_type: normal compute queue */
			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
			 /* alloc format: all_on_one_pipe */
			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			 /* num_queues: must be 1 */
			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}
953
/**
 * gfx_v9_0_kiq_unmap_queues - emit a PM4 UNMAP_QUEUES packet for @ring
 * @kiq_ring: KIQ ring the packet is written into
 * @ring: ring whose hardware queue should be unmapped
 * @action: unmap action (e.g. PREEMPT_QUEUES_NO_UNMAP keeps the queue
 *          mapped but preempts it)
 * @gpu_addr: unused in this implementation
 * @seq: unused in this implementation
 *
 * 6-dword packet (header + 5). For PREEMPT_QUEUES_NO_UNMAP the first
 * trailing dword carries the current masked wptr; otherwise the three
 * trailing dwords are zero. Both branches emit exactly three dwords so
 * the packet length stays constant.
 */
static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);

	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}
981
/**
 * gfx_v9_0_kiq_query_status - emit a PM4 QUERY_STATUS packet for @ring
 * @kiq_ring: KIQ ring the packet is written into
 * @ring: ring whose queue status is queried
 * @addr: GPU address the firmware writes the status/fence to
 * @seq: sequence value associated with the query
 *
 * 7-dword packet (header + 6); dword order is fixed by the packet
 * format. COMMAND(2) selects the status-write variant of the query.
 */
static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}
1003
/**
 * gfx_v9_0_kiq_invalidate_tlbs - emit a PM4 INVALIDATE_TLBS packet
 * @kiq_ring: KIQ ring the packet is written into
 * @pasid: process address space ID whose TLB entries are invalidated
 * @flush_type: TLB flush type encoded into the packet
 * @all_hub: invalidate on all VM hubs rather than just one
 *
 * 2-dword packet (header + 1 payload dword).
 */
static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				uint16_t pasid, uint32_t flush_type,
				bool all_hub)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(kiq_ring,
			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}
1015
1016
/**
 * gfx_v9_0_kiq_reset_hw_queue - forcibly reset one hardware compute queue
 * @kiq_ring: used only to reach the owning amdgpu_device
 * @queue_type: ring type; only AMDGPU_RING_TYPE_COMPUTE is supported
 * @me_id: micro engine to select via SRBM
 * @pipe_id: pipe within the ME
 * @queue_id: queue within the pipe
 * @xcc_id: compute partition for RLC safe-mode entry/exit
 * @vmid: unused by this implementation
 *
 * Despite living in the KIQ function table, this does not emit KIQ
 * packets: it programs the HQD registers directly under srbm_mutex with
 * GRBM steered to the target queue, requests a dequeue plus queue
 * reset, then polls CP_HQD_ACTIVE (up to adev->usec_timeout us) for the
 * queue to go idle. The whole sequence runs inside RLC safe mode.
 */
static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
					uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
					uint32_t xcc_id, uint32_t vmid)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	unsigned i;

	/* enter save mode */
	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
	mutex_lock(&adev->srbm_mutex);
	soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);

	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
		WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
		/* wait till dequeue take effects */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		if (i >= adev->usec_timeout)
			dev_err(adev->dev, "fail to wait on hqd deactive\n");
	} else {
		dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
	}

	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	/* exit safe mode */
	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
}
1049
/*
 * KIQ PM4 function table for GFX v9. The *_size fields are the total
 * dword counts (header included) of the corresponding packets, matching
 * the amdgpu_ring_write() sequences in the emit functions above; the
 * KIQ core uses them to reserve ring space before emitting.
 */
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
	.kiq_query_status = gfx_v9_0_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
	.kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};
1063
/* Install the GFX v9 KIQ PM4 function table on KIQ instance 0. */
static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
}
1068
/**
 * gfx_v9_0_init_golden_registers - program per-ASIC golden register values
 * @adev: amdgpu device
 *
 * Selects the golden-register tables for the detected GC IP version and
 * programs them via soc15_program_register_sequence(). Most parts get a
 * family base table plus an ASIC-specific one, then the shared
 * golden_settings_gc_9_x_common table; Renoir (9.3.0) returns early and
 * skips the common table, and Arcturus (9.4.1) / Aldebaran (9.4.2) are
 * excluded from it explicitly.
 */
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
		/* Vega10: base 9.0 table + vg10 overrides */
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case IP_VERSION(9, 2, 1):
		/* Vega12: base 9.2.1 table + vg12 overrides */
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case IP_VERSION(9, 4, 0):
		/* Vega20: base 9.0 table + vg20 overrides */
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case IP_VERSION(9, 4, 1):
		/* Arcturus: single dedicated table */
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		/* Raven/Picasso: family table + rv1 or rv2 overrides */
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	 case IP_VERSION(9, 3, 0):
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_1_rn,
						ARRAY_SIZE(golden_settings_gc_9_1_rn));
		return; /* for renoir, don't need common goldensetting */
	case IP_VERSION(9, 4, 2):
		/* Aldebaran: handled by the gfx_v9_4_2 helper, per die */
		gfx_v9_4_2_init_golden_registers(adev,
						 adev->smuio.funcs->get_die_id(adev));
		break;
	default:
		break;
	}

	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}
1132
/**
 * gfx_v9_0_write_data_to_reg - emit a WRITE_DATA packet targeting a register
 * @ring: ring to emit on
 * @eng_sel: engine select field for the packet
 * @wc: request write confirmation when true
 * @reg: destination register offset
 * @val: value to write
 *
 * 5-dword packet; DST_SEL(0) selects register destination, and the
 * zero dword is the (unused) high half of the destination address.
 */
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
1144
/**
 * gfx_v9_0_wait_reg_mem - emit a WAIT_REG_MEM packet
 * @ring: ring to emit on
 * @eng_sel: engine select field
 * @mem_space: 1 to poll memory, 0 to poll a register
 * @opt: operation field (wait)
 * @addr0: register offset, or low 32 bits of the memory address
 *         (must be dword aligned when @mem_space is set)
 * @addr1: second register offset, or high 32 bits of the address
 * @ref: reference value to compare against
 * @mask: mask applied before the comparison
 * @inv: poll interval field
 *
 * Uses FUNCTION(3), i.e. waits until (value & mask) == ref.
 */
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				int mem_space, int opt, uint32_t addr0,
				uint32_t addr1, uint32_t ref, uint32_t mask,
				uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}
1166
/**
 * gfx_v9_0_ring_test_ring - basic ring aliveness test
 * @ring: ring to test
 *
 * Seeds SCRATCH_REG0 with 0xCAFEDEAD, then submits a 3-dword
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it and polls (up to
 * adev->usec_timeout microseconds) until the value appears.
 *
 * Returns 0 on success, -ETIMEDOUT if the write never lands, or the
 * error from amdgpu_ring_alloc().
 */
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* known-bad sentinel so a stale 0xDEADBEEF can't fake success */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}
1196
/**
 * gfx_v9_0_ring_test_ib - test indirect buffer submission on @ring
 * @ring: ring to test
 * @timeout: fence wait timeout (jiffies semantics of dma_fence_wait_timeout)
 *
 * Allocates a writeback slot seeded with 0xCAFEDEAD, builds a 5-dword
 * WRITE_DATA IB that stores 0xDEADBEEF to it, schedules the IB and
 * waits on its fence, then verifies the memory write landed.
 *
 * Returns 0 on success, -ETIMEDOUT on fence timeout, -EINVAL if the
 * fence signaled but the value is wrong, or a negative error from the
 * allocation/scheduling steps. Cleanup uses the usual goto-unwind.
 */
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	/* WRITE_DATA to the writeback slot: DST_SEL(5) = memory, confirmed */
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
1252
1253
/**
 * gfx_v9_0_free_microcode - release all GFX firmware blobs
 * @adev: amdgpu device
 *
 * Drops every CP/RLC/MEC firmware reference (amdgpu_ucode_release is
 * safe on already-NULL handles) and frees the RLC register-list-format
 * buffer allocated during firmware parsing.
 */
static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.ce_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);
	amdgpu_ucode_release(&adev->gfx.mec2_fw);

	kfree(adev->gfx.rlc.register_list_format);
}
1265
/**
 * gfx_v9_0_check_fw_write_wait - detect CP firmware WRITE_WAIT support
 * @adev: amdgpu device
 *
 * Sets adev->gfx.me_fw_write_wait / mec_fw_write_wait when the loaded
 * ME/PFP/MEC firmware is new enough to support the combined
 * write-then-wait register operation; the minimum fw/feature versions
 * differ per GC IP version. Also warns once if CP firmware on non-
 * Arcturus parts is older than the recommended baseline.
 */
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
	     (adev->gfx.mec_feature_version < 46) ||
	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
	     (adev->gfx.pfp_feature_version < 46)))
		DRM_WARN_ONCE("CP firmware version too old, please update!");

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
		/* Vega10 thresholds */
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 2, 1):
		/* Vega12 thresholds */
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 4, 0):
		/* Vega20 thresholds */
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 2, 2):
		/* Raven/Picasso thresholds */
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		/* newer IPs are assumed to always support it */
		adev->gfx.me_fw_write_wait = true;
		adev->gfx.mec_fw_write_wait = true;
		break;
	}
}
1330
/*
 * PCI identity of a board on which GFXOFF is known to be broken; all
 * five fields must match exactly (see gfx_v9_0_should_disable_gfxoff()).
 */
struct amdgpu_gfxoff_quirk {
	u16 chip_vendor;	/* GPU PCI vendor ID */
	u16 chip_device;	/* GPU PCI device ID */
	u16 subsys_vendor;	/* board subsystem vendor ID */
	u16 subsys_device;	/* board subsystem device ID */
	u8 revision;		/* PCI revision ID */
};
1338
/*
 * Boards with broken GFXOFF; terminated by an all-zero sentinel entry.
 * Matching entries cause gfx_v9_0_check_if_need_gfxoff() to clear
 * PP_GFXOFF_MASK.
 */
static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
	/* https://bbs.openkylin.top/t/topic/171497 */
	{ 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
	/* HP 705G4 DM with R5 2400G */
	{ 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
	{ 0, 0, 0, 0, 0 },
};
1354
gfx_v9_0_should_disable_gfxoff(struct pci_dev * pdev)1355 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1356 {
1357 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1358
1359 while (p && p->chip_device != 0) {
1360 if (pdev->vendor == p->chip_vendor &&
1361 pdev->device == p->chip_device &&
1362 pdev->subsystem_vendor == p->subsys_vendor &&
1363 pdev->subsystem_device == p->subsys_device &&
1364 pdev->revision == p->revision) {
1365 return true;
1366 }
1367 ++p;
1368 }
1369 return false;
1370 }
1371
is_raven_kicker(struct amdgpu_device * adev)1372 static bool is_raven_kicker(struct amdgpu_device *adev)
1373 {
1374 if (adev->pm.fw_version >= 0x41e2b)
1375 return true;
1376 else
1377 return false;
1378 }
1379
check_if_enlarge_doorbell_range(struct amdgpu_device * adev)1380 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1381 {
1382 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1383 (adev->gfx.me_fw_version >= 0x000000a5) &&
1384 (adev->gfx.me_feature_version >= 52))
1385 return true;
1386 else
1387 return false;
1388 }
1389
/**
 * gfx_v9_0_check_if_need_gfxoff - apply GFXOFF quirks and PG flags
 * @adev: amdgpu device
 *
 * Clears PP_GFXOFF_MASK for boards on the quirk list, then per IP
 * version: on Raven/Picasso (9.1.0/9.2.2) additionally disables GFXOFF
 * when the RLC firmware is too old (non-kicker < 531, feature < 1, or
 * no RLC v2.1), and on APUs that keep GFXOFF enabled sets the GFX
 * power-gating flags (GFX_PG, CP, RLC_SMU_HS). Discrete Vega parts
 * (9.0.1/9.2.1/9.4.0) need no extra handling here.
 */
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		/* Raven1 (not Raven2/Picasso) with old RLC fw: no GFXOFF */
		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
		    ((!is_raven_kicker(adev) &&
		      adev->gfx.rlc_fw_version < 531) ||
		     (adev->gfx.rlc_feature_version < 1) ||
		     !adev->gfx.rlc.is_rlc_v2_1))
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;

		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	case IP_VERSION(9, 3, 0):
		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_RLC_SMU_HS;
		break;
	default:
		break;
	}
}
1425
/*
 * gfx_v9_0_init_cp_gfx_microcode - request the CP graphics firmware images.
 * @adev: amdgpu device pointer
 * @chip_name: firmware file name prefix (e.g. "vega10")
 *
 * Loads the PFP, ME and CE microcode and registers each with the ucode
 * framework.  On any failure all three firmware handles are released.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
					  char *chip_name)
{
	int err;

	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_pfp.bin", chip_name);
	if (err)
		goto out;
	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);

	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_me.bin", chip_name);
	if (err)
		goto out;
	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);

	err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_ce.bin", chip_name);
	if (err)
		goto out;
	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);

out:
	/* amdgpu_ucode_release() tolerates never-requested handles. */
	if (err) {
		amdgpu_ucode_release(&adev->gfx.pfp_fw);
		amdgpu_ucode_release(&adev->gfx.me_fw);
		amdgpu_ucode_release(&adev->gfx.ce_fw);
	}
	return err;
}
1460
/*
 * gfx_v9_0_init_rlc_microcode - request and parse the RLC firmware image.
 * @adev: amdgpu device pointer
 * @chip_name: firmware file name prefix
 *
 * Chooses between the AM4, kicker and default RLC binaries based on the
 * chip and SMU firmware version, then hands the image to
 * amdgpu_gfx_rlc_init_microcode() with its header version.
 *
 * Returns 0 on success or a negative error code; the firmware handle is
 * released on failure.
 */
static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
				       char *chip_name)
{
	int err;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	/*
	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
	    (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
	     ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		 (smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APU and it's able to get the SMU
		 * version directly; 0x41e2b and newer means a kicker part.
		 */
		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_rlc.bin", chip_name);
	if (err)
		goto out;

	/* Header fields are little-endian on disk. */
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
out:
	if (err)
		amdgpu_ucode_release(&adev->gfx.rlc_fw);

	return err;
}
1509
gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device * adev)1510 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1511 {
1512 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1513 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1514 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1515 return false;
1516
1517 return true;
1518 }
1519
/*
 * gfx_v9_0_init_cp_compute_microcode - request the CP compute (MEC) firmware.
 * @adev: amdgpu device pointer
 * @chip_name: firmware file name prefix
 *
 * Loads MEC1 (the "sjt" variant under SR-IOV on Aldebaran) and, where a
 * separate MEC2 binary exists, MEC2 as well.  A missing MEC2 image is not
 * fatal; its version fields then mirror MEC1.  Also runs the GFXOFF and
 * fw-write-wait checks that depend on the final firmware versions.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
					      char *chip_name)
{
	int err;

	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_sjt_mec.bin", chip_name);
	else
		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_mec.bin", chip_name);
	if (err)
		goto out;

	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);

	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_sjt_mec2.bin", chip_name);
		else
			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_mec2.bin", chip_name);
		if (!err) {
			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
		} else {
			/* MEC2 is optional: swallow the error, release handle. */
			err = 0;
			amdgpu_ucode_release(&adev->gfx.mec2_fw);
		}
	} else {
		/* Single-image parts: MEC2 reports the MEC1 versions. */
		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
	}

	/* These decisions depend on the firmware versions read above. */
	gfx_v9_0_check_if_need_gfxoff(adev);
	gfx_v9_0_check_fw_write_wait(adev);

out:
	if (err)
		amdgpu_ucode_release(&adev->gfx.mec_fw);
	return err;
}
1568
gfx_v9_0_init_microcode(struct amdgpu_device * adev)1569 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1570 {
1571 char ucode_prefix[30];
1572 int r;
1573
1574 DRM_DEBUG("\n");
1575 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1576
1577 /* No CPG in Arcturus */
1578 if (adev->gfx.num_gfx_rings) {
1579 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1580 if (r)
1581 return r;
1582 }
1583
1584 r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1585 if (r)
1586 return r;
1587
1588 r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1589 if (r)
1590 return r;
1591
1592 return r;
1593 }
1594
gfx_v9_0_get_csb_size(struct amdgpu_device * adev)1595 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1596 {
1597 u32 count = 0;
1598 const struct cs_section_def *sect = NULL;
1599 const struct cs_extent_def *ext = NULL;
1600
1601 /* begin clear state */
1602 count += 2;
1603 /* context control state */
1604 count += 3;
1605
1606 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1607 for (ext = sect->section; ext->extent != NULL; ++ext) {
1608 if (sect->id == SECT_CONTEXT)
1609 count += 2 + ext->reg_count;
1610 else
1611 return 0;
1612 }
1613 }
1614
1615 /* end clear state */
1616 count += 2;
1617 /* clear state */
1618 count += 2;
1619
1620 return count;
1621 }
1622
/*
 * gfx_v9_0_get_csb_buffer - fill @buffer with the clear-state PM4 stream.
 * @adev: amdgpu device pointer
 * @buffer: destination, sized per gfx_v9_0_get_csb_size(), little-endian
 *
 * Emits begin-clear-state, context control, one SET_CONTEXT_REG packet per
 * extent of the clear-state table, end-clear-state and a CLEAR_STATE
 * packet.  Silently returns on a NULL table/buffer or on a non-context
 * section (mirroring the size calculation's 0 return).
 */
static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* Register offset is relative to the context-reg base. */
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1663
/*
 * gfx_v9_0_init_always_on_cu_mask - program the per-SE/SH always-on CU masks.
 * @adev: amdgpu device pointer
 *
 * For every shader engine / shader array, walks the CU bitmap and marks the
 * first always_on_cu_num present CUs as always-on; the first
 * pg_always_on_cu_num of those are also written to the powergating
 * always-on mask.  The resulting mask is cached in cu_info->ao_cu_bitmap.
 * Must run with per-SE register steering, hence the grbm_idx_mutex.
 */
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	uint32_t pg_always_on_cu_num = 2;
	uint32_t always_on_cu_num;
	uint32_t i, j, k;
	uint32_t mask, cu_bitmap, counter;

	/* Budget of always-on CUs depends on the part. */
	if (adev->flags & AMD_IS_APU)
		always_on_cu_num = 4;
	else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
		always_on_cu_num = 8;
	else
		always_on_cu_num = 12;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			/* Steer register writes at SE i / SH j. */
			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (cu_info->bitmap[0][i][j] & mask) {
					/* First pg_always_on_cu_num CUs also feed the PG mask. */
					if (counter == pg_always_on_cu_num)
						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
					if (counter < always_on_cu_num)
						cu_bitmap |= mask;
					else
						break;
					counter++;
				}
				mask <<= 1;
			}

			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
		}
	}
	/* Restore broadcast steering before releasing the lock. */
	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);
}
1707
/*
 * gfx_v9_0_init_lbpw - program RLC load-balancing / per-CU powergating
 * (LBPW) thresholds and masks for Raven-class parts.
 *
 * Register values are fixed tuning constants; the sequence ends by
 * programming the always-on CU masks.
 */
static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0, preserve low half */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}
1756
/*
 * gfx_v9_4_init_lbpw - program RLC load-balancing / per-CU powergating
 * (LBPW) thresholds and masks for the 9.4-class variant.
 *
 * Same flow as gfx_v9_0_init_lbpw() with different threshold constants.
 */
static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0, preserve low half */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}
1805
/* Toggle the RLC load-balance enable bit in RLC_LB_CNTL. */
static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}
1810
/*
 * gfx_v9_0_cp_jump_table_num - number of CP jump-table entries for this part.
 *
 * 5 entries when a separate MEC2 firmware binary is loaded, otherwise 4.
 */
static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	/* Ternary replaces the redundant if/else-return pair. */
	return gfx_v9_0_load_mec2_fw_bin_support(adev) ? 5 : 4;
}
1818
/*
 * gfx_v9_0_init_rlcg_reg_access_ctrl - record the register offsets the RLC
 * needs for indirect (RLCG) register access, then mark the path supported.
 */
static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;

	/* Single-XCC part: only instance 0 exists. */
	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
	adev->gfx.rlc.rlcg_reg_access_supported = true;
}
1833
/*
 * gfx_v9_0_rlc_init - allocate RLC buffer objects.
 * @adev: amdgpu device pointer
 *
 * Sets the static gfx9 clear-state table, allocates the clear-state buffer
 * and, on APUs, the CP jump table + GDS backup buffer.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx9_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if (adev->flags & AMD_IS_APU) {
		/* TODO: double check the cp_table_size for RV */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	return 0;
}
1860
/* Free the MEC EOP and MEC firmware BOs created by gfx_v9_0_mec_init(). */
static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}
1866
gfx_v9_0_mec_init(struct amdgpu_device * adev)1867 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1868 {
1869 int r;
1870 u32 *hpd;
1871 const __le32 *fw_data;
1872 unsigned fw_size;
1873 u32 *fw;
1874 size_t mec_hpd_size;
1875
1876 const struct gfx_firmware_header_v1_0 *mec_hdr;
1877
1878 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1879
1880 /* take ownership of the relevant compute queues */
1881 amdgpu_gfx_compute_queue_acquire(adev);
1882 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1883 if (mec_hpd_size) {
1884 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1885 AMDGPU_GEM_DOMAIN_VRAM |
1886 AMDGPU_GEM_DOMAIN_GTT,
1887 &adev->gfx.mec.hpd_eop_obj,
1888 &adev->gfx.mec.hpd_eop_gpu_addr,
1889 (void **)&hpd);
1890 if (r) {
1891 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1892 gfx_v9_0_mec_fini(adev);
1893 return r;
1894 }
1895
1896 memset(hpd, 0, mec_hpd_size);
1897
1898 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1899 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1900 }
1901
1902 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1903
1904 fw_data = (const __le32 *)
1905 (adev->gfx.mec_fw->data +
1906 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1907 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1908
1909 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1910 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1911 &adev->gfx.mec.mec_fw_obj,
1912 &adev->gfx.mec.mec_fw_gpu_addr,
1913 (void **)&fw);
1914 if (r) {
1915 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1916 gfx_v9_0_mec_fini(adev);
1917 return r;
1918 }
1919
1920 memcpy(fw, fw_data, fw_size);
1921
1922 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1923 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1924
1925 return 0;
1926 }
1927
/*
 * wave_read_ind - read one SQ wave register via the SQ indirect
 * index/data register pair for the given SIMD/wave.
 */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
1937
/*
 * wave_read_regs - read @num consecutive SQ wave registers starting at
 * @regno into @out, using the indirect index's auto-increment mode.
 */
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	/* Each data read advances the index automatically. */
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
1952
/*
 * gfx_v9_0_read_wave_data - snapshot the status registers of one wave into
 * @dst, incrementing *no_fields per entry.  Entry 0 is the wave-data format
 * tag (type 1); the rest are individual SQ_WAVE_* register reads.
 */
static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
1973
/* Read @size SGPRs of a wave starting at @start into @dst (thread 0). */
static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
1982
/* Read @size VGPRs of one thread of a wave starting at @start into @dst. */
static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				     uint32_t wave, uint32_t thread,
				     uint32_t start, uint32_t size,
				     uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}
1992
/* Steer GRBM register access at a specific ME/pipe/queue/VMID (XCC 0). */
static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
	soc15_grbm_select(adev, me, pipe, q, vm, 0);
}
1998
/* GFX helper callbacks for v9.0: clock query, SE/SH steering, wave reads. */
static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
};
2007
/* RAS hardware ops: error injection plus error-count query/reset. */
const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
		.ras_error_inject = &gfx_v9_0_ras_error_inject,
		.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
		.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
};
2013
/* RAS block wrapper installed into adev->gfx.ras for GC 9.4.0
 * (see gfx_v9_0_gpu_early_init()). */
static struct amdgpu_gfx_ras gfx_v9_0_ras = {
	.ras_block = {
		.hw_ops = &gfx_v9_0_ras_ops,
	},
};
2019
/*
 * gfx_v9_0_gpu_early_init - set per-IP-version GFX configuration.
 * @adev: amdgpu device pointer
 *
 * Selects fifo sizes, RAS block, and GB_ADDR_CONFIG for each supported GC
 * IP version, then decodes the address-config fields (pipes, banks,
 * compressed frags, RBs, SEs, interleave size) into gfx.config.
 *
 * Returns 0 on success or a negative error code from the vbios gfx-info
 * lookup; BUG()s on an unknown IP version.
 */
static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	int err;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
		break;
	case IP_VERSION(9, 2, 1):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
		DRM_INFO("fix gfx.config for vega12\n");
		break;
	case IP_VERSION(9, 4, 0):
		adev->gfx.ras = &gfx_v9_0_ras;
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		/* Read the hardware value and force the golden field settings. */
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		gb_addr_config &= ~0xf3e777ff;
		gb_addr_config |= 0x22014042;
		/* check vbios table if gpu info is not available */
		err = amdgpu_atomfirmware_get_gfx_info(adev);
		if (err)
			return err;
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
		else
			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
		break;
	case IP_VERSION(9, 4, 1):
		adev->gfx.ras = &gfx_v9_4_ras;
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		gb_addr_config &= ~0xf3e777ff;
		gb_addr_config |= 0x22014042;
		break;
	case IP_VERSION(9, 3, 0):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		gb_addr_config &= ~0xf3e777ff;
		gb_addr_config |= 0x22010042;
		break;
	case IP_VERSION(9, 4, 2):
		adev->gfx.ras = &gfx_v9_4_2_ras;
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
		gb_addr_config &= ~0xf3e777ff;
		gb_addr_config |= 0x22014042;
		/* check vbios table if gpu info is not available */
		err = amdgpu_atomfirmware_get_gfx_info(adev);
		if (err)
			return err;
		break;
	default:
		BUG();
		break;
	}

	adev->gfx.config.gb_addr_config = gb_addr_config;

	/* Decode the packed GB_ADDR_CONFIG fields; each is a log2 value. */
	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_PIPES);

	adev->gfx.config.max_tile_pipes =
		adev->gfx.config.gb_addr_config_fields.num_pipes;

	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_BANKS);
	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					MAX_COMPRESSED_FRAGS);
	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_RB_PER_SE);
	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					NUM_SHADER_ENGINES);
	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
			REG_GET_FIELD(
					adev->gfx.config.gb_addr_config,
					GB_ADDR_CONFIG,
					PIPE_INTERLEAVE_SIZE));

	return 0;
}
2150
gfx_v9_0_compute_ring_init(struct amdgpu_device * adev,int ring_id,int mec,int pipe,int queue)2151 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2152 int mec, int pipe, int queue)
2153 {
2154 unsigned irq_type;
2155 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2156 unsigned int hw_prio;
2157
2158 ring = &adev->gfx.compute_ring[ring_id];
2159
2160 /* mec0 is me1 */
2161 ring->me = mec + 1;
2162 ring->pipe = pipe;
2163 ring->queue = queue;
2164
2165 ring->ring_obj = NULL;
2166 ring->use_doorbell = true;
2167 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2168 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2169 + (ring_id * GFX9_MEC_HPD_SIZE);
2170 ring->vm_hub = AMDGPU_GFXHUB(0);
2171 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2172
2173 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2174 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2175 + ring->pipe;
2176 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2177 AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2178 /* type-2 packets are deprecated on MEC, use type-3 instead */
2179 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2180 hw_prio, NULL);
2181 }
2182
gfx_v9_0_alloc_ip_dump(struct amdgpu_device * adev)2183 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2184 {
2185 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2186 uint32_t *ptr;
2187 uint32_t inst;
2188
2189 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2190 if (!ptr) {
2191 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2192 adev->gfx.ip_dump_core = NULL;
2193 } else {
2194 adev->gfx.ip_dump_core = ptr;
2195 }
2196
2197 /* Allocate memory for compute queue registers for all the instances */
2198 reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2199 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2200 adev->gfx.mec.num_queue_per_pipe;
2201
2202 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2203 if (!ptr) {
2204 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2205 adev->gfx.ip_dump_compute_queues = NULL;
2206 } else {
2207 adev->gfx.ip_dump_compute_queues = ptr;
2208 }
2209 }
2210
gfx_v9_0_sw_init(struct amdgpu_ip_block * ip_block)2211 static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
2212 {
2213 int i, j, k, r, ring_id;
2214 int xcc_id = 0;
2215 struct amdgpu_ring *ring;
2216 struct amdgpu_device *adev = ip_block->adev;
2217 unsigned int hw_prio;
2218
2219 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2220 case IP_VERSION(9, 0, 1):
2221 case IP_VERSION(9, 2, 1):
2222 case IP_VERSION(9, 4, 0):
2223 case IP_VERSION(9, 2, 2):
2224 case IP_VERSION(9, 1, 0):
2225 case IP_VERSION(9, 4, 1):
2226 case IP_VERSION(9, 3, 0):
2227 case IP_VERSION(9, 4, 2):
2228 adev->gfx.mec.num_mec = 2;
2229 break;
2230 default:
2231 adev->gfx.mec.num_mec = 1;
2232 break;
2233 }
2234
2235 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2236 case IP_VERSION(9, 4, 2):
2237 adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
2238 adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
2239 if (adev->gfx.mec_fw_version >= 88) {
2240 adev->gfx.enable_cleaner_shader = true;
2241 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
2242 if (r) {
2243 adev->gfx.enable_cleaner_shader = false;
2244 dev_err(adev->dev, "Failed to initialize cleaner shader\n");
2245 }
2246 }
2247 break;
2248 default:
2249 adev->gfx.enable_cleaner_shader = false;
2250 break;
2251 }
2252
2253 adev->gfx.mec.num_pipe_per_mec = 4;
2254 adev->gfx.mec.num_queue_per_pipe = 8;
2255
2256 /* EOP Event */
2257 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2258 if (r)
2259 return r;
2260
2261 /* Bad opcode Event */
2262 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
2263 GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
2264 &adev->gfx.bad_op_irq);
2265 if (r)
2266 return r;
2267
2268 /* Privileged reg */
2269 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2270 &adev->gfx.priv_reg_irq);
2271 if (r)
2272 return r;
2273
2274 /* Privileged inst */
2275 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2276 &adev->gfx.priv_inst_irq);
2277 if (r)
2278 return r;
2279
2280 /* ECC error */
2281 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2282 &adev->gfx.cp_ecc_error_irq);
2283 if (r)
2284 return r;
2285
2286 /* FUE error */
2287 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2288 &adev->gfx.cp_ecc_error_irq);
2289 if (r)
2290 return r;
2291
2292 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2293
2294 if (adev->gfx.rlc.funcs) {
2295 if (adev->gfx.rlc.funcs->init) {
2296 r = adev->gfx.rlc.funcs->init(adev);
2297 if (r) {
2298 dev_err(adev->dev, "Failed to init rlc BOs!\n");
2299 return r;
2300 }
2301 }
2302 }
2303
2304 r = gfx_v9_0_mec_init(adev);
2305 if (r) {
2306 DRM_ERROR("Failed to init MEC BOs!\n");
2307 return r;
2308 }
2309
2310 /* set up the gfx ring */
2311 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2312 ring = &adev->gfx.gfx_ring[i];
2313 ring->ring_obj = NULL;
2314 if (!i)
2315 sprintf(ring->name, "gfx");
2316 else
2317 sprintf(ring->name, "gfx_%d", i);
2318 ring->use_doorbell = true;
2319 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2320
2321 /* disable scheduler on the real ring */
2322 ring->no_scheduler = adev->gfx.mcbp;
2323 ring->vm_hub = AMDGPU_GFXHUB(0);
2324 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2325 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2326 AMDGPU_RING_PRIO_DEFAULT, NULL);
2327 if (r)
2328 return r;
2329 }
2330
2331 /* set up the software rings */
2332 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2333 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2334 ring = &adev->gfx.sw_gfx_ring[i];
2335 ring->ring_obj = NULL;
2336 sprintf(ring->name, amdgpu_sw_ring_name(i));
2337 ring->use_doorbell = true;
2338 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2339 ring->is_sw_ring = true;
2340 hw_prio = amdgpu_sw_ring_priority(i);
2341 ring->vm_hub = AMDGPU_GFXHUB(0);
2342 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2343 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2344 NULL);
2345 if (r)
2346 return r;
2347 ring->wptr = 0;
2348 }
2349
2350 /* init the muxer and add software rings */
2351 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2352 GFX9_NUM_SW_GFX_RINGS);
2353 if (r) {
2354 DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2355 return r;
2356 }
2357 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2358 r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2359 &adev->gfx.sw_gfx_ring[i]);
2360 if (r) {
2361 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2362 return r;
2363 }
2364 }
2365 }
2366
2367 /* set up the compute queues - allocate horizontally across pipes */
2368 ring_id = 0;
2369 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2370 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2371 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2372 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2373 k, j))
2374 continue;
2375
2376 r = gfx_v9_0_compute_ring_init(adev,
2377 ring_id,
2378 i, k, j);
2379 if (r)
2380 return r;
2381
2382 ring_id++;
2383 }
2384 }
2385 }
2386
2387 /* TODO: Add queue reset mask when FW fully supports it */
2388 adev->gfx.gfx_supported_reset =
2389 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
2390 adev->gfx.compute_supported_reset =
2391 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
2392
2393 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2394 if (r) {
2395 DRM_ERROR("Failed to init KIQ BOs!\n");
2396 return r;
2397 }
2398
2399 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2400 if (r)
2401 return r;
2402
2403 /* create MQD for all compute queues as wel as KIQ for SRIOV case */
2404 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2405 if (r)
2406 return r;
2407
2408 adev->gfx.ce_ram_size = 0x8000;
2409
2410 r = gfx_v9_0_gpu_early_init(adev);
2411 if (r)
2412 return r;
2413
2414 if (amdgpu_gfx_ras_sw_init(adev)) {
2415 dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2416 return -EINVAL;
2417 }
2418
2419 gfx_v9_0_alloc_ip_dump(adev);
2420
2421 r = amdgpu_gfx_sysfs_init(adev);
2422 if (r)
2423 return r;
2424
2425 return 0;
2426 }
2427
2428
/*
 * gfx_v9_0_sw_fini - tear down everything gfx_v9_0_sw_init created
 *
 * Frees rings (software gfx rings and their muxer first, then the
 * hardware gfx and compute rings), MQDs, the KIQ ring, cleaner shader,
 * MEC BOs, RLC clear-state buffer, firmware, sysfs entries and the
 * IP-dump scratch buffers.  Always returns 0.
 */
static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
{
	int i;
	struct amdgpu_device *adev = ip_block->adev;

	/* software gfx rings only exist when MCBP was enabled at init */
	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
		amdgpu_ring_mux_fini(&adev->gfx.muxer);
	}

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_mqd_sw_fini(adev, 0);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
	amdgpu_gfx_kiq_fini(adev, 0);

	amdgpu_gfx_cleaner_shader_sw_fini(adev);

	gfx_v9_0_mec_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			      &adev->gfx.rlc.clear_state_gpu_addr,
			      (void **)&adev->gfx.rlc.cs_ptr);
	/* NOTE(review): the CP table BO is freed only on APUs, presumably
	 * matching an APU-only allocation path elsewhere — confirm. */
	if (adev->flags & AMD_IS_APU) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				      &adev->gfx.rlc.cp_table_gpu_addr,
				      (void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v9_0_free_microcode(adev);

	amdgpu_gfx_sysfs_fini(adev);

	/* kfree(NULL) is a no-op, so the dump buffers may be unallocated */
	kfree(adev->gfx.ip_dump_core);
	kfree(adev->gfx.ip_dump_compute_queues);

	return 0;
}
2469
2470
/* Tiling-mode table programming: intentionally a stub on gfx v9 (TODO). */
static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}
2475
/*
 * gfx_v9_0_select_se_sh - program GRBM_GFX_INDEX to target an SE/SH/instance
 *
 * Passing 0xffffffff for @se_num, @sh_num or @instance selects broadcast
 * writes for that dimension instead of a specific index.  @xcc_id is not
 * used by this implementation.
 */
void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
			   u32 instance, int xcc_id)
{
	u32 gfx_index = 0;

	if (instance == 0xffffffff)
		gfx_index = REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX,
					  INSTANCE_BROADCAST_WRITES, 1);
	else
		gfx_index = REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX,
					  INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		gfx_index = REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX,
					  SE_BROADCAST_WRITES, 1);
	else
		gfx_index = REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX,
					  SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		gfx_index = REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX,
					  SH_BROADCAST_WRITES, 1);
	else
		gfx_index = REG_SET_FIELD(gfx_index, GRBM_GFX_INDEX,
					  SH_INDEX, sh_num);

	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index);
}
2498
/*
 * gfx_v9_0_get_rb_active_bitmap - return the bitmap of active render
 * backends for the currently selected SE/SH (the caller must have
 * programmed GRBM_GFX_INDEX beforehand).
 */
static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	/* combine fuse (CC) and user (GC_USER) backend-disable bits */
	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);

	/* NOTE(review): the mask comes from CC_* while the shift comes from
	 * GC_USER_* — presumably BACKEND_DISABLE has the same bit layout in
	 * both registers; confirm against the register headers. */
	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	/* a cleared disable bit means the RB is active */
	return (~data) & mask;
}
2514
/*
 * gfx_v9_0_setup_rb - discover the active render backends across all
 * SEs/SHs and record the result in adev->gfx.config
 * (backend_enable_mask and num_rbs).
 */
static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 active_rbs = 0;
	/* number of bitmap bits contributed by each SH */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			/* select one SE/SH, then read its RB bitmap */
			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
			data = gfx_v9_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* restore broadcast mode before releasing the index mutex */
	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);
}
2538
/*
 * gfx_v9_0_debug_trap_config_init - enable SPI debug traps for a VMID range
 * and clear the trap mask/data registers.
 */
static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
				uint32_t first_vmid,
				uint32_t last_vmid)
{
	uint32_t data;
	uint32_t trap_config_vmid_mask = 0;
	int i;

	/* Calculate trap config vmid mask.
	 * NOTE(review): the range is exclusive of last_vmid — confirm that
	 * callers pass one-past-the-end rather than the last VMID itself. */
	for (i = first_vmid; i < last_vmid; i++)
		trap_config_vmid_mask |= (1 << i);

	data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
			VMID_SEL, trap_config_vmid_mask);
	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
			TRAP_EN, 1);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);

	/* start with empty trap data */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
}
2561
/* default SH_MEM base for compute (KFD) VMIDs; replicated into both the
 * private and shared halves of SH_MEM_BASES below */
#define DEFAULT_SH_MEM_BASES	(0x6000)
/*
 * gfx_v9_0_init_compute_vmid - program SH_MEM apertures for the KFD-owned
 * VMIDs and clear their GDS/GWS/OA allocations.
 */
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	mutex_lock(&adev->srbm_mutex);
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		/* select the VMID via SRBM before touching SH_MEM regs */
		soc15_grbm_select(adev, 0, 0, 0, i, 0);
		/* CP and shaders */
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		/* BASE/SIZE registers are 64-bit pairs, hence the 2*i stride */
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
	}
}
2600
gfx_v9_0_init_gds_vmid(struct amdgpu_device * adev)2601 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2602 {
2603 int vmid;
2604
2605 /*
2606 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2607 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2608 * the driver can enable them for graphics. VMID0 should maintain
2609 * access so that HWS firmware can save/restore entries.
2610 */
2611 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2612 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2613 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2614 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2615 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2616 }
2617 }
2618
gfx_v9_0_init_sq_config(struct amdgpu_device * adev)2619 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2620 {
2621 uint32_t tmp;
2622
2623 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2624 case IP_VERSION(9, 4, 1):
2625 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2626 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2627 !READ_ONCE(adev->barrier_has_auto_waitcnt));
2628 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2629 break;
2630 default:
2631 break;
2632 }
2633 }
2634
/*
 * gfx_v9_0_constants_init - program the golden/boot-time GFX constants:
 * GRBM timeout, RB configuration, CU info, and the per-VMID SH_MEM
 * apertures, then the compute/GDS VMID and SQ setup.
 */
static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	/* GRBM read timeout is not programmed on SRIOV VFs for 9.4.2 */
	if (!amdgpu_sriov_vf(adev) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) {
		WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
	}

	gfx_v9_0_tiling_mode_table_init(adev);

	if (adev->gfx.num_gfx_rings)
		gfx_v9_0_setup_rb(adev);
	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i, 0);
		/* CP and shaders */
		if (i == 0) {
			/* VMID0 gets a zero SH_MEM_BASES */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
					    !!adev->gmc.noretry);
			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
		} else {
			/* other VMIDs point at the private/shared apertures */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
					    !!adev->gmc.noretry);
			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
				(adev->gmc.private_aperture_start >> 48));
			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
				(adev->gmc.shared_aperture_start >> 48));
			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
		}
	}
	soc15_grbm_select(adev, 0, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);

	gfx_v9_0_init_compute_vmid(adev);
	gfx_v9_0_init_gds_vmid(adev);
	gfx_v9_0_init_sq_config(adev);
}
2686
/*
 * gfx_v9_0_wait_for_rlc_serdes - busy-wait until the RLC serdes masters
 * (per-CU for each SE/SH, then the non-CU masters) report idle, up to
 * adev->usec_timeout microseconds per poll loop.  Logs and returns on
 * timeout instead of failing.
 */
static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* restore broadcast and release the mutex
				 * before bailing out */
				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
							0xffffffff, 0xffffffff, 0);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* now wait for the non-CU serdes masters */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
2724
/*
 * gfx_v9_0_enable_gui_idle_interrupt - toggle the RING0 context-busy /
 * context-empty / compute-busy (and, when gfx rings exist, gfx-idle)
 * interrupt enables.  These interrupts should be enabled to drive the
 * DS clock.
 */
static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 field = enable ? 1 : 0;
	u32 cntl = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);

	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, field);
	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, field);
	cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, field);
	if (adev->gfx.num_gfx_rings)
		cntl = REG_SET_FIELD(cntl, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, field);

	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, cntl);
}
2742
/*
 * gfx_v9_0_init_csb - fill the clear-state buffer and point the RLC's
 * CSIB registers (address hi/lo and length) at it.
 */
static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
	/* csib */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	/* low bits masked: the buffer address is at least 4-byte aligned */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
			adev->gfx.rlc.clear_state_size);
}
2754
/*
 * gfx_v9_1_parse_ind_reg_list - scan the indirect portion of the RLC
 * register-list-format blob.
 *
 * Starting at @indirect_offset, each segment is a sequence of
 * (value, pad, register) triples terminated by a 0xFFFFFFFF sentinel.
 * For every segment the starting offset is appended to
 * @indirect_start_offsets, and every distinct indirect register number
 * is collected into @unique_indirect_regs (first empty slot wins).
 * BUG()s if more unique registers appear than
 * @unique_indirect_reg_count allows.
 */
static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
				int indirect_offset,
				int list_size,
				int *unique_indirect_regs,
				int unique_indirect_reg_count,
				int *indirect_start_offsets,
				int *indirect_start_offsets_count,
				int max_start_offsets_count)
{
	int idx;

	for (; indirect_offset < list_size; indirect_offset++) {
		/* record where this segment begins */
		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;

		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
			/* skip to the register number within the triple */
			indirect_offset += 2;

			/* look for the matching indice */
			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
				if (unique_indirect_regs[idx] ==
					register_list_format[indirect_offset] ||
					!unique_indirect_regs[idx])
					break;
			}

			BUG_ON(idx >= unique_indirect_reg_count);

			/* claim the first empty slot for a new register */
			if (!unique_indirect_regs[idx])
				unique_indirect_regs[idx] = register_list_format[indirect_offset];

			indirect_offset++;
		}
	}
}
2791
/*
 * gfx_v9_1_init_rlc_save_restore_list - upload the RLC save/restore
 * machine's register lists.
 *
 * Parses the firmware-provided register-list-format blob, writes the
 * register-restore table into RLC ARAM, uploads the direct and indirect
 * register lists to RLC scratch RAM (remapping each indirect register to
 * its index in the unique-register table), programs the list size and the
 * per-segment start offsets, and finally loads the unique indirect
 * registers into the SRM index-control address/data register pairs.
 *
 * Returns 0 on success, -ENOMEM if the blob copy cannot be allocated.
 */
static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
{
	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
	int unique_indirect_reg_count = 0;

	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
	int indirect_start_offsets_count = 0;

	int list_size = 0;
	int i = 0, j = 0;
	u32 tmp = 0;

	/* work on a private copy of the firmware blob */
	u32 *register_list_format =
		kmemdup(adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;

	/* setup unique_indirect_regs array and indirect_start_offsets array */
	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
	gfx_v9_1_parse_ind_reg_list(register_list_format,
				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				    unique_indirect_regs,
				    unique_indirect_reg_count,
				    indirect_start_offsets,
				    &indirect_start_offsets_count,
				    ARRAY_SIZE(indirect_start_offsets));

	/* enable auto inc in case it is disabled */
	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);

	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
			adev->gfx.rlc.register_restore[i]);

	/* load indirect register */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
		adev->gfx.rlc.reg_list_format_start);

	/* direct register portion */
	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
			register_list_format[i]);

	/* indirect register portion */
	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
		/* 0xFFFFFFFF is a segment terminator — pass it through */
		if (register_list_format[i] == 0xFFFFFFFF) {
			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
			continue;
		}

		/* write the (value, pad) pair of the triple */
		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);

		/* replace the register number with its unique-table index */
		for (j = 0; j < unique_indirect_reg_count; j++) {
			if (register_list_format[i] == unique_indirect_regs[j]) {
				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
				break;
			}
		}

		BUG_ON(j >= unique_indirect_reg_count);

		i++;
	}

	/* set save/restore list size */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
		adev->gfx.rlc.reg_restore_list_size);
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);

	/* write the starting offsets to RLC scratch ram */
	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
		       indirect_start_offsets[i]);

	/* load unique indirect regs*/
	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
		if (unique_indirect_regs[i] != 0) {
			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
			       unique_indirect_regs[i] & 0x3FFFF);

			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
			       unique_indirect_regs[i] >> 20);
		}
	}

	kfree(register_list_format);
	return 0;
}
2894
/* Turn on the RLC save/restore machine (SRM). */
static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
}
2899
/*
 * pwr_10_0_gfxip_control_over_cgpg - hand CGPG (coarse-grain power gating)
 * control to the GFX IP's RLC, or give it back.
 *
 * Register writes are skipped when the computed value matches what is
 * already programmed.
 */
static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
					     bool enable)
{
	uint32_t data = 0;
	uint32_t default_data = 0;

	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
	if (enable) {
		/* enable GFXIP control over CGPG */
		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
		if(default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);

		/* update status */
		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
		if(default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
	} else {
		/* restore GFXIP control over CGPG */
		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
		if(default_data != data)
			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
	}
}
2925
/*
 * gfx_v9_0_init_gfx_power_gating - program the power-gating timing
 * parameters (idle poll count, RLC PG delays, auto-PG idle threshold)
 * when any GFX PG mode is supported, then hand CGPG control to the RLC
 * on ASICs other than GC 9.3.0.
 */
static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
{
	uint32_t data = 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		/* init IDLE_POLL_COUNT = 60 */
		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);

		/* init RLC PG Delay */
		data = 0;
		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;

		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
			pwr_10_0_gfxip_control_over_cgpg(adev, true);
	}
}
2967
/*
 * gfx_v9_0_enable_sck_slow_down_on_power_up - toggle SMU clock slow-down
 * on power-up in RLC_PG_CNTL; the write is skipped when the bit already
 * has the requested value.
 */
static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t old_val, new_val;

	old_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
	new_val = REG_SET_FIELD(old_val, RLC_PG_CNTL,
				SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
				enable ? 1 : 0);
	if (new_val != old_val)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), new_val);
}
2981
/*
 * gfx_v9_0_enable_sck_slow_down_on_power_down - toggle SMU clock
 * slow-down on power-down in RLC_PG_CNTL; write-skipped when unchanged.
 */
static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
							bool enable)
{
	uint32_t old_val, new_val;

	old_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
	new_val = REG_SET_FIELD(old_val, RLC_PG_CNTL,
				SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
				enable ? 1 : 0);
	if (new_val != old_val)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), new_val);
}
2995
/*
 * gfx_v9_0_enable_cp_power_gating - toggle CP power gating via
 * RLC_PG_CNTL.  Note the field is CP_PG_DISABLE, so its sense is
 * inverted: enable == clear the bit.  Write-skipped when unchanged.
 */
static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
					    bool enable)
{
	uint32_t old_val, new_val;

	old_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
	new_val = REG_SET_FIELD(old_val, RLC_PG_CNTL,
				CP_PG_DISABLE,
				enable ? 0 : 1);
	if (new_val != old_val)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), new_val);
}
3009
/*
 * gfx_v9_0_enable_gfx_cg_power_gating - toggle GFX coarse-grain power
 * gating in RLC_PG_CNTL; write-skipped when unchanged.
 */
static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	uint32_t old_val, new_val;

	old_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
	new_val = REG_SET_FIELD(old_val, RLC_PG_CNTL,
				GFX_POWER_GATING_ENABLE,
				enable ? 1 : 0);
	if (new_val != old_val)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), new_val);
}
3022
/*
 * gfx_v9_0_enable_gfx_pipeline_powergating - toggle GFX pipeline power
 * gating in RLC_PG_CNTL (write-skipped when unchanged).  On disable,
 * a dummy GFX register read is issued to wake the block up.
 */
static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
						     bool enable)
{
	uint32_t old_val, new_val;

	old_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
	new_val = REG_SET_FIELD(old_val, RLC_PG_CNTL,
				GFX_PIPELINE_PG_ENABLE,
				enable ? 1 : 0);
	if (new_val != old_val)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), new_val);

	if (!enable)
		/* read any GFX register to wake up GFX */
		new_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
}
3039
/*
 * gfx_v9_0_enable_gfx_static_mg_power_gating - toggle static per-CU
 * power gating in RLC_PG_CNTL; write-skipped when unchanged.
 */
static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	uint32_t old_val, new_val;

	old_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
	new_val = REG_SET_FIELD(old_val, RLC_PG_CNTL,
				STATIC_PER_CU_PG_ENABLE,
				enable ? 1 : 0);
	if (new_val != old_val)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), new_val);
}
3052
/*
 * gfx_v9_0_enable_gfx_dynamic_mg_power_gating - toggle dynamic per-CU
 * power gating in RLC_PG_CNTL; write-skipped when unchanged.
 */
static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	uint32_t old_val, new_val;

	old_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
	new_val = REG_SET_FIELD(old_val, RLC_PG_CNTL,
				DYN_PER_CU_PG_ENABLE,
				enable ? 1 : 0);
	if (new_val != old_val)
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), new_val);
}
3065
/*
 * gfx_v9_0_init_pg - power-gating setup: clear-state buffer, RLC
 * save/restore list (where supported), and the gfx power-gating timing
 * parameters when any PG feature is enabled.
 */
static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
{
	gfx_v9_0_init_csb(adev);

	/*
	 * Rlc save restore list is workable since v2_1.
	 * And it's needed by gfxoff feature.
	 */
	if (adev->gfx.rlc.is_rlc_v2_1) {
		/* only Vega12 (9.2.1) and Raven2 upload the list manually */
		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
			    IP_VERSION(9, 2, 1) ||
		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
			gfx_v9_1_init_rlc_save_restore_list(adev);
		gfx_v9_0_enable_save_restore_machine(adev);
	}

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		/* point the RLC at the CP jump table (256-byte aligned) */
		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v9_0_init_gfx_power_gating(adev);
	}
}
3093
/*
 * gfx_v9_0_rlc_stop - halt the RLC F32 core, mask the GUI idle
 * interrupts, and wait for the serdes masters to drain.
 */
static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
	gfx_v9_0_wait_for_rlc_serdes(adev);
}
3100
/*
 * gfx_v9_0_rlc_reset - pulse the GRBM soft reset for the RLC, with a
 * 50us settle time on either side of the pulse.
 */
static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
3108
/*
 * gfx_v9_0_rlc_start - enable the RLC F32 core; on dGPUs the GUI idle
 * interrupts are turned on here, on APUs they are enabled later (after
 * the CP is initialized).  With AMDGPU_RLC_DEBUG_RETRY defined, a debug
 * RLC ucode (version 0x108) gets extra timer/retry programming.
 */
static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
{
#ifdef AMDGPU_RLC_DEBUG_RETRY
	u32 rlc_ucode_ver;
#endif

	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
	udelay(50);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU)) {
		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
		udelay(50);
	}

#ifdef AMDGPU_RLC_DEBUG_RETRY
	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
	if(rlc_ucode_ver == 0x108) {
		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
				rlc_ucode_ver, adev->gfx.rlc_fw_version);
		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
		 * default is 0x9C4 to create a 100us interval */
		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
		 * to disable the page fault retry interrupts, default is
		 * 0x100 (256) */
		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
	}
#endif
}
3140
/*
 * gfx_v9_0_rlc_load_microcode - legacy (non-PSP) RLC firmware upload:
 * stream the ucode words through RLC_GPM_UCODE_ADDR/DATA, then write
 * the firmware version into the address register to finish.
 *
 * Returns 0 on success, -EINVAL when no RLC firmware is loaded.
 */
static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* ucode payload location and size come from the firmware header */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
			RLCG_UCODE_LOADING_START_ADDRESS);
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
3165
/* Bring the RLC back up: stop it, disable CGCG/CGLS clock gating, re-init
 * powergating, (optionally) load ucode the legacy way, configure LBPW for
 * the IP versions that support it, then start the RLC.
 *
 * Under SR-IOV the host owns the RLC, so only the clear-state buffer is
 * (re)initialized.
 */
static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	if (amdgpu_sriov_vf(adev)) {
		gfx_v9_0_init_csb(adev);
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);

	/* disable CG */
	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);

	gfx_v9_0_init_pg(adev);

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		/* legacy rlc firmware loading */
		r = gfx_v9_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		/* NOTE(review): here only amdgpu_lbpw == 0 disables LBPW, so
		 * a negative module-param value enables it, while the 9.4.0
		 * case below requires > 0 — presumably intentional per-ASIC
		 * defaults; confirm against the amdgpu.lbpw param docs.
		 */
		gfx_v9_0_init_lbpw(adev);
		if (amdgpu_lbpw == 0)
			gfx_v9_0_enable_lbpw(adev, false);
		else
			gfx_v9_0_enable_lbpw(adev, true);
		break;
	case IP_VERSION(9, 4, 0):
		gfx_v9_4_init_lbpw(adev);
		if (amdgpu_lbpw > 0)
			gfx_v9_0_enable_lbpw(adev, true);
		else
			gfx_v9_0_enable_lbpw(adev, false);
		break;
	default:
		break;
	}

	/* park SPM on an unused vmid before the RLC starts */
	gfx_v9_0_update_spm_vmid_internal(adev, 0xf);

	adev->gfx.rlc.funcs->start(adev);

	return 0;
}
3215
/* Run or halt the gfx CP micro-engines (PFP, CE, ME).
 *
 * enable == true clears every halt/reset/icache-invalidate field in
 * CP_ME_CNTL; enable == false sets them all, halting the engines and
 * holding their pipes in reset.
 */
static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	u32 halted = enable ? 0 : 1;
	u32 cntl = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);

	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, CE_INVALIDATE_ICACHE, halted);
	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, halted);
	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, ME_INVALIDATE_ICACHE, halted);
	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, CE_PIPE0_RESET, halted);
	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, CE_PIPE1_RESET, halted);
	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, PFP_PIPE0_RESET, halted);
	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, PFP_PIPE1_RESET, halted);
	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, ME_PIPE0_RESET, halted);
	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, ME_PIPE1_RESET, halted);
	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, ME_HALT, halted);
	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, PFP_HALT, halted);
	cntl = REG_SET_FIELD(cntl, CP_ME_CNTL, CE_HALT, halted);
	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, cntl);
	udelay(50);
}
3235
/* Legacy (non-PSP) gfx CP microcode load: halt the CP, then stream the
 * PFP, CE and ME ucode images into their respective data ports, finishing
 * each by writing the fw version to the address/waddr register.
 *
 * Returns 0 on success, -EINVAL if any of the three firmwares is missing.
 */
static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* CP must be halted while its ucode is rewritten */
	gfx_v9_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
3292
/* Initialize the gfx CP and submit the clear-state (CSB) PM4 packet
 * sequence on gfx ring 0.
 *
 * On APUs resuming from S3 without firmware assistance the CSB resubmit
 * is skipped entirely (quirk limited to gfx9 APUs).
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i, tmp;

	/* init the CP */
	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);

	gfx_v9_0_cp_gfx_enable(adev, true);

	/* Now only limit the quirk on the APU gfx9 series and already
	 * confirmed that the APU gfx10/gfx11 needn't such update.
	 */
	if (adev->flags & AMD_IS_APU &&
			adev->in_s3 && !pm_resume_via_firmware()) {
		DRM_INFO("Will skip the CSB packet resubmit\n");
		return 0;
	}
	/* reserve ring space: CSB payload + preamble/clear-state packets */
	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit the golden context-register state from clearstate_gfx9.h */
	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, tmp);
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_commit(ring);

	return 0;
}
3362
gfx_v9_0_cp_gfx_resume(struct amdgpu_device * adev)3363 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3364 {
3365 struct amdgpu_ring *ring;
3366 u32 tmp;
3367 u32 rb_bufsz;
3368 u64 rb_addr, rptr_addr, wptr_gpu_addr;
3369
3370 /* Set the write pointer delay */
3371 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3372
3373 /* set the RB to use vmid 0 */
3374 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3375
3376 /* Set ring buffer size */
3377 ring = &adev->gfx.gfx_ring[0];
3378 rb_bufsz = order_base_2(ring->ring_size / 8);
3379 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3380 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3381 #ifdef __BIG_ENDIAN
3382 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3383 #endif
3384 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3385
3386 /* Initialize the ring buffer's write pointers */
3387 ring->wptr = 0;
3388 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3389 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3390
3391 /* set the wb address whether it's enabled or not */
3392 rptr_addr = ring->rptr_gpu_addr;
3393 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3394 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3395
3396 wptr_gpu_addr = ring->wptr_gpu_addr;
3397 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3398 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3399
3400 mdelay(1);
3401 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3402
3403 rb_addr = ring->gpu_addr >> 8;
3404 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3405 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3406
3407 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3408 if (ring->use_doorbell) {
3409 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3410 DOORBELL_OFFSET, ring->doorbell_index);
3411 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3412 DOORBELL_EN, 1);
3413 } else {
3414 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3415 }
3416 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3417
3418 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3419 DOORBELL_RANGE_LOWER, ring->doorbell_index);
3420 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3421
3422 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3423 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3424
3425
3426 /* start the ring */
3427 gfx_v9_0_cp_gfx_start(adev);
3428
3429 return 0;
3430 }
3431
/* Run or halt the compute micro-engines (MEC1/MEC2).
 *
 * Disabling halts both MECs, holds every MEC pipe in reset, invalidates
 * the instruction cache, and marks the KIQ ring as not schedulable.
 */
static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	if (enable) {
		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
	} else {
		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
			(CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
			 CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
			 CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
			 CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
			 CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
			 CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
			 CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
			 CP_MEC_CNTL__MEC_ME1_HALT_MASK |
			 CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		adev->gfx.kiq[0].ring.sched.ready = false;
	}
	udelay(50);
}
3451
/* Legacy (non-PSP) MEC microcode load: halt the compute CP, point the CPC
 * instruction cache at the MEC fw buffer in GPU memory, then write the
 * jump table through the MEC1 ucode port.
 *
 * Returns 0 on success, -EINVAL if the MEC firmware was never fetched.
 */
static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i;
	u32 tmp;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v9_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	/* icache fetches from vmid 0 with default cache policy */
	tmp = 0;
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);

	/* base address must be 4K aligned, hence the low-bit mask */
	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));

	/* MEC1 */
	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
			 mec_hdr->jt_offset);
	for (i = 0; i < mec_hdr->jt_size; i++)
		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));

	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
			adev->gfx.mec_fw_version);
	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */

	return 0;
}
3493
3494 /* KIQ functions */
/* Tell the RLC which me/pipe/queue hosts the KIQ by encoding them into the
 * low byte of RLC_CP_SCHEDULERS (queue in bits 0-2, pipe in bits 3-4, me
 * in bit 5) and setting the enable bit (0x80).
 */
static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
}
3506
gfx_v9_0_mqd_set_priority(struct amdgpu_ring * ring,struct v9_mqd * mqd)3507 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3508 {
3509 struct amdgpu_device *adev = ring->adev;
3510
3511 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3512 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3513 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3514 mqd->cp_hqd_queue_priority =
3515 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3516 }
3517 }
3518 }
3519
/* Fill in the memory queue descriptor (MQD) for a compute/KIQ ring:
 * header, CU masks, EOP buffer, doorbell, ring base/size, write-back
 * addresses, vmid and priority.  Only the KIQ marks itself active here;
 * KCQs are activated later via map_queues packets.
 *
 * Always returns 0.
 */
static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	/* fixed MQD header/identification values for gfx9 */
	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all CUs on all shader engines by default */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	/* point the HW at the dynamic_cu_mask field that trails the MQD
	 * inside the v9_mqd_allocation
	 */
	mqd->dynamic_cu_mask_addr_lo =
		lower_32_bits(ring->mqd_gpu_addr
			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi =
		upper_32_bits(ring->mqd_gpu_addr
			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));

	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_HIT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	ring->wptr = 0;
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr_lo = 0;
	mqd->cp_hqd_pq_wptr_hi = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = ring->rptr_gpu_addr;
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = ring->wptr_gpu_addr;
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MIN_IB_AVAIL_SIZE */
	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	mqd->cp_hqd_ib_control = tmp;

	/* set static priority for a queue/ring */
	gfx_v9_0_mqd_set_priority(ring, mqd);
	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);

	/* map_queues packet doesn't need activate the queue,
	 * so only kiq need set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}
3651
/* Program the KIQ's HQD registers directly from its MQD (the KIQ cannot
 * map itself with a packet, unlike the KCQs).  Caller must have selected
 * the right me/pipe/queue via soc15_grbm_select() and hold srbm_mutex.
 *
 * Always returns 0.
 */
static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int j;

	/* disable wptr polling */
	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
	       mqd->cp_hqd_eop_base_addr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
	       mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
	       mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
	       mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active: request a dequeue and poll the
	 * active bit (up to usec_timeout us) before touching the pointers
	 */
	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
		       mqd->cp_hqd_dequeue_request);
		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
		       mqd->cp_hqd_pq_rptr);
		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
		       mqd->cp_hqd_pq_wptr_lo);
		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
		       mqd->cp_hqd_pq_wptr_hi);
	}

	/* set the pointer to the MQD */
	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
	       mqd->cp_mqd_base_addr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
	       mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
	       mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
	       mqd->cp_hqd_pq_base_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
	       mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
	       mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
				mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
				mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
	       mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
					(adev->doorbell_index.kiq * 2) << 2);
		/* If GC has entered CGPG, ringing doorbell > first page
		 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
		 * workaround this issue. And this change has to align with firmware
		 * update.
		 */
		if (check_if_enlarge_doorbell_range(adev))
			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
					(adev->doorbell.size - 4));
		else
			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
					(adev->doorbell_index.userqueue_end * 2) << 2);
	}

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
	       mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
	       mqd->cp_hqd_pq_wptr_lo);
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
	       mqd->cp_hqd_pq_wptr_hi);

	/* set the vmid for the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
	       mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
	       mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}
3765
gfx_v9_0_kiq_fini_register(struct amdgpu_ring * ring)3766 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3767 {
3768 struct amdgpu_device *adev = ring->adev;
3769 int j;
3770
3771 /* disable the queue if it's active */
3772 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3773
3774 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3775
3776 for (j = 0; j < adev->usec_timeout; j++) {
3777 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3778 break;
3779 udelay(1);
3780 }
3781
3782 if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3783 DRM_DEBUG("KIQ dequeue request failed.\n");
3784
3785 /* Manual disable if dequeue request times out */
3786 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3787 }
3788
3789 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3790 0);
3791 }
3792
3793 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3794 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3795 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3796 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3797 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3798 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3799 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3800 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3801
3802 return 0;
3803 }
3804
gfx_v9_0_kiq_init_queue(struct amdgpu_ring * ring)3805 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3806 {
3807 struct amdgpu_device *adev = ring->adev;
3808 struct v9_mqd *mqd = ring->mqd_ptr;
3809 struct v9_mqd *tmp_mqd;
3810
3811 gfx_v9_0_kiq_setting(ring);
3812
3813 /* GPU could be in bad state during probe, driver trigger the reset
3814 * after load the SMU, in this case , the mqd is not be initialized.
3815 * driver need to re-init the mqd.
3816 * check mqd->cp_hqd_pq_control since this value should not be 0
3817 */
3818 tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3819 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3820 /* for GPU_RESET case , reset MQD to a clean status */
3821 if (adev->gfx.kiq[0].mqd_backup)
3822 memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3823
3824 /* reset ring buffer */
3825 ring->wptr = 0;
3826 amdgpu_ring_clear_ring(ring);
3827
3828 mutex_lock(&adev->srbm_mutex);
3829 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3830 gfx_v9_0_kiq_init_register(ring);
3831 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3832 mutex_unlock(&adev->srbm_mutex);
3833 } else {
3834 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3835 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3836 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3837 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3838 amdgpu_ring_clear_ring(ring);
3839 mutex_lock(&adev->srbm_mutex);
3840 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3841 gfx_v9_0_mqd_init(ring);
3842 gfx_v9_0_kiq_init_register(ring);
3843 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3844 mutex_unlock(&adev->srbm_mutex);
3845
3846 if (adev->gfx.kiq[0].mqd_backup)
3847 memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3848 }
3849
3850 return 0;
3851 }
3852
/* Initialize (or restore) a KCQ's MQD.
 *
 * A fresh MQD is built when no restore was requested AND either there is
 * no usable backup or this is a clean init (not reset, not resume).
 * Otherwise the backed-up MQD is restored and the ring state cleared.
 *
 * Always returns 0.
 */
static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];
	struct v9_mqd *tmp_mqd;

	/* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
	 * is not be initialized before
	 */
	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];

	/* a NULL backup cannot be restored, so treat it like an
	 * uninitialized cp_hqd_pq_control (previously tmp_mqd was
	 * dereferenced without a NULL check)
	 */
	if (!restore && (!tmp_mqd || !tmp_mqd->cp_hqd_pq_control ||
	    (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
		gfx_v9_0_mqd_init(ring);
		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* back up the freshly built MQD for future restores */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
	} else {
		/* restore MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
		amdgpu_ring_clear_ring(ring);
	}

	return 0;
}
3890
gfx_v9_0_kiq_resume(struct amdgpu_device * adev)3891 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3892 {
3893 struct amdgpu_ring *ring;
3894 int r;
3895
3896 ring = &adev->gfx.kiq[0].ring;
3897
3898 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3899 if (unlikely(r != 0))
3900 return r;
3901
3902 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3903 if (unlikely(r != 0)) {
3904 amdgpu_bo_unreserve(ring->mqd_obj);
3905 return r;
3906 }
3907
3908 gfx_v9_0_kiq_init_queue(ring);
3909 amdgpu_bo_kunmap(ring->mqd_obj);
3910 ring->mqd_ptr = NULL;
3911 amdgpu_bo_unreserve(ring->mqd_obj);
3912 return 0;
3913 }
3914
/* Enable the compute CP, (re)initialize every KCQ's MQD, then map all
 * KCQs to the hardware through the KIQ.
 *
 * Returns 0 on success or the first error encountered.
 */
static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v9_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
		if (!r) {
			r = gfx_v9_0_kcq_init_queue(ring, false);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	/* map all KCQs via KIQ (xcc 0) */
	r = amdgpu_gfx_enable_kcq(adev, 0);
done:
	return r;
}
3943
/* Full CP bring-up: (optionally) legacy-load gfx/compute microcode, halt
 * both CPs, resume KIQ first, then the gfx ring and the KCQs, and finally
 * ring-test everything.  Compute ring-test failures are intentionally not
 * treated as fatal (the helper marks the ring unready itself).
 *
 * Returns 0 on success or the first fatal error.
 */
static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *ring;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v9_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
		if (adev->gfx.num_gfx_rings) {
			/* legacy firmware loading */
			r = gfx_v9_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;
		}

		r = gfx_v9_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	/* hold both CPs until their queues are programmed */
	if (adev->gfx.num_gfx_rings)
		gfx_v9_0_cp_gfx_enable(adev, false);
	gfx_v9_0_cp_compute_enable(adev, false);

	/* KIQ must come up before it can map the KCQs */
	r = gfx_v9_0_kiq_resume(adev);
	if (r)
		return r;

	if (adev->gfx.num_gfx_rings) {
		r = gfx_v9_0_cp_gfx_resume(adev);
		if (r)
			return r;
	}

	r = gfx_v9_0_kcq_resume(adev);
	if (r)
		return r;

	if (adev->gfx.num_gfx_rings) {
		ring = &adev->gfx.gfx_ring[0];
		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		amdgpu_ring_test_helper(ring);
	}

	gfx_v9_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
3999
/* Mirror the data-fabric address-hash configuration (64K/2M/1G) into
 * TCP_ADDR_CONFIG.  Only applicable to gfx 9.4.1 and 9.4.2.
 */
static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
{
	u32 tmp;

	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
	    amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
		return;

	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
				adev->df.hash_status.hash_64k);
	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
				adev->df.hash_status.hash_2m);
	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
				adev->df.hash_status.hash_1g);
	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
}
4017
/* Enable/disable both CPs; the gfx CP is only touched when gfx rings exist. */
static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	if (adev->gfx.num_gfx_rings)
		gfx_v9_0_cp_gfx_enable(adev, enable);
	gfx_v9_0_cp_compute_enable(adev, enable);
}
4024
/* IP-block hw_init: cleaner shader, golden registers (bare metal only),
 * constants, TCP hash config, then RLC and CP resume.  9.4.2 bare metal
 * additionally programs the power-brake sequence.
 *
 * Returns 0 on success or the first error from RLC/CP resume.
 */
static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
{
	int r;
	struct amdgpu_device *adev = ip_block->adev;

	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
				       adev->gfx.cleaner_shader_ptr);

	/* the host programs golden registers under SR-IOV */
	if (!amdgpu_sriov_vf(adev))
		gfx_v9_0_init_golden_registers(adev);

	gfx_v9_0_constants_init(adev);

	gfx_v9_0_init_tcp_config(adev);

	r = adev->gfx.rlc.funcs->resume(adev);
	if (r)
		return r;

	r = gfx_v9_0_cp_resume(adev);
	if (r)
		return r;

	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) &&
	    !amdgpu_sriov_vf(adev))
		gfx_v9_4_2_set_power_brake_sequence(adev);

	return r;
}
4054
/**
 * gfx_v9_0_hw_fini - tear down the GFX v9 hardware block
 * @ip_block: the GFX IP block being torn down
 *
 * Drops the GFX interrupt references, disables the KCQs, unprograms
 * the KIQ on a clean unbind, stops the CP and finally halts the RLC
 * (unless skipped for A+A reset or GC >= 9.4.2).  Always returns 0.
 */
static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	/* Release the irq references taken during init. */
	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);

	/* DF freeze and kcq disable will fail */
	if (!amdgpu_ras_intr_triggered())
		/* disable KCQ to avoid CPC touch memory not valid anymore */
		amdgpu_gfx_disable_kcq(adev, 0);

	if (amdgpu_sriov_vf(adev)) {
		gfx_v9_0_cp_gfx_enable(adev, false);
		/* must disable polling for SRIOV when hw finished, otherwise
		 * CPC engine may still keep fetching WB address which is already
		 * invalid after sw finished and trigger DMAR reading error in
		 * hypervisor side.
		 */
		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		return 0;
	}

	/* Use deinitialize sequence from CAIL when unbinding device from driver,
	 * otherwise KIQ is hanging when binding back
	 */
	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
		mutex_lock(&adev->srbm_mutex);
		/* Select the KIQ's me/pipe/queue before touching its MQD regs. */
		soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
				adev->gfx.kiq[0].ring.pipe,
				adev->gfx.kiq[0].ring.queue, 0, 0);
		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	}

	gfx_v9_0_cp_enable(adev, false);

	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
	    (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
		dev_dbg(adev->dev, "Skipping RLC halt\n");
		return 0;
	}

	adev->gfx.rlc.funcs->stop(adev);
	return 0;
}
4106
/* Suspend is a full hw teardown for GFX v9. */
static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block)
{
	return gfx_v9_0_hw_fini(ip_block);
}
4111
/* Resume is a full hw re-init for GFX v9. */
static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block)
{
	return gfx_v9_0_hw_init(ip_block);
}
4116
gfx_v9_0_is_idle(struct amdgpu_ip_block * ip_block)4117 static bool gfx_v9_0_is_idle(struct amdgpu_ip_block *ip_block)
4118 {
4119 struct amdgpu_device *adev = ip_block->adev;
4120
4121 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4122 GRBM_STATUS, GUI_ACTIVE))
4123 return false;
4124 else
4125 return true;
4126 }
4127
gfx_v9_0_wait_for_idle(struct amdgpu_ip_block * ip_block)4128 static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4129 {
4130 unsigned i;
4131 struct amdgpu_device *adev = ip_block->adev;
4132
4133 for (i = 0; i < adev->usec_timeout; i++) {
4134 if (gfx_v9_0_is_idle(ip_block))
4135 return 0;
4136 udelay(1);
4137 }
4138 return -ETIMEDOUT;
4139 }
4140
gfx_v9_0_soft_reset(struct amdgpu_ip_block * ip_block)4141 static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
4142 {
4143 u32 grbm_soft_reset = 0;
4144 u32 tmp;
4145 struct amdgpu_device *adev = ip_block->adev;
4146
4147 /* GRBM_STATUS */
4148 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4149 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4150 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4151 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4152 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4153 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4154 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4155 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4156 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4157 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4158 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4159 }
4160
4161 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4162 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4163 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4164 }
4165
4166 /* GRBM_STATUS2 */
4167 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4168 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4169 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4170 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4171
4172
4173 if (grbm_soft_reset) {
4174 /* stop the rlc */
4175 adev->gfx.rlc.funcs->stop(adev);
4176
4177 if (adev->gfx.num_gfx_rings)
4178 /* Disable GFX parsing/prefetching */
4179 gfx_v9_0_cp_gfx_enable(adev, false);
4180
4181 /* Disable MEC parsing/prefetching */
4182 gfx_v9_0_cp_compute_enable(adev, false);
4183
4184 if (grbm_soft_reset) {
4185 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4186 tmp |= grbm_soft_reset;
4187 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4188 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4189 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4190
4191 udelay(50);
4192
4193 tmp &= ~grbm_soft_reset;
4194 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4195 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4196 }
4197
4198 /* Wait a little for things to settle down */
4199 udelay(50);
4200 }
4201 return 0;
4202 }
4203
/**
 * gfx_v9_0_kiq_read_clock - read the GPU clock counter through the KIQ
 * @adev: amdgpu device pointer
 *
 * Emits a COPY_DATA packet on the KIQ ring that copies the clock
 * counter register pair into a writeback slot, then polls the fence
 * until the copy lands.  Used when direct MMIO access is not possible
 * (see the caller's SR-IOV runtime check).
 *
 * Returns the 64-bit counter value, or ~0 on failure.
 */
static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq, reg_val_offs = 0;
	uint64_t value = 0;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_rreg);

	/* The ring lock also serializes writeback slot allocation. */
	spin_lock_irqsave(&kiq->ring_lock, flags);
	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
		pr_err("critical bug! too many kiq readers\n");
		goto failed_unlock;
	}
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 9 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 16) |	/* count sel */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	/* destination: the writeback slot reserved above (4 bytes/slot) */
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for gpu reset case because this way may
	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which cause
	 * gpu_recover() hang there.
	 *
	 * also don't wait anymore for IRQ context
	 * */
	if (r < 1 && (amdgpu_in_reset(adev)))
		goto failed_kiq_read;

	/* Outside reset we may retry with sleeps until the fence signals. */
	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_read;

	/* Order the writeback reads after the fence observation. */
	mb();
	value = (uint64_t)adev->wb.wb[reg_val_offs] |
		(uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
	amdgpu_device_wb_free(adev, reg_val_offs);
	return value;

failed_undo:
	amdgpu_ring_undo(ring);
failed_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
	if (reg_val_offs)
		amdgpu_device_wb_free(adev, reg_val_offs);
	pr_err("failed to read gpu clock\n");
	return ~0;
}
4277
/**
 * gfx_v9_0_get_gpu_clock_counter - read the free-running GPU clock counter
 * @adev: amdgpu device pointer
 *
 * GC 9.3.0 reads the SMUIO golden TSC pair directly with a hi/lo/hi
 * rollover check; all other variants latch and read the RLC clock
 * counter (via the KIQ on SR-IOV runtime for GC 9.0.1).
 *
 * Returns the 64-bit clock counter value.
 */
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock, clock_lo, clock_hi, hi_check;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 3, 0):
		/* preemption off keeps the hi/lo/hi sampling window tight */
		preempt_disable();
		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
		/* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
		 * roughly every 42 seconds.
		 */
		if (hi_check != clock_hi) {
			/* the low word rolled over between reads; re-read it */
			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
			clock_hi = hi_check;
		}
		preempt_enable();
		clock = clock_lo | (clock_hi << 32ULL);
		break;
	default:
		/* keep GFX powered while we touch RLC registers */
		amdgpu_gfx_off_ctrl(adev, false);
		mutex_lock(&adev->gfx.gpu_clock_mutex);
		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
			    IP_VERSION(9, 0, 1) &&
		    amdgpu_sriov_runtime(adev)) {
			clock = gfx_v9_0_kiq_read_clock(adev);
		} else {
			/* latch the counter, then read the LSB/MSB pair */
			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
		}
		mutex_unlock(&adev->gfx.gpu_clock_mutex);
		amdgpu_gfx_off_ctrl(adev, true);
		break;
	}
	return clock;
}
4316
/**
 * gfx_v9_0_ring_emit_gds_switch - emit GDS/GWS/OA programming for a VMID
 * @ring: ring to emit the register writes on
 * @vmid: VMID whose GDS resources are being programmed
 * @gds_base/@gds_size: GDS window base and size
 * @gws_base/@gws_size: GWS allocation
 * @oa_base/@oa_size: ordered-append allocation
 *
 * Emits write-data packets that program the per-VMID GDS registers.
 * The BASE/SIZE registers are laid out in pairs (stride 2 per VMID);
 * GWS and OA registers have one register per VMID.
 */
static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* adev is referenced by the SOC15_REG_OFFSET() expansions below */
	struct amdgpu_device *adev = ring->adev;

	/* GDS Base */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
				   gds_base);

	/* GDS Size */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
				   gds_size);

	/* GWS */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: build a contiguous bitmask of oa_size bits starting at oa_base */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
				   (1 << (oa_size + oa_base)) - (1 << oa_base));
}
4345
/* Pre-assembled GFX9 compute shader binary used by
 * gfx_v9_0_do_edc_gpr_workarounds() to initialize VGPRs.
 * Do not edit the words by hand; regenerate from the shader source.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0xb07c0000, 0xbe8000ff,
	0x000000f8, 0xbf110800,
	0x7e000280, 0x7e020280,
	0x7e040280, 0x7e060280,
	0x7e080280, 0x7e0a0280,
	0x7e0c0280, 0x7e0e0280,
	0x80808800, 0xbe803200,
	0xbf84fff5, 0xbf9c0000,
	0xd28c0001, 0x0001007f,
	0xd28d0001, 0x0002027e,
	0x10020288, 0xb8810904,
	0xb7814000, 0xd1196a01,
	0x00000301, 0xbe800087,
	0xbefc00c1, 0xd89c4000,
	0x00020201, 0xd89cc080,
	0x00040401, 0x320202ff,
	0x00000800, 0x80808100,
	0xbf84fff8, 0x7e020280,
	0xbf810000, 0x00000000,
};
4368
/* Pre-assembled GFX9 compute shader binary used by
 * gfx_v9_0_do_edc_gpr_workarounds() to initialize SGPRs
 * (shared by both SGPR dispatch passes).
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xb07c0000, 0xbe8000ff,
	0x0000005f, 0xbee50080,
	0xbe812c65, 0xbe822c65,
	0xbe832c65, 0xbe842c65,
	0xbe852c65, 0xb77c0005,
	0x80808500, 0xbf84fff8,
	0xbe800080, 0xbf810000,
};
4379
/* Arcturus (GC 9.4.1) variant of the VGPR init shader, selected in
 * gfx_v9_0_do_edc_gpr_workarounds().  The long run of 0xd3d940xx
 * word pairs covers the larger Arcturus VGPR file.
 */
static const u32 vgpr_init_compute_shader_arcturus[] = {
	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
	0xbf84fff8, 0xbf810000,
};
4474
/* When below register arrays changed, please update gpr_reg_size,
   and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds,
   to cover all gfx9 ASICs */
/* Dispatch register state for the VGPR init pass (non-Arcturus). */
static const struct soc15_reg_entry vgpr_init_regs[] = {
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
};
4494
/* Dispatch register state for the VGPR init pass on Arcturus
 * (GC 9.4.1); differs from vgpr_init_regs only in PGM_RSRC1.
 */
static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
};
4511
/* Dispatch register state for the first SGPR init pass (targets the
 * low byte of the CU mask in each SE).
 */
static const struct soc15_reg_entry sgpr1_init_regs[] = {
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
};
4528
/* Dispatch register state for the second SGPR init pass (targets the
 * second byte of the CU mask in each SE).
 */
static const struct soc15_reg_entry sgpr2_init_regs[] = {
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
};
4545
/* EDC (error detection and correction) counter registers polled for
 * GFX RAS.  NOTE(review): the trailing numbers appear to be
 * {value, SE count, instance count} per soc15_reg_entry — confirm
 * against the struct definition before relying on them.
 */
static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
   { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
   { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
};
4581
/**
 * gfx_v9_0_do_edc_gds_workarounds - prime GDS state for EDC/RAS
 * @adev: amdgpu device pointer
 *
 * Issues a DMA_DATA packet on the first compute ring spanning the
 * whole GDS space, then busy-waits until the CP has consumed the
 * packet (rptr catches up to wptr).  Only runs when GFX RAS is
 * supported; returns 0 if skipped.
 *
 * Returns 0 on success, negative error code on failure or timeout.
 */
static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	int i, r;

	/* only support when RAS is enabled */
	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return 0;

	r = amdgpu_ring_alloc(ring, 7);
	if (r) {
		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
			  ring->name, r);
		return r;
	}

	/* expose the full GDS aperture to VMID0 for the duration */
	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);

	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
				PACKET3_DMA_DATA_DST_SEL(1) |
				PACKET3_DMA_DATA_SRC_SEL(2) |
				PACKET3_DMA_DATA_ENGINE(0)));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
				adev->gds.gds_size);

	amdgpu_ring_commit(ring);

	/* wait until the CP has fetched everything we submitted */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	/* hide the GDS aperture from VMID0 again */
	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);

	return r;
}
4628
gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device * adev)4629 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4630 {
4631 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4632 struct amdgpu_ib ib;
4633 struct dma_fence *f = NULL;
4634 int r, i;
4635 unsigned total_size, vgpr_offset, sgpr_offset;
4636 u64 gpu_addr;
4637
4638 int compute_dim_x = adev->gfx.config.max_shader_engines *
4639 adev->gfx.config.max_cu_per_sh *
4640 adev->gfx.config.max_sh_per_se;
4641 int sgpr_work_group_size = 5;
4642 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4643 int vgpr_init_shader_size;
4644 const u32 *vgpr_init_shader_ptr;
4645 const struct soc15_reg_entry *vgpr_init_regs_ptr;
4646
4647 /* only support when RAS is enabled */
4648 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4649 return 0;
4650
4651 /* bail if the compute ring is not ready */
4652 if (!ring->sched.ready)
4653 return 0;
4654
4655 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4656 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4657 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4658 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4659 } else {
4660 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4661 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4662 vgpr_init_regs_ptr = vgpr_init_regs;
4663 }
4664
4665 total_size =
4666 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4667 total_size +=
4668 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4669 total_size +=
4670 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4671 total_size = ALIGN(total_size, 256);
4672 vgpr_offset = total_size;
4673 total_size += ALIGN(vgpr_init_shader_size, 256);
4674 sgpr_offset = total_size;
4675 total_size += sizeof(sgpr_init_compute_shader);
4676
4677 /* allocate an indirect buffer to put the commands in */
4678 memset(&ib, 0, sizeof(ib));
4679 r = amdgpu_ib_get(adev, NULL, total_size,
4680 AMDGPU_IB_POOL_DIRECT, &ib);
4681 if (r) {
4682 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4683 return r;
4684 }
4685
4686 /* load the compute shaders */
4687 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4688 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4689
4690 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4691 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4692
4693 /* init the ib length to 0 */
4694 ib.length_dw = 0;
4695
4696 /* VGPR */
4697 /* write the register state for the compute dispatch */
4698 for (i = 0; i < gpr_reg_size; i++) {
4699 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4700 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4701 - PACKET3_SET_SH_REG_START;
4702 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4703 }
4704 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4705 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4706 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4707 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4708 - PACKET3_SET_SH_REG_START;
4709 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4710 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4711
4712 /* write dispatch packet */
4713 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4714 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4715 ib.ptr[ib.length_dw++] = 1; /* y */
4716 ib.ptr[ib.length_dw++] = 1; /* z */
4717 ib.ptr[ib.length_dw++] =
4718 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4719
4720 /* write CS partial flush packet */
4721 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4722 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4723
4724 /* SGPR1 */
4725 /* write the register state for the compute dispatch */
4726 for (i = 0; i < gpr_reg_size; i++) {
4727 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4728 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4729 - PACKET3_SET_SH_REG_START;
4730 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4731 }
4732 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4733 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4734 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4735 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4736 - PACKET3_SET_SH_REG_START;
4737 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4738 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4739
4740 /* write dispatch packet */
4741 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4742 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4743 ib.ptr[ib.length_dw++] = 1; /* y */
4744 ib.ptr[ib.length_dw++] = 1; /* z */
4745 ib.ptr[ib.length_dw++] =
4746 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4747
4748 /* write CS partial flush packet */
4749 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4750 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4751
4752 /* SGPR2 */
4753 /* write the register state for the compute dispatch */
4754 for (i = 0; i < gpr_reg_size; i++) {
4755 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4756 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4757 - PACKET3_SET_SH_REG_START;
4758 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4759 }
4760 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4761 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4762 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4763 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4764 - PACKET3_SET_SH_REG_START;
4765 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4766 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4767
4768 /* write dispatch packet */
4769 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4770 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4771 ib.ptr[ib.length_dw++] = 1; /* y */
4772 ib.ptr[ib.length_dw++] = 1; /* z */
4773 ib.ptr[ib.length_dw++] =
4774 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4775
4776 /* write CS partial flush packet */
4777 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4778 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4779
	/* schedule the ib on the ring */
4781 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4782 if (r) {
4783 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4784 goto fail;
4785 }
4786
4787 /* wait for the GPU to finish processing the IB */
4788 r = dma_fence_wait(f, false);
4789 if (r) {
4790 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4791 goto fail;
4792 }
4793
4794 fail:
4795 amdgpu_ib_free(&ib, NULL);
4796 dma_fence_put(f);
4797
4798 return r;
4799 }
4800
/*
 * gfx_v9_0_early_init - early GFX IP initialization
 * @ip_block: pointer to the GFX IP block
 *
 * Installs the gfx/ring/irq/rlc/kiq function-pointer tables, sets the ring
 * counts (GC 9.4.1 / 9.4.2 get no gfx rings) and starts microcode loading.
 * Runs before sw_init, so only static setup belongs here.
 *
 * Returns 0 on success or the error from microcode init.
 */
static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;

	/* 9.4.1 and 9.4.2 expose compute queues only */
	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
		adev->gfx.num_gfx_rings = 0;
	else
		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
	adev->gfx.xcc_mask = 1;
	/* honor the module-requested KCQ count, capped at the HW maximum */
	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
					  AMDGPU_MAX_COMPUTE_RINGS);
	gfx_v9_0_set_kiq_pm4_funcs(adev);
	gfx_v9_0_set_ring_funcs(adev);
	gfx_v9_0_set_irq_funcs(adev);
	gfx_v9_0_set_gds_init(adev);
	gfx_v9_0_set_rlc_funcs(adev);

	/* init rlcg reg access ctrl */
	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);

	return gfx_v9_0_init_microcode(adev);
}
4826
/*
 * gfx_v9_0_ecc_late_init - run the ECC/EDC init workarounds
 * @ip_block: pointer to the GFX IP block
 *
 * Runs the EDC workarounds for GDS and GPRs (they submit IBs, hence late
 * init after the IB pool exists) and then arms the RAS watchdog timer when
 * the RAS block provides one.
 *
 * Returns 0 on success or a negative error code from the workarounds.
 */
static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	/*
	 * Temp workaround to fix the issue that CP firmware fails to
	 * update read pointer when CPDMA is writing clearing operation
	 * to GDS in suspend/resume sequence on several cards. So just
	 * limit this operation in cold boot sequence.
	 */
	if ((!adev->in_suspend) &&
	    (adev->gds.gds_size)) {
		r = gfx_v9_0_do_edc_gds_workarounds(adev);
		if (r)
			return r;
	}

	/* requires IBs so do in late init after IB pool is initialized */
	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
	else
		r = gfx_v9_0_do_edc_gpr_workarounds(adev);

	if (r)
		return r;

	if (adev->gfx.ras &&
	    adev->gfx.ras->enable_watchdog_timer)
		adev->gfx.ras->enable_watchdog_timer(adev);

	return 0;
}
4860
/*
 * gfx_v9_0_late_init - late GFX IP initialization
 * @ip_block: pointer to the GFX IP block
 *
 * Enables the privileged-register, privileged-instruction and bad-opcode
 * interrupt sources, runs the ECC workarounds, then programs the debug
 * trap configuration for the KFD VMID range (9.4.2 has its own variant).
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
	if (r)
		return r;

	r = gfx_v9_0_ecc_late_init(ip_block);
	if (r)
		return r;

	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
		gfx_v9_4_2_debug_trap_config_init(adev,
			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
	else
		gfx_v9_0_debug_trap_config_init(adev,
			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);

	return 0;
}
4891
gfx_v9_0_is_rlc_enabled(struct amdgpu_device * adev)4892 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4893 {
4894 uint32_t rlc_setting;
4895
4896 /* if RLC is not enabled, do nothing */
4897 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4898 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4899 return false;
4900
4901 return true;
4902 }
4903
gfx_v9_0_set_safe_mode(struct amdgpu_device * adev,int xcc_id)4904 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4905 {
4906 uint32_t data;
4907 unsigned i;
4908
4909 data = RLC_SAFE_MODE__CMD_MASK;
4910 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4911 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4912
4913 /* wait for RLC_SAFE_MODE */
4914 for (i = 0; i < adev->usec_timeout; i++) {
4915 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4916 break;
4917 udelay(1);
4918 }
4919 }
4920
gfx_v9_0_unset_safe_mode(struct amdgpu_device * adev,int xcc_id)4921 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4922 {
4923 uint32_t data;
4924
4925 data = RLC_SAFE_MODE__CMD_MASK;
4926 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4927 }
4928
/*
 * gfx_v9_0_update_gfx_cg_power_gating - toggle GFX CGPG
 * @adev: amdgpu device pointer
 * @enable: requested powergating state
 *
 * Turns the coarse-grain power gating (and, when supported, pipeline
 * power gating) on only if both the request and AMD_PG_SUPPORT_GFX_PG
 * agree; otherwise both are forced off.  Runs under RLC safe mode.
 */
static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	bool on = enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG);

	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);

	gfx_v9_0_enable_gfx_cg_power_gating(adev, on);
	if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
		gfx_v9_0_enable_gfx_pipeline_powergating(adev, on);

	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
4946
/*
 * gfx_v9_0_update_gfx_mg_power_gating - toggle static/dynamic MGPG
 * @adev: amdgpu device pointer
 * @enable: requested powergating state
 *
 * Static and dynamic medium-grain power gating are each enabled only
 * when requested AND advertised in pg_flags, otherwise disabled.
 */
static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	/* TODO: double check if we need to perform under safe mode */
	/* gfx_v9_0_enter_rlc_safe_mode(adev); */

	gfx_v9_0_enable_gfx_static_mg_power_gating(adev,
			enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG));

	gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev,
			enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG));

	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
}
4965
/*
 * gfx_v9_0_update_medium_grain_clock_gating - toggle MGCG/MGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable medium-grain clock gating, false to disable
 *
 * MGCG is controlled through override bits in RLC_CGTT_MGCG_OVERRIDE:
 * clearing an override bit hands clock control back to the gating HW,
 * setting it forces the clock on.  MGLS is the memory light-sleep enable
 * in the RLC/CP SLP_CNTL registers.  Registers are only written when the
 * value actually changes.
 */
static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t data, def;

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);

		/* the CPF override is left set on GC 9.2.1 */
		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;

		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);

		/* only for Vega10 & Raven1 */
		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;

		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* MGLS is a global flag to control all MGLS in GFX */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			/* 2 - RLC memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (def != data)
					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
			}
			/* 3 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (def != data)
					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
			}
		}
	} else {
		/* 1 - MGCG_OVERRIDE */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);

		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;

		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);

		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
		}
	}
}
5036
/*
 * gfx_v9_0_update_3d_clock_gating - toggle GFX3D CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable the 3D coarse-grain gating FSM
 *
 * Only meaningful on parts with gfx rings; compute-only ASICs bail out
 * immediately.  Enabling clears the GFX3D override bit, programs the
 * CGCG/CGLS FSM thresholds and the CP idle poll count; disabling just
 * turns the FSM enables off.
 */
static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	uint32_t data, def;

	/* no 3D pipe on compute-only configurations */
	if (!adev->gfx.num_gfx_rings)
		return;

	/* Enable 3D CGCG/CGLS */
	if (enable) {
		/* write cmd to clear cgcg/cgls ov */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
		/* unset CGCG override */
		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
		/* update CGCG and CGLS override bits */
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* enable 3Dcgcg FSM(0x0000363f) */
		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
		else
			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);

		/* set IDLE_POLL_COUNT(0x00900100) */
		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		if (def != data)
			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
	} else {
		/* Disable CGCG/CGLS */
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
		/* disable cgcg, cgls should be disabled */
		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
		/* disable cgcg and cgls in FSM */
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
	}
}
5087
/*
 * gfx_v9_0_update_coarse_grain_clock_gating - toggle GFX CGCG/CGLS
 * @adev: amdgpu device pointer
 * @enable: true to enable the coarse-grain gating FSM
 *
 * Enabling clears the CGCG (and, if supported, CGLS) override, programs
 * the FSM idle threshold (9.4.1 uses a larger threshold than the other
 * parts) and the CP idle poll count.  Disabling clears the FSM enables.
 */
static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t def, data;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
		/* unset CGCG override */
		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
		else
			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
		/* update CGCG and CGLS override bits */
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* enable cgcg FSM(0x0000363F) */
		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);

		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
		else
			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);

		/* set IDLE_POLL_COUNT(0x00900100) */
		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		if (def != data)
			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
	} else {
		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
		/* reset CGCG/CGLS bits */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		/* disable cgcg and cgls in FSM */
		if (def != data)
			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
	}
}
5135
/*
 * gfx_v9_0_update_gfx_clock_gating - toggle all GFX clock-gating features
 * @adev: amdgpu device pointer
 * @enable: true to enable, false to disable
 *
 * The ordering is significant: on enable, MGCG/MGLS must be active
 * before CGCG/CGLS; on disable, CGCG/CGLS must go off first.  The whole
 * sequence runs under RLC safe mode.
 *
 * Always returns 0.
 */
static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS
		 * ===  MGCG + MGLS ===
		 */
		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
		/* ===  CGCG /CGLS for GFX 3D Only === */
		gfx_v9_0_update_3d_clock_gating(adev, enable);
		/* ===  CGCG + CGLS === */
		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS
		 * ===  CGCG + CGLS ===
		 */
		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
		/* ===  CGCG /CGLS for GFX 3D Only === */
		gfx_v9_0_update_3d_clock_gating(adev, enable);
		/* ===  MGCG + MGLS === */
		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
	}
	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
	return 0;
}
5162
gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device * adev,unsigned int vmid)5163 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5164 unsigned int vmid)
5165 {
5166 u32 reg, data;
5167
5168 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5169 if (amdgpu_sriov_is_pp_one_vf(adev))
5170 data = RREG32_NO_KIQ(reg);
5171 else
5172 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5173
5174 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5175 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5176
5177 if (amdgpu_sriov_is_pp_one_vf(adev))
5178 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5179 else
5180 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5181 }
5182
gfx_v9_0_update_spm_vmid(struct amdgpu_device * adev,struct amdgpu_ring * ring,unsigned int vmid)5183 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5184 {
5185 amdgpu_gfx_off_ctrl(adev, false);
5186
5187 gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5188
5189 amdgpu_gfx_off_ctrl(adev, true);
5190 }
5191
gfx_v9_0_check_rlcg_range(struct amdgpu_device * adev,uint32_t offset,struct soc15_reg_rlcg * entries,int arr_size)5192 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5193 uint32_t offset,
5194 struct soc15_reg_rlcg *entries, int arr_size)
5195 {
5196 int i;
5197 uint32_t reg;
5198
5199 if (!entries)
5200 return false;
5201
5202 for (i = 0; i < arr_size; i++) {
5203 const struct soc15_reg_rlcg *entry;
5204
5205 entry = &entries[i];
5206 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5207 if (offset == reg)
5208 return true;
5209 }
5210
5211 return false;
5212 }
5213
gfx_v9_0_is_rlcg_access_range(struct amdgpu_device * adev,u32 offset)5214 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5215 {
5216 return gfx_v9_0_check_rlcg_range(adev, offset,
5217 (void *)rlcg_access_gc_9_0,
5218 ARRAY_SIZE(rlcg_access_gc_9_0));
5219 }
5220
/* RLC callback table for GC 9.0: safe mode, CSB handling, firmware
 * start/stop and the SPM VMID / RLCG access-range helpers. */
static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
	.set_safe_mode = gfx_v9_0_set_safe_mode,
	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
	.init = gfx_v9_0_rlc_init,
	.get_csb_size = gfx_v9_0_get_csb_size,
	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
	.resume = gfx_v9_0_rlc_resume,
	.stop = gfx_v9_0_rlc_stop,
	.reset = gfx_v9_0_rlc_reset,
	.start = gfx_v9_0_rlc_start,
	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
};
5236
/*
 * gfx_v9_0_set_powergating_state - set the GFX powergating state
 * @ip_block: pointer to the GFX IP block
 * @state: AMD_PG_STATE_GATE to enable powergating
 *
 * For GC 9.1.0 / 9.2.2 / 9.3.0 the CGPG/MGPG register sequence runs
 * with GFXOFF forced off and GFXOFF is re-allowed only after everything
 * is programmed.  GC 9.2.1 is handled through GFXOFF alone; all other
 * parts are a no-op here.
 *
 * Always returns 0.
 */
static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;
	bool enable = (state == AMD_PG_STATE_GATE);

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 3, 0):
		/* keep the GPU awake while reprogramming PG */
		if (!enable)
			amdgpu_gfx_off_ctrl_immediate(adev, false);

		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
		}

		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			gfx_v9_0_enable_cp_power_gating(adev, true);
		else
			gfx_v9_0_enable_cp_power_gating(adev, false);

		/* update gfx cgpg state */
		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);

		/* update mgcg state */
		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);

		if (enable)
			amdgpu_gfx_off_ctrl_immediate(adev, true);
		break;
	case IP_VERSION(9, 2, 1):
		amdgpu_gfx_off_ctrl_immediate(adev, enable);
		break;
	default:
		break;
	}

	return 0;
}
5281
/*
 * gfx_v9_0_set_clockgating_state - set the GFX clockgating state
 * @ip_block: pointer to the GFX IP block
 * @state: AMD_CG_STATE_GATE to enable clockgating
 *
 * Skipped entirely under SRIOV (the host owns clock gating).  All
 * supported GC 9.x parts share the same enable/disable sequence.
 *
 * Always returns 0.
 */
static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
	case IP_VERSION(9, 3, 0):
	case IP_VERSION(9, 4, 2):
		gfx_v9_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	default:
		break;
	}
	return 0;
}
5307
/*
 * gfx_v9_0_get_clockgating_state - report active GFX clockgating features
 * @ip_block: pointer to the GFX IP block
 * @flags: output bitmask of AMD_CG_SUPPORT_GFX_* features currently on
 *
 * Reads the CG control registers through the KIQ-safe accessors and ORs
 * the corresponding feature bits into @flags.
 */
static void gfx_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
	struct amdgpu_device *adev = ip_block->adev;
	int data;

	/* NOTE(review): under SRIOV the flags are zeroed but the register
	 * reads below still execute — confirm this fall-through is intended */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;

	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;

		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
	}
}
5351
gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring * ring)5352 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5353 {
5354 return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/
5355 }
5356
gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring * ring)5357 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5358 {
5359 struct amdgpu_device *adev = ring->adev;
5360 u64 wptr;
5361
5362 /* XXX check if swapping is necessary on BE */
5363 if (ring->use_doorbell) {
5364 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5365 } else {
5366 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5367 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5368 }
5369
5370 return wptr;
5371 }
5372
gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring * ring)5373 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5374 {
5375 struct amdgpu_device *adev = ring->adev;
5376
5377 if (ring->use_doorbell) {
5378 /* XXX check if swapping is necessary on BE */
5379 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5380 WDOORBELL64(ring->doorbell_index, ring->wptr);
5381 } else {
5382 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5383 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5384 }
5385 }
5386
gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring * ring)5387 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5388 {
5389 struct amdgpu_device *adev = ring->adev;
5390 u32 ref_and_mask, reg_mem_engine;
5391 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5392
5393 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5394 switch (ring->me) {
5395 case 1:
5396 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5397 break;
5398 case 2:
5399 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5400 break;
5401 default:
5402 return;
5403 }
5404 reg_mem_engine = 0;
5405 } else {
5406 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5407 reg_mem_engine = 1; /* pfp */
5408 }
5409
5410 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5411 adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5412 adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5413 ref_and_mask, ref_and_mask, 0x20);
5414 }
5415
/*
 * gfx_v9_0_ring_emit_ib_gfx - emit an indirect buffer on the gfx ring
 * @ring: the gfx ring
 * @job: job the IB belongs to (supplies the VMID and GDS info)
 * @ib: the indirect buffer to execute
 * @flags: emit flags (AMDGPU_IB_PREEMPTED etc.)
 *
 * Emits an INDIRECT_BUFFER (or INDIRECT_BUFFER_CONST for CE IBs) packet.
 * Preemptible IBs get the PRE_ENB bit, and resumed ones additionally get
 * PRE_RESUME; for preemptible non-CE IBs the DE metadata is emitted first.
 * The control dword position is recorded via amdgpu_ring_ib_on_emit_cntl()
 * so it can be patched later (see gfx_v9_0_ring_patch_cntl).
 */
static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_job *job,
				      struct amdgpu_ib *ib,
				      uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vmid << 24);

	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (flags & AMDGPU_IB_PREEMPTED)
			control |= INDIRECT_BUFFER_PRE_RESUME(1);

		/* DE metadata only for non-CE IBs with a real VMID */
		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
			gfx_v9_0_ring_emit_de_meta(ring,
						   (!amdgpu_sriov_vf(ring->adev) &&
						   flags & AMDGPU_IB_PREEMPTED) ?
						   true : false,
						   job->gds_size > 0 && job->gds_base != 0);
	}

	amdgpu_ring_write(ring, header);
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
		(2 << 0) |
#endif
		lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	/* remember where the control dword lands so it can be patched */
	amdgpu_ring_ib_on_emit_cntl(ring);
	amdgpu_ring_write(ring, control);
}
5456
gfx_v9_0_ring_patch_cntl(struct amdgpu_ring * ring,unsigned offset)5457 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5458 unsigned offset)
5459 {
5460 u32 control = ring->ring[offset];
5461
5462 control |= INDIRECT_BUFFER_PRE_RESUME(1);
5463 ring->ring[offset] = control;
5464 }
5465
/*
 * gfx_v9_0_ring_patch_ce_meta - rewrite the CE metadata saved in a ring
 * @ring: ring whose contents are being patched for preemption resume
 * @offset: dword offset into the ring where the CE payload was emitted
 *
 * Copies the CE payload from the CSA (or the MES context for MES queues)
 * back over the copy embedded in the ring buffer.  The second branch
 * handles the payload wrapping around the end of the ring.
 */
static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
					unsigned offset)
{
	struct amdgpu_device *adev = ring->adev;
	void *ce_payload_cpu_addr;
	uint64_t payload_offset, payload_size;

	payload_size = sizeof(struct v9_ce_ib_state);

	if (ring->is_mes_queue) {
		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
					  gfx[0].gfx_meta_data) +
			offsetof(struct v9_gfx_meta_data, ce_payload);
		ce_payload_cpu_addr =
			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
	} else {
		payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
	}

	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
		/* payload fits without wrapping */
		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
	} else {
		/* split the copy across the ring wrap point */
		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
		       (ring->buf_mask + 1 - offset) << 2);
		payload_size -= (ring->buf_mask + 1 - offset) << 2;
		memcpy((void *)&ring->ring[0],
		       ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
		       payload_size);
	}
}
5497
/*
 * gfx_v9_0_ring_patch_de_meta - rewrite the DE metadata saved in a ring
 * @ring: ring whose contents are being patched for preemption resume
 * @offset: dword offset into the ring where the DE payload was emitted
 *
 * Marks the CSA copy of the DE payload as preempted, then copies it back
 * over the copy embedded in the ring buffer, handling ring wrap-around.
 */
static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
					unsigned offset)
{
	struct amdgpu_device *adev = ring->adev;
	void *de_payload_cpu_addr;
	uint64_t payload_offset, payload_size;

	payload_size = sizeof(struct v9_de_ib_state);

	if (ring->is_mes_queue) {
		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
					  gfx[0].gfx_meta_data) +
			offsetof(struct v9_gfx_meta_data, de_payload);
		de_payload_cpu_addr =
			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
	} else {
		payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
		de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
	}

	/* flag the IB as preempted in the source payload before copying */
	((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
		IB_COMPLETION_STATUS_PREEMPTED;

	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
		/* payload fits without wrapping */
		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
	} else {
		/* split the copy across the ring wrap point */
		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
		       (ring->buf_mask + 1 - offset) << 2);
		payload_size -= (ring->buf_mask + 1 - offset) << 2;
		memcpy((void *)&ring->ring[0],
		       de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
		       payload_size);
	}
}
5532
/*
 * gfx_v9_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring
 * @ring: the compute ring
 * @job: job the IB belongs to (supplies the VMID)
 * @ib: the indirect buffer to execute
 * @flags: emit flags (unused here)
 *
 * Optionally resets the GDS wave ID counters (see the workaround comment
 * below), then emits an INDIRECT_BUFFER packet pointing at the IB.
 */
static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_job *job,
					  struct amdgpu_ib *ib,
					  uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}
5567
/*
 * gfx_v9_0_ring_emit_fence - emit a fence using a RELEASE_MEM packet
 * @ring: ring to emit the packet on
 * @addr: GPU address to write the fence value to
 * @seq: fence sequence number
 * @flags: AMDGPU_FENCE_FLAG_* selecting 32/64-bit write, interrupt,
 *         writeback-only cache policy and the EXEC bit
 *
 * RELEASE_MEM performs the EOP cache flush/invalidate, writes @seq to
 * @addr and optionally raises an interrupt.
 */
static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
				     u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
	uint32_t dw2 = 0;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));

	if (writeback) {
		dw2 = EOP_TC_NC_ACTION_EN;
	} else {
		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
				EOP_TC_MD_ACTION_EN;
	}
	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				EVENT_INDEX(5);
	if (exec)
		dw2 |= EOP_EXEC;

	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));

	/*
	 * the address should be Qword aligned if 64bit write, Dword
	 * aligned if only send 32bit data low (discard data high)
	 */
	if (write64bit)
		BUG_ON(addr & 0x7);
	else
		BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
	amdgpu_ring_write(ring, 0);
}
5608
gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring * ring)5609 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5610 {
5611 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5612 uint32_t seq = ring->fence_drv.sync_seq;
5613 uint64_t addr = ring->fence_drv.gpu_addr;
5614
5615 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5616 lower_32_bits(addr), upper_32_bits(addr),
5617 seq, 0xffffffff, 4);
5618 }
5619
/*
 * gfx_v9_0_ring_emit_vm_flush - emit a GPU TLB flush for a VMID
 * @ring: ring to emit on
 * @vmid: VMID being flushed
 * @pd_addr: page directory base address
 *
 * After the GMC flush, the gfx ring additionally resyncs the PFP with
 * the ME so that stale PFP-prefetched data is discarded.
 */
static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* compute doesn't have PFP */
	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
		return;

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	amdgpu_ring_write(ring, 0x0);
}
5632
gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring * ring)5633 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5634 {
5635 return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5636 }
5637
gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring * ring)5638 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5639 {
5640 u64 wptr;
5641
5642 /* XXX check if swapping is necessary on BE */
5643 if (ring->use_doorbell)
5644 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5645 else
5646 BUG();
5647 return wptr;
5648 }
5649
gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring * ring)5650 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5651 {
5652 struct amdgpu_device *adev = ring->adev;
5653
5654 /* XXX check if swapping is necessary on BE */
5655 if (ring->use_doorbell) {
5656 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5657 WDOORBELL64(ring->doorbell_index, ring->wptr);
5658 } else{
5659 BUG(); /* only DOORBELL method supported on gfx9 now */
5660 }
5661 }
5662
/*
 * Emit a fence on the KIQ ring: WRITE_DATA of the 32-bit @seq to @addr,
 * optionally followed by a write to CPC_INT_STATUS to raise an interrupt.
 * 64-bit fences are not supported here — only 32 bits of writeback space
 * are allocated per sequence slot.
 */
static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	struct amdgpu_device *adev = ring->adev;

	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
5689
/* Emit a SWITCH_BUFFER packet to flip the CE/DE double buffer. */
static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
5695
/*
 * Emit the CE (constant engine) metadata payload via WRITE_DATA into the
 * CSA (or the MES context buffer for MES queues).  On @resume, replay the
 * previously saved payload from its CPU copy instead of a zeroed one.
 */
static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_ce_ib_state ce_payload = {0};
	uint64_t offset, ce_payload_gpu_addr;
	void *ce_payload_cpu_addr;
	int cnt;

	/* WRITE_DATA dword count: payload + header, per PM4 count convention */
	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;

	if (ring->is_mes_queue) {
		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
				  gfx[0].gfx_meta_data) +
			offsetof(struct v9_gfx_meta_data, ce_payload);
		ce_payload_gpu_addr =
			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		ce_payload_cpu_addr =
			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
	} else {
		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));

	/* notify hook before the payload dwords are emitted */
	amdgpu_ring_ib_on_emit_ce(ring);

	if (resume)
		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
					   sizeof(ce_payload) >> 2);
	else
		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
					   sizeof(ce_payload) >> 2);
}
5737
/*
 * Preempt the IB currently executing on @ring (gfx mid-command-buffer
 * preemption).  Asserts the preemption cond-exec, emits a trailing fence,
 * asks the KIQ to preempt the queue without unmapping it, then polls for
 * the trailing fence before clearing CP_VMID_PREEMPT.
 *
 * Returns 0 on success, -EINVAL on timeout or missing KIQ support,
 * -ENOMEM if KIQ ring space could not be allocated.
 */
static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
{
	int i, r = 0;
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	unsigned long flags;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	spin_lock_irqsave(&kiq->ring_lock, flags);

	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
		spin_unlock_irqrestore(&kiq->ring_lock, flags);
		return -ENOMEM;
	}

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	ring->trail_seq += 1;
	amdgpu_ring_alloc(ring, 13);
	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);

	/* assert IB preemption, emit the trailing fence */
	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
				   ring->trail_fence_gpu_addr,
				   ring->trail_seq);

	amdgpu_ring_commit(kiq_ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	/* poll the trailing fence (busy-wait, 1us granularity) */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
			le32_to_cpu(*ring->trail_fence_cpu_addr))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
	}

	/*reset the CP_VMID_PREEMPT after trailing fence*/
	amdgpu_ring_emit_wreg(ring,
			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
			      0x0);
	amdgpu_ring_commit(ring);

	/* deassert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);
	return r;
}
5795
/*
 * Emit the DE (draw engine) metadata payload via WRITE_DATA into the CSA
 * (or the MES context buffer for MES queues).  When @usegds is set, a GDS
 * backup address is filled into the payload; on @resume, the previously
 * saved payload is replayed from its CPU copy.
 */
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
{
	struct amdgpu_device *adev = ring->adev;
	struct v9_de_ib_state de_payload = {0};
	uint64_t offset, gds_addr, de_payload_gpu_addr;
	void *de_payload_cpu_addr;
	int cnt;

	if (ring->is_mes_queue) {
		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
				  gfx[0].gfx_meta_data) +
			offsetof(struct v9_gfx_meta_data, de_payload);
		de_payload_gpu_addr =
			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		de_payload_cpu_addr =
			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
				  gfx[0].gds_backup) +
			offsetof(struct v9_gfx_meta_data, de_payload);
		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
	} else {
		offset = offsetof(struct v9_gfx_meta_data, de_payload);
		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;

		/* page-aligned GDS backup area carved from the top of the CSA */
		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
				 PAGE_SIZE);
	}

	if (usegds) {
		de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
	}

	/* WRITE_DATA dword count: payload + header, per PM4 count convention */
	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));

	/* notify hook before the payload dwords are emitted */
	amdgpu_ring_ib_on_emit_de(ring);
	if (resume)
		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
					   sizeof(de_payload) >> 2);
	else
		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
					   sizeof(de_payload) >> 2);
}
5849
/*
 * Emit a FRAME_CONTROL packet marking frame begin (@start) or end, with
 * the TMZ (trusted memory zone) bit set for @secure frames.
 */
static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
					  bool secure)
{
	uint32_t dword = FRAME_CMD(start ? 0 : 1);

	if (secure)
		dword |= FRAME_TMZ;

	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
	amdgpu_ring_write(ring, dword);
}
5858
/*
 * Emit CONTEXT_CONTROL, selecting which state blocks the CP loads, after
 * (re)emitting the CE metadata.  The dw2 bits follow the PM4
 * CONTEXT_CONTROL load-enable layout (see inline comments).
 */
static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	/* resume the CE payload when continuing a preempted IB on bare metal */
	gfx_v9_0_ring_emit_ce_meta(ring,
				   (!amdgpu_sriov_vf(ring->adev) &&
				    flags & AMDGPU_IB_PREEMPTED) ? true : false);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}
5891
/*
 * Emit a COND_EXEC packet and return the ring-buffer offset of its
 * exec-count dword so the caller can patch the real count in later.
 * The CP skips the following DWs when the value at @addr is 0.
 */
static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
						  uint64_t addr)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	/* discard following DWs if *cond_exec_gpu_addr==0 */
	amdgpu_ring_write(ring, 0);
	/* capture the offset BEFORE writing the placeholder dword */
	ret = ring->wptr & ring->buf_mask;
	/* patch dummy value later */
	amdgpu_ring_write(ring, 0);
	return ret;
}
5906
/*
 * Emit a COPY_DATA packet that copies register @reg into the writeback
 * slot at dword index @reg_val_offs (within adev->wb), with write confirm,
 * so the CPU can read the register value back from memory.
 */
static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t reg_val_offs)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
}
5923
/*
 * Emit a WRITE_DATA packet storing @val into register @reg.  The control
 * word varies by ring type: GFX uses the ME engine with write confirm,
 * KIQ uses the no-increment-address form, everything else just confirms.
 */
static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
	else if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		cmd = (1 << 16); /* no inc addr */
	else
		cmd = WR_CONFIRM;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
5946
/* Emit a wait until (register @reg & @mask) == @val. */
static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					uint32_t val, uint32_t mask)
{
	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
5952
/*
 * Write @ref to @reg0, then wait until (@reg1 & @mask) == @ref.  Uses the
 * fused write+wait form when the ME (gfx) or MEC (compute) firmware
 * supports it, otherwise falls back to separate write and wait packets.
 */
static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						  uint32_t reg0, uint32_t reg1,
						  uint32_t ref, uint32_t mask)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	struct amdgpu_device *adev = ring->adev;
	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
			     adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;

	if (fw_version_ok)
		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
				      ref, mask, 0x20);
	else
		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
							   ref, mask);
}
5969
/*
 * Attempt soft recovery of a hung ring by issuing an SQ_CMD (CMD=0x03,
 * MODE=0x01) targeted at @vmid's waves via CHECK_VMID, performed inside
 * RLC safe mode.
 */
static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
}
5983
gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device * adev,enum amdgpu_interrupt_state state)5984 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5985 enum amdgpu_interrupt_state state)
5986 {
5987 switch (state) {
5988 case AMDGPU_IRQ_STATE_DISABLE:
5989 case AMDGPU_IRQ_STATE_ENABLE:
5990 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5991 TIME_STAMP_INT_ENABLE,
5992 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5993 break;
5994 default:
5995 break;
5996 }
5997 }
5998
gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device * adev,int me,int pipe,enum amdgpu_interrupt_state state)5999 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6000 int me, int pipe,
6001 enum amdgpu_interrupt_state state)
6002 {
6003 u32 mec_int_cntl, mec_int_cntl_reg;
6004
6005 /*
6006 * amdgpu controls only the first MEC. That's why this function only
6007 * handles the setting of interrupts for this specific MEC. All other
6008 * pipes' interrupts are set by amdkfd.
6009 */
6010
6011 if (me == 1) {
6012 switch (pipe) {
6013 case 0:
6014 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6015 break;
6016 case 1:
6017 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6018 break;
6019 case 2:
6020 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6021 break;
6022 case 3:
6023 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6024 break;
6025 default:
6026 DRM_DEBUG("invalid pipe %d\n", pipe);
6027 return;
6028 }
6029 } else {
6030 DRM_DEBUG("invalid me %d\n", me);
6031 return;
6032 }
6033
6034 switch (state) {
6035 case AMDGPU_IRQ_STATE_DISABLE:
6036 mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg);
6037 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6038 TIME_STAMP_INT_ENABLE, 0);
6039 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6040 break;
6041 case AMDGPU_IRQ_STATE_ENABLE:
6042 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6043 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6044 TIME_STAMP_INT_ENABLE, 1);
6045 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6046 break;
6047 default:
6048 break;
6049 }
6050 }
6051
/*
 * Return the per-pipe interrupt control register offset for MEC @me /
 * @pipe, or 0 when the pipe is not managed by this driver.
 */
static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev,
				     int me, int pipe)
{
	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */
	if (me != 1)
		return 0;

	switch (pipe) {
	case 0:
		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
	case 1:
		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
	case 2:
		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
	case 3:
		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
	default:
		return 0;
	}
}
6076
/*
 * Enable/disable the privileged-register violation interrupt on the GFX
 * ring and on every driver-managed compute pipe.  Always returns 0.
 */
static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	u32 cp_int_cntl_reg, cp_int_cntl;
	int i, j;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       PRIV_REG_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
				/* MECs start at 1 */
				cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);

				/* helper returns 0 for pipes amdkfd manages */
				if (cp_int_cntl_reg) {
					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
								    PRIV_REG_INT_ENABLE,
								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
				}
			}
		}
		break;
	default:
		break;
	}

	return 0;
}
6112
/*
 * Enable/disable the illegal-opcode interrupt on the GFX ring and on
 * every driver-managed compute pipe.  Always returns 0.
 */
static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   unsigned type,
					   enum amdgpu_interrupt_state state)
{
	u32 cp_int_cntl_reg, cp_int_cntl;
	int i, j;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       OPCODE_ERROR_INT_ENABLE,
			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
				/* MECs start at 1 */
				cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);

				/* helper returns 0 for pipes amdkfd manages */
				if (cp_int_cntl_reg) {
					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
								    OPCODE_ERROR_INT_ENABLE,
								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
				}
			}
		}
		break;
	default:
		break;
	}

	return 0;
}
6148
gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device * adev,struct amdgpu_irq_src * source,unsigned type,enum amdgpu_interrupt_state state)6149 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6150 struct amdgpu_irq_src *source,
6151 unsigned type,
6152 enum amdgpu_interrupt_state state)
6153 {
6154 switch (state) {
6155 case AMDGPU_IRQ_STATE_DISABLE:
6156 case AMDGPU_IRQ_STATE_ENABLE:
6157 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6158 PRIV_INSTR_INT_ENABLE,
6159 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6160 break;
6161 default:
6162 break;
6163 }
6164
6165 return 0;
6166 }
6167
/* Toggle the CP ECC error interrupt enable bit for ME @me, pipe @pipe. */
#define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
			CP_ECC_ERROR_INT_ENABLE, 1)

#define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
			CP_ECC_ERROR_INT_ENABLE, 0)
6175
/*
 * Enable/disable CP ECC error interrupts on the GFX ring and on MEC1
 * pipes 0-3.  Always returns 0.
 */
static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
					   struct amdgpu_irq_src *source,
					   unsigned type,
					   enum amdgpu_interrupt_state state)
{
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       CP_ECC_ERROR_INT_ENABLE, 0);
		DISABLE_ECC_ON_ME_PIPE(1, 0);
		DISABLE_ECC_ON_ME_PIPE(1, 1);
		DISABLE_ECC_ON_ME_PIPE(1, 2);
		DISABLE_ECC_ON_ME_PIPE(1, 3);
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
			       CP_ECC_ERROR_INT_ENABLE, 1);
		ENABLE_ECC_ON_ME_PIPE(1, 0);
		ENABLE_ECC_ON_ME_PIPE(1, 1);
		ENABLE_ECC_ON_ME_PIPE(1, 2);
		ENABLE_ECC_ON_ME_PIPE(1, 3);
		break;
	default:
		break;
	}

	return 0;
}
6205
6206
/*
 * Dispatch an EOP interrupt state change to the ring identified by
 * @type (gfx ring, or one of the MEC1/MEC2 compute pipes).
 */
static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6245
/*
 * EOP (fence) interrupt handler: decode me/pipe/queue from the IV entry's
 * ring_id and process completed fences on the matching ring(s).
 */
static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id bit layout: [6:4] queue, [3:2] me, [1:0] pipe */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* me 0 is the gfx ring; with MCBP the fences live on the
		 * software rings behind the mux instead.
		 */
		if (adev->gfx.num_gfx_rings) {
			if (!adev->gfx.mcbp) {
				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
			} else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
				/* Fence signals are handled on the software rings*/
				for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
					amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
			}
		}
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6285
/*
 * Common fault path for the CP error interrupts below: decode which ring
 * raised the fault from the IV entry's ring_id and notify its scheduler.
 */
static void gfx_v9_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	/* ring_id bit layout: [6:4] queue, [3:2] me, [1:0] pipe */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}
6312
/* IRQ handler for privileged-register violations in a command stream. */
static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}
6321
/* IRQ handler for illegal opcodes in a command stream. */
static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev,
			       struct amdgpu_irq_src *source,
			       struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal opcode in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}
6330
/* IRQ handler for privileged-instruction violations in a command stream. */
static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}
6339
6340
6341 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6342 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6343 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6344 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6345 },
6346 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6347 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6348 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6349 },
6350 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6351 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6352 0, 0
6353 },
6354 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6355 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6356 0, 0
6357 },
6358 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6359 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6360 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6361 },
6362 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6363 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6364 0, 0
6365 },
6366 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6367 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6368 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6369 },
6370 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6371 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6372 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6373 },
6374 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6375 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6376 0, 0
6377 },
6378 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6379 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6380 0, 0
6381 },
6382 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6383 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6384 0, 0
6385 },
6386 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6387 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6388 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6389 },
6390 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6391 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6392 0, 0
6393 },
6394 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6395 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6396 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6397 },
6398 { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6399 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6400 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6401 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6402 },
6403 { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6404 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6405 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6406 0, 0
6407 },
6408 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6409 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6410 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6411 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6412 },
6413 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6414 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6415 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6416 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6417 },
6418 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6419 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6420 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6421 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6422 },
6423 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6424 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6425 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6426 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6427 },
6428 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6429 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6430 0, 0
6431 },
6432 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6433 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6434 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6435 },
6436 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6437 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6438 0, 0
6439 },
6440 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6441 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6442 0, 0
6443 },
6444 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6445 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6446 0, 0
6447 },
6448 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6449 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6450 0, 0
6451 },
6452 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6453 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6454 0, 0
6455 },
6456 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6457 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6458 0, 0
6459 },
6460 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6461 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6462 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6463 },
6464 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6465 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6466 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6467 },
6468 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6469 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6470 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6471 },
6472 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6473 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6474 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6475 },
6476 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6477 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6478 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6479 },
6480 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6481 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6482 0, 0
6483 },
6484 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6485 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6486 0, 0
6487 },
6488 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6489 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6490 0, 0
6491 },
6492 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6493 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6494 0, 0
6495 },
6496 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6497 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6498 0, 0
6499 },
6500 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6501 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6502 0, 0
6503 },
6504 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6505 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6506 0, 0
6507 },
6508 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6509 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6510 0, 0
6511 },
6512 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6513 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6514 0, 0
6515 },
6516 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6517 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6518 0, 0
6519 },
6520 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6521 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6522 0, 0
6523 },
6524 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6525 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6526 0, 0
6527 },
6528 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6529 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6530 0, 0
6531 },
6532 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6533 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6534 0, 0
6535 },
6536 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6537 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6538 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6539 },
6540 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6541 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6542 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6543 },
6544 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6545 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6546 0, 0
6547 },
6548 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6549 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6550 0, 0
6551 },
6552 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6553 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6554 0, 0
6555 },
6556 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6557 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6558 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6559 },
6560 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6561 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6562 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6563 },
6564 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6565 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6566 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6567 },
6568 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6569 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6570 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6571 },
6572 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6573 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6574 0, 0
6575 },
6576 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6577 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6578 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6579 },
6580 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6581 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6582 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6583 },
6584 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6585 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6586 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6587 },
6588 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6589 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6590 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6591 },
6592 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6593 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6594 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6595 },
6596 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6597 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6598 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6599 },
6600 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6601 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6602 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6603 },
6604 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6605 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6606 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6607 },
6608 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6609 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6610 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6611 },
6612 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6613 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6614 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6615 },
6616 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6617 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6618 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6619 },
6620 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6621 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6622 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6623 },
6624 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6625 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6626 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6627 },
6628 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6629 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6630 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6631 },
6632 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6633 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6634 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6635 },
6636 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6637 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6638 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6639 },
6640 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6641 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6642 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6643 },
6644 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6645 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6646 0, 0
6647 },
6648 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6649 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6650 0, 0
6651 },
6652 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6653 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6654 0, 0
6655 },
6656 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6657 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6658 0, 0
6659 },
6660 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6661 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6662 0, 0
6663 },
6664 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6665 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6666 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6667 },
6668 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6669 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6670 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6671 },
6672 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6673 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6674 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6675 },
6676 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6677 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6678 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6679 },
6680 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6681 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6682 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6683 },
6684 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6685 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6686 0, 0
6687 },
6688 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6689 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6690 0, 0
6691 },
6692 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6693 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6694 0, 0
6695 },
6696 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6697 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6698 0, 0
6699 },
6700 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6701 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6702 0, 0
6703 },
6704 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6705 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6706 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6707 },
6708 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6709 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6710 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6711 },
6712 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6713 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6714 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6715 },
6716 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6717 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6718 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6719 },
6720 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6721 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6722 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6723 },
6724 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6725 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6726 0, 0
6727 },
6728 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6729 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6730 0, 0
6731 },
6732 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6733 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6734 0, 0
6735 },
6736 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6737 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6738 0, 0
6739 },
6740 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6741 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6742 0, 0
6743 },
6744 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6745 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6746 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6747 },
6748 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6749 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6750 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6751 },
6752 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6753 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6754 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6755 },
6756 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6757 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6758 0, 0
6759 },
6760 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6761 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6762 0, 0
6763 },
6764 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6765 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6766 0, 0
6767 },
6768 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6769 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6770 0, 0
6771 },
6772 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6773 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6774 0, 0
6775 },
6776 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6777 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6778 0, 0
6779 }
6780 };
6781
/*
 * gfx_v9_0_ras_error_inject - forward a RAS error-injection request to the
 * PSP RAS TA for a GFX sub-block.
 *
 * @adev: amdgpu device
 * @inject_if: opaque pointer to a struct ras_inject_if describing the request
 * @instance_mask: mask of hardware instances to inject into
 *
 * Validates the sub-block index against ras_gfx_subblocks and checks that
 * both the hardware and the driver support the requested error type before
 * triggering the injection under grbm_idx_mutex.
 *
 * Returns 0 on success, -EINVAL/-EPERM on invalid requests, or the
 * psp_ras_trigger_error() result.
 */
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if, uint32_t instance_mask)
{
	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
	struct ta_ras_trigger_error_input block_info = { 0 };
	uint32_t idx;
	int ret;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return -EINVAL;

	idx = info->head.sub_block_index;
	if (idx >= ARRAY_SIZE(ras_gfx_subblocks))
		return -EINVAL;

	/* table has gaps; unnamed entries are not injectable */
	if (!ras_gfx_subblocks[idx].name)
		return -EPERM;

	if (!(ras_gfx_subblocks[idx].hw_supported_error_type & info->head.type)) {
		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
			  ras_gfx_subblocks[idx].name, info->head.type);
		return -EPERM;
	}

	if (!(ras_gfx_subblocks[idx].sw_supported_error_type & info->head.type)) {
		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
			  ras_gfx_subblocks[idx].name, info->head.type);
		return -EPERM;
	}

	/* translate the generic RAS request into the TA's input layout */
	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
	block_info.sub_block_index = ras_gfx_subblocks[idx].ta_subblock;
	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
	block_info.address = info->address;
	block_info.value = info->value;

	mutex_lock(&adev->grbm_idx_mutex);
	ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
	mutex_unlock(&adev->grbm_idx_mutex);

	return ret;
}
6827
/* VML2 (GPUVM L2) bank-cache memory instance names. The array index is the
 * value written to mmVM_L2_MEM_ECC_INDEX when sampling or clearing the
 * per-instance ECC counters below.
 */
static const char * const vml2_mems[] = {
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
};
6846
/* VML2 page-table-walker memory instance names, indexed by the value
 * written to mmVM_L2_WALKER_MEM_ECC_INDEX.
 */
static const char * const vml2_walker_mems[] = {
	"UTC_VML2_CACHE_PDE0_MEM0",
	"UTC_VML2_CACHE_PDE0_MEM1",
	"UTC_VML2_CACHE_PDE1_MEM0",
	"UTC_VML2_CACHE_PDE1_MEM1",
	"UTC_VML2_CACHE_PDE2_MEM0",
	"UTC_VML2_CACHE_PDE2_MEM1",
	"UTC_VML2_RDIF_LOG_FIFO",
};
6856
/* ATC L2 2M-page cache memory instance names, indexed by the value
 * written to mmATC_L2_CACHE_2M_EDC_INDEX.
 */
static const char * const atc_l2_cache_2m_mems[] = {
	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
};
6863
/* ATC L2 4K-page cache memory instance names, indexed by the value
 * written to mmATC_L2_CACHE_4K_EDC_INDEX.
 *
 * Note: pointer elements are now const-qualified ("* const") to match the
 * sibling tables above (vml2_mems etc.) and let the table live in rodata.
 */
static const char * const atc_l2_cache_4k_mems[] = {
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
};
6898
/*
 * gfx_v9_0_query_utc_edc_status - collect ECC error counts from the UTC
 * memories (VML2 bank caches, VML2 walker memories, ATC L2 2M/4K caches)
 * and accumulate them into @err_data.
 *
 * Each memory family is walked by programming its *_INDEX register with the
 * instance number and reading back the *_CNT register. Correctable (SEC)
 * counts are added to err_data->ce_count, uncorrectable (DED) counts to
 * err_data->ue_count; every non-zero counter is also logged.
 *
 * Always returns 0.
 */
static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
					 struct ras_err_data *err_data)
{
	uint32_t i, data;
	uint32_t sec_count, ded_count;

	/* Select index 255 and zero the CNT registers before sampling —
	 * same sequence used by gfx_v9_0_reset_ras_error_count(). Index 255
	 * presumably parks the selector outside the valid instance range;
	 * TODO confirm against the register spec. */
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);

	/* VML2 bank caches: named SEC/DED fields exist for this register */
	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);

		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
		if (sec_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"SEC %d\n", i, vml2_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
		if (ded_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"DED %d\n", i, vml2_mems[i], ded_count);
			err_data->ue_count += ded_count;
		}
	}

	/* VML2 page-table-walker memories */
	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);

		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
						SEC_COUNT);
		if (sec_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
						DED_COUNT);
		if (ded_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"DED %d\n", i, vml2_walker_mems[i], ded_count);
			err_data->ue_count += ded_count;
		}
	}

	/* ATC L2 2M cache: only a SEC field is sampled here. The mask
	 * extracts bits 14:13 by hand — no named field macro is used;
	 * TODO confirm the bit positions against the register spec. */
	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);

		sec_count = (data & 0x00006000L) >> 0xd;
		if (sec_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
				sec_count);
			err_data->ce_count += sec_count;
		}
	}

	/* ATC L2 4K cache: hand-extracted SEC (bits 14:13) and DED
	 * (bits 16:15) counts — TODO confirm against the register spec. */
	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);

		sec_count = (data & 0x00006000L) >> 0xd;
		if (sec_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
				sec_count);
			err_data->ce_count += sec_count;
		}

		ded_count = (data & 0x00018000L) >> 0xf;
		if (ded_count) {
			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
				"DED %d\n", i, atc_l2_cache_4k_mems[i],
				ded_count);
			err_data->ue_count += ded_count;
		}
	}

	/* park the index selectors again when done */
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);

	return 0;
}
6995
/*
 * gfx_v9_0_ras_error_count - decode a raw EDC counter register value
 * @adev: amdgpu device (used for logging only)
 * @reg: register entry that @value was read from
 * @se_id: shader engine the value was sampled on (logged only)
 * @inst_id: instance the value was sampled on (logged only)
 * @value: raw register value
 * @sec_count: accumulator for correctable (SEC) error counts
 * @ded_count: accumulator for uncorrectable (DED) error counts
 *
 * Scans gfx_v9_0_ras_fields for every field that belongs to @reg, extracts
 * its SEC/DED bit-fields from @value, logs any non-zero count and adds it
 * to the matching accumulator.
 *
 * Always returns 0.
 *
 * Fix vs. previous revision: "if(" -> "if (" per kernel coding style
 * (checkpatch: space required after the if keyword). No functional change.
 */
static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
				    const struct soc15_reg_entry *reg,
				    uint32_t se_id, uint32_t inst_id, uint32_t value,
				    uint32_t *sec_count, uint32_t *ded_count)
{
	uint32_t i;
	uint32_t sec_cnt, ded_cnt;

	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
		/* skip table entries that describe a different register */
		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
			gfx_v9_0_ras_fields[i].seg != reg->seg ||
			gfx_v9_0_ras_fields[i].inst != reg->inst)
			continue;

		sec_cnt = (value &
				gfx_v9_0_ras_fields[i].sec_count_mask) >>
				gfx_v9_0_ras_fields[i].sec_count_shift;
		if (sec_cnt) {
			dev_info(adev->dev, "GFX SubBlock %s, "
				"Instance[%d][%d], SEC %d\n",
				gfx_v9_0_ras_fields[i].name,
				se_id, inst_id,
				sec_cnt);
			*sec_count += sec_cnt;
		}

		ded_cnt = (value &
				gfx_v9_0_ras_fields[i].ded_count_mask) >>
				gfx_v9_0_ras_fields[i].ded_count_shift;
		if (ded_cnt) {
			dev_info(adev->dev, "GFX SubBlock %s, "
				"Instance[%d][%d], DED %d\n",
				gfx_v9_0_ras_fields[i].name,
				se_id, inst_id,
				ded_cnt);
			*ded_count += ded_cnt;
		}
	}

	return 0;
}
7037
/*
 * gfx_v9_0_reset_ras_error_count - clear all GFX EDC/ECC error counters.
 *
 * The per-SE/instance EDC counter registers are read-to-clear: reading them
 * under each GRBM selection resets them. The UTC memories are cleared by
 * zeroing the CNT registers and then reading each instance once.
 */
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
{
	int i, j, k;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return;

	/* read back registers to clear the counters */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
				/* select SE j / instance k, then read-to-clear */
				amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
			}
		}
	}
	/* restore broadcast selection (SE/SH/instance broadcast bits set) */
	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* park the UTC index selectors at 255 and zero the counters —
	 * mirrors the setup in gfx_v9_0_query_utc_edc_status() */
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);

	/* read every UTC instance once to clear its counter */
	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
	}

	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
	}

	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
	}

	/* leave the index selectors parked */
	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
}
7092
/*
 * gfx_v9_0_query_ras_error_count - gather all GFX RAS error counts.
 * @adev: amdgpu device
 * @ras_error_status: struct ras_err_data * to fill in
 *
 * Resets the error totals, then reads every EDC counter register for every
 * SE/instance combination (under grbm_idx_mutex) and decodes each non-zero
 * value via gfx_v9_0_ras_error_count(). Finally appends the UTC memory
 * counts via gfx_v9_0_query_utc_edc_status(). Note: reading the counter
 * registers also clears them (see the reset path).
 */
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
	uint32_t sec_count = 0, ded_count = 0;
	uint32_t i, j, k;
	uint32_t reg_value;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		return;

	err_data->ue_count = 0;
	err_data->ce_count = 0;

	mutex_lock(&adev->grbm_idx_mutex);

	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
				/* sample register i on SE j / instance k */
				amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
				reg_value =
					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
				if (reg_value)
					gfx_v9_0_ras_error_count(adev,
						&gfx_v9_0_edc_counter_regs[i],
						j, k, reg_value,
						&sec_count, &ded_count);
			}
		}
	}

	err_data->ce_count += sec_count;
	err_data->ue_count += ded_count;

	/* restore broadcast SE/SH/instance selection */
	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_query_utc_edc_status(adev, err_data);
}
7132
gfx_v9_0_emit_mem_sync(struct amdgpu_ring * ring)7133 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
7134 {
7135 const unsigned int cp_coher_cntl =
7136 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
7137 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
7138 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
7139 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
7140 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
7141
7142 /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
7143 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
7144 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
7145 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
7146 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
7147 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
7148 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
7149 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
7150 }
7151
/*
 * gfx_v9_0_emit_wave_limit_cs - program the wave-limit percentage for one
 * compute (CS) pipe on the given ring.
 * @ring: ring to emit the register write on
 * @pipe: CS pipe index (0-3)
 * @enable: true to restrict waves (value 0x1), false to restore the default
 */
static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
					uint32_t pipe, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reg;

	switch (pipe) {
	case 0:
		reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
		break;
	case 1:
		reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
		break;
	case 2:
		reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
		break;
	case 3:
		reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
		break;
	default:
		DRM_DEBUG("invalid pipe %d\n", pipe);
		return;
	}

	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all identical,
	 * so the CS0 default is valid for every pipe */
	amdgpu_ring_emit_wreg(ring, reg,
			      enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT);
}
/*
 * gfx_v9_0_emit_wave_limit - throttle gfx and sibling compute pipes so a
 * high-priority job on @ring gets the bulk of the shader resources.
 * @ring: the high-priority ring
 * @enable: true to restrict other work, false to restore defaults
 */
static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t percent;
	int pipe;

	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register that
	 * limits the number of gfx waves. A value of 0x1f leaves gfx with
	 * only around 25% of the gpu resources.
	 */
	percent = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
	amdgpu_ring_emit_wreg(ring,
			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
			      percent);

	/* Also restrict normal/low priority compute queues for best QoS on
	 * high priority compute jobs; amdgpu controls only the first ME
	 * (CS pipes 0-3). Skip the pipe this ring runs on.
	 */
	for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; pipe++) {
		if (pipe == ring->pipe)
			continue;
		gfx_v9_0_emit_wave_limit_cs(ring, pipe, enable);
	}
}
7210
/*
 * gfx_v9_ring_insert_nop - pad the ring with @num_nop NOP dwords.
 *
 * A single NOP is just the ring's nop header. For more, one PACKET3_NOP
 * header can cover up to 0x3ffe following dwords in hardware; the header
 * itself occupies slot 0 and the generic helper fills the remaining
 * num_nop - 1 entries one at a time.
 */
static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
	if (num_nop == 1) {
		amdgpu_ring_write(ring, ring->funcs->nop);
	} else {
		amdgpu_ring_write(ring,
				  PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
		amdgpu_ring_insert_nop(ring, num_nop - 1);
	}
}
7225
/*
 * gfx_v9_0_reset_kgq - reset a hung gfx (KGQ) ring via a CP VMID reset.
 * @ring: the gfx ring to reset
 * @vmid: VMID of the hung job; its bit is set in CP_VMID_RESET
 *
 * Uses the KIQ to request the reset, then re-arms the ring: re-signals the
 * current fence, waits for CP_VMID_RESET to clear and zeroes it, and
 * finally tests the ring. Not supported under SR-IOV.
 *
 * Returns 0 on success, negative errno on failure.
 */
static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	unsigned long flags;
	u32 tmp;
	int r;

	if (amdgpu_sriov_vf(adev))
		return -EINVAL;

	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
		return -EINVAL;

	spin_lock_irqsave(&kiq->ring_lock, flags);

	/* 5 dwords for the register-write packet below */
	if (amdgpu_ring_alloc(kiq_ring, 5)) {
		spin_unlock_irqrestore(&kiq->ring_lock, flags);
		return -ENOMEM;
	}

	/* request the CP to reset everything owned by this vmid */
	tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
	gfx_v9_0_ring_emit_wreg(kiq_ring,
				 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
	amdgpu_ring_commit(kiq_ring);

	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_ring_test_ring(kiq_ring);
	if (r)
		return r;

	/* 7 + 7 + 5 dwords: fence + reg-wait + reg-write packets below */
	if (amdgpu_ring_alloc(ring, 7 + 7 + 5))
		return -ENOMEM;
	/* re-signal the in-flight fence so waiters make progress */
	gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
				 ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
	/* wait for the reset request bits to read back as zero, then
	 * explicitly clear the register */
	gfx_v9_0_ring_emit_reg_wait(ring,
				    SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff);
	gfx_v9_0_ring_emit_wreg(ring,
				SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0);

	return amdgpu_ring_test_ring(ring);
}
7270
gfx_v9_0_reset_kcq(struct amdgpu_ring * ring,unsigned int vmid)7271 static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
7272 unsigned int vmid)
7273 {
7274 struct amdgpu_device *adev = ring->adev;
7275 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7276 struct amdgpu_ring *kiq_ring = &kiq->ring;
7277 unsigned long flags;
7278 int i, r;
7279
7280 if (amdgpu_sriov_vf(adev))
7281 return -EINVAL;
7282
7283 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7284 return -EINVAL;
7285
7286 spin_lock_irqsave(&kiq->ring_lock, flags);
7287
7288 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
7289 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7290 return -ENOMEM;
7291 }
7292
7293 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
7294 0, 0);
7295 amdgpu_ring_commit(kiq_ring);
7296
7297 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7298
7299 r = amdgpu_ring_test_ring(kiq_ring);
7300 if (r)
7301 return r;
7302
7303 /* make sure dequeue is complete*/
7304 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
7305 mutex_lock(&adev->srbm_mutex);
7306 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
7307 for (i = 0; i < adev->usec_timeout; i++) {
7308 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
7309 break;
7310 udelay(1);
7311 }
7312 if (i >= adev->usec_timeout)
7313 r = -ETIMEDOUT;
7314 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7315 mutex_unlock(&adev->srbm_mutex);
7316 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
7317 if (r) {
7318 dev_err(adev->dev, "fail to wait on hqd deactive\n");
7319 return r;
7320 }
7321
7322 r = amdgpu_bo_reserve(ring->mqd_obj, false);
7323 if (unlikely(r != 0)){
7324 dev_err(adev->dev, "fail to resv mqd_obj\n");
7325 return r;
7326 }
7327 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
7328 if (!r) {
7329 r = gfx_v9_0_kcq_init_queue(ring, true);
7330 amdgpu_bo_kunmap(ring->mqd_obj);
7331 ring->mqd_ptr = NULL;
7332 }
7333 amdgpu_bo_unreserve(ring->mqd_obj);
7334 if (r) {
7335 dev_err(adev->dev, "fail to unresv mqd_obj\n");
7336 return r;
7337 }
7338 spin_lock_irqsave(&kiq->ring_lock, flags);
7339 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
7340 if (r) {
7341 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7342 return -ENOMEM;
7343 }
7344 kiq->pmf->kiq_map_queues(kiq_ring, ring);
7345 amdgpu_ring_commit(kiq_ring);
7346 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7347 r = amdgpu_ring_test_ring(kiq_ring);
7348 if (r) {
7349 DRM_ERROR("fail to remap queue\n");
7350 return r;
7351 }
7352 return amdgpu_ring_test_ring(ring);
7353 }
7354
gfx_v9_ip_print(struct amdgpu_ip_block * ip_block,struct drm_printer * p)7355 static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
7356 {
7357 struct amdgpu_device *adev = ip_block->adev;
7358 uint32_t i, j, k, reg, index = 0;
7359 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7360
7361 if (!adev->gfx.ip_dump_core)
7362 return;
7363
7364 for (i = 0; i < reg_count; i++)
7365 drm_printf(p, "%-50s \t 0x%08x\n",
7366 gc_reg_list_9[i].reg_name,
7367 adev->gfx.ip_dump_core[i]);
7368
7369 /* print compute queue registers for all instances */
7370 if (!adev->gfx.ip_dump_compute_queues)
7371 return;
7372
7373 reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7374 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7375 adev->gfx.mec.num_mec,
7376 adev->gfx.mec.num_pipe_per_mec,
7377 adev->gfx.mec.num_queue_per_pipe);
7378
7379 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7380 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7381 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7382 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7383 for (reg = 0; reg < reg_count; reg++) {
7384 drm_printf(p, "%-50s \t 0x%08x\n",
7385 gc_cp_reg_list_9[reg].reg_name,
7386 adev->gfx.ip_dump_compute_queues[index + reg]);
7387 }
7388 index += reg_count;
7389 }
7390 }
7391 }
7392
7393 }
7394
gfx_v9_ip_dump(struct amdgpu_ip_block * ip_block)7395 static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
7396 {
7397 struct amdgpu_device *adev = ip_block->adev;
7398 uint32_t i, j, k, reg, index = 0;
7399 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7400
7401 if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
7402 return;
7403
7404 amdgpu_gfx_off_ctrl(adev, false);
7405 for (i = 0; i < reg_count; i++)
7406 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7407 amdgpu_gfx_off_ctrl(adev, true);
7408
7409 /* dump compute queue registers for all instances */
7410 if (!adev->gfx.ip_dump_compute_queues)
7411 return;
7412
7413 reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7414 amdgpu_gfx_off_ctrl(adev, false);
7415 mutex_lock(&adev->srbm_mutex);
7416 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7417 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7418 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7419 /* ME0 is for GFX so start from 1 for CP */
7420 soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
7421
7422 for (reg = 0; reg < reg_count; reg++) {
7423 adev->gfx.ip_dump_compute_queues[index + reg] =
7424 RREG32(SOC15_REG_ENTRY_OFFSET(
7425 gc_cp_reg_list_9[reg]));
7426 }
7427 index += reg_count;
7428 }
7429 }
7430 }
7431 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7432 mutex_unlock(&adev->srbm_mutex);
7433 amdgpu_gfx_off_ctrl(adev, true);
7434
7435 }
7436
gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring * ring)7437 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
7438 {
7439 /* Emit the cleaner shader */
7440 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
7441 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
7442 }
7443
gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring * ring)7444 static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring)
7445 {
7446 struct amdgpu_device *adev = ring->adev;
7447 struct amdgpu_ip_block *gfx_block =
7448 amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
7449
7450 amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
7451
7452 /* Raven and PCO APUs seem to have stability issues
7453 * with compute and gfxoff and gfx pg. Disable gfx pg during
7454 * submission and allow again afterwards.
7455 */
7456 if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
7457 gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE);
7458 }
7459
gfx_v9_0_ring_end_use_compute(struct amdgpu_ring * ring)7460 static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring)
7461 {
7462 struct amdgpu_device *adev = ring->adev;
7463 struct amdgpu_ip_block *gfx_block =
7464 amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
7465
7466 /* Raven and PCO APUs seem to have stability issues
7467 * with compute and gfxoff and gfx pg. Disable gfx pg during
7468 * submission and allow again afterwards.
7469 */
7470 if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
7471 gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE);
7472
7473 amdgpu_gfx_enforce_isolation_ring_end_use(ring);
7474 }
7475
/* IP-block-level callbacks for GFX v9.0: lifecycle (init/fini/suspend/
 * resume), reset and idle handling, clock/power gating control, and the
 * register dump/print pair used for debug state capture. */
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
	.dump_ip_state = gfx_v9_ip_dump,
	.print_ip_state = gfx_v9_ip_print,
};
7495
/* Ring callbacks for the hardware GFX ring.  emit_frame_size is the
 * worst-case dword budget per frame; each term below is annotated with
 * the emit helper it accounts for. */
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2 + /* SWITCH_BUFFER */
		7 + /* gfx_v9_0_emit_mem_sync */
		2, /* gfx_v9_0_ring_emit_cleaner_shader */
	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = gfx_v9_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.preempt_ib = gfx_v9_0_ring_preempt_ib,
	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
	.reset = gfx_v9_0_reset_kgq,
	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
};
7552
/* Ring callbacks for the software GFX rings used by the ring mux (mcbp);
 * rptr/wptr go through the amdgpu_sw_ring_* mux helpers and the patch_*
 * hooks fix up CNTL/CE/DE metadata when frames are spliced onto the
 * hardware ring.  Most emit hooks are shared with the HW gfx ring. */
static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jump to the place just
		     * prior to this double SWITCH_BUFFER
		     */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2 + /* SWITCH_BUFFER */
		7 + /* gfx_v9_0_emit_mem_sync */
		2, /* gfx_v9_0_ring_emit_cleaner_shader */
	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = gfx_v9_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
	.patch_cntl = gfx_v9_0_ring_patch_cntl,
	.patch_de = gfx_v9_0_ring_patch_de_meta,
	.patch_ce = gfx_v9_0_ring_patch_ce_meta,
	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
};
7612
/* Ring callbacks for compute (MEC) rings.  Uses the compute-specific
 * rptr/wptr accessors and begin_use/end_use hooks (which also handle the
 * Raven/PCO gfx-PG workaround); emit_frame_size terms are annotated with
 * the helper each accounts for. */
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
		7 + /* gfx_v9_0_emit_mem_sync */
		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
		15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
		2, /* gfx_v9_0_ring_emit_cleaner_shader */
	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = gfx_v9_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
	.reset = gfx_v9_0_reset_kcq,
	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
	.begin_use = gfx_v9_0_ring_begin_use_compute,
	.end_use = gfx_v9_0_ring_end_use_compute,
};
7655
/* Ring callbacks for the kernel interface queue (KIQ).  The KIQ is a
 * driver-only control queue: it carries register access and queue
 * map/unmap packets, so it has no emit_ib/test_ib hooks and uses the
 * KIQ-specific fence emitter. */
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};
7682
gfx_v9_0_set_ring_funcs(struct amdgpu_device * adev)7683 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7684 {
7685 int i;
7686
7687 adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7688
7689 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7690 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7691
7692 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7693 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7694 adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7695 }
7696
7697 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7698 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7699 }
7700
/* Interrupt source tables: each pairs a state setter (enable/disable the
 * source) with the handler invoked when the interrupt fires. */

/* End-of-pipe (fence/completion) interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

/* Privileged register access fault. */
static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

/* Bad opcode fault. */
static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = {
	.set = gfx_v9_0_set_bad_op_fault_state,
	.process = gfx_v9_0_bad_op_irq,
};

/* Privileged instruction fault. */
static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

/* CP ECC error; handled by the shared amdgpu RAS path. */
static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = amdgpu_gfx_cp_ecc_error_irq,
};
7725
7726
gfx_v9_0_set_irq_funcs(struct amdgpu_device * adev)7727 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7728 {
7729 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7730 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7731
7732 adev->gfx.priv_reg_irq.num_types = 1;
7733 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7734
7735 adev->gfx.bad_op_irq.num_types = 1;
7736 adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;
7737
7738 adev->gfx.priv_inst_irq.num_types = 1;
7739 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7740
7741 adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7742 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7743 }
7744
gfx_v9_0_set_rlc_funcs(struct amdgpu_device * adev)7745 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7746 {
7747 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7748 case IP_VERSION(9, 0, 1):
7749 case IP_VERSION(9, 2, 1):
7750 case IP_VERSION(9, 4, 0):
7751 case IP_VERSION(9, 2, 2):
7752 case IP_VERSION(9, 1, 0):
7753 case IP_VERSION(9, 4, 1):
7754 case IP_VERSION(9, 3, 0):
7755 case IP_VERSION(9, 4, 2):
7756 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7757 break;
7758 default:
7759 break;
7760 }
7761 }
7762
gfx_v9_0_set_gds_init(struct amdgpu_device * adev)7763 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7764 {
7765 /* init asci gds info */
7766 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7767 case IP_VERSION(9, 0, 1):
7768 case IP_VERSION(9, 2, 1):
7769 case IP_VERSION(9, 4, 0):
7770 adev->gds.gds_size = 0x10000;
7771 break;
7772 case IP_VERSION(9, 2, 2):
7773 case IP_VERSION(9, 1, 0):
7774 case IP_VERSION(9, 4, 1):
7775 adev->gds.gds_size = 0x1000;
7776 break;
7777 case IP_VERSION(9, 4, 2):
7778 /* aldebaran removed all the GDS internal memory,
7779 * only support GWS opcode in kernel, like barrier
7780 * semaphore.etc */
7781 adev->gds.gds_size = 0;
7782 break;
7783 default:
7784 adev->gds.gds_size = 0x10000;
7785 break;
7786 }
7787
7788 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7789 case IP_VERSION(9, 0, 1):
7790 case IP_VERSION(9, 4, 0):
7791 adev->gds.gds_compute_max_wave_id = 0x7ff;
7792 break;
7793 case IP_VERSION(9, 2, 1):
7794 adev->gds.gds_compute_max_wave_id = 0x27f;
7795 break;
7796 case IP_VERSION(9, 2, 2):
7797 case IP_VERSION(9, 1, 0):
7798 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7799 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7800 else
7801 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7802 break;
7803 case IP_VERSION(9, 4, 1):
7804 adev->gds.gds_compute_max_wave_id = 0xfff;
7805 break;
7806 case IP_VERSION(9, 4, 2):
7807 /* deprecated for Aldebaran, no usage at all */
7808 adev->gds.gds_compute_max_wave_id = 0;
7809 break;
7810 default:
7811 /* this really depends on the chip */
7812 adev->gds.gds_compute_max_wave_id = 0x7ff;
7813 break;
7814 }
7815
7816 adev->gds.gws_size = 64;
7817 adev->gds.oa_size = 16;
7818 }
7819
gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device * adev,u32 bitmap)7820 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7821 u32 bitmap)
7822 {
7823 u32 data;
7824
7825 if (!bitmap)
7826 return;
7827
7828 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7829 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7830
7831 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7832 }
7833
gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device * adev)7834 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7835 {
7836 u32 data, mask;
7837
7838 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7839 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7840
7841 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7842 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7843
7844 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7845
7846 return (~data) & mask;
7847 }
7848
gfx_v9_0_get_cu_info(struct amdgpu_device * adev,struct amdgpu_cu_info * cu_info)7849 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7850 struct amdgpu_cu_info *cu_info)
7851 {
7852 int i, j, k, counter, active_cu_number = 0;
7853 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7854 unsigned disable_masks[4 * 4];
7855
7856 if (!adev || !cu_info)
7857 return -EINVAL;
7858
7859 /*
7860 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7861 */
7862 if (adev->gfx.config.max_shader_engines *
7863 adev->gfx.config.max_sh_per_se > 16)
7864 return -EINVAL;
7865
7866 amdgpu_gfx_parse_disable_cu(disable_masks,
7867 adev->gfx.config.max_shader_engines,
7868 adev->gfx.config.max_sh_per_se);
7869
7870 mutex_lock(&adev->grbm_idx_mutex);
7871 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7872 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7873 mask = 1;
7874 ao_bitmap = 0;
7875 counter = 0;
7876 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7877 gfx_v9_0_set_user_cu_inactive_bitmap(
7878 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7879 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7880
7881 /*
7882 * The bitmap(and ao_cu_bitmap) in cu_info structure is
7883 * 4x4 size array, and it's usually suitable for Vega
7884 * ASICs which has 4*2 SE/SH layout.
7885 * But for Arcturus, SE/SH layout is changed to 8*1.
7886 * To mostly reduce the impact, we make it compatible
7887 * with current bitmap array as below:
7888 * SE4,SH0 --> bitmap[0][1]
7889 * SE5,SH0 --> bitmap[1][1]
7890 * SE6,SH0 --> bitmap[2][1]
7891 * SE7,SH0 --> bitmap[3][1]
7892 */
7893 cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7894
7895 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7896 if (bitmap & mask) {
7897 if (counter < adev->gfx.config.max_cu_per_sh)
7898 ao_bitmap |= mask;
7899 counter ++;
7900 }
7901 mask <<= 1;
7902 }
7903 active_cu_number += counter;
7904 if (i < 2 && j < 2)
7905 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7906 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7907 }
7908 }
7909 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7910 mutex_unlock(&adev->grbm_idx_mutex);
7911
7912 cu_info->number = active_cu_number;
7913 cu_info->ao_cu_mask = ao_cu_mask;
7914 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7915
7916 return 0;
7917 }
7918
/* Exported IP block descriptor registered by the SoC init code to add the
 * GFX v9.0 block to a device's IP block list. */
const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};
7927