xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 3fd6c59042dbba50391e30862beac979491145fe)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_0_cleaner_shader.h"
54 #include "gfx_v9_4_2.h"
55 
56 #include "asic_reg/pwr/pwr_10_0_offset.h"
57 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
58 #include "asic_reg/gc/gc_9_0_default.h"
59 
60 #define GFX9_NUM_GFX_RINGS     1
61 #define GFX9_NUM_SW_GFX_RINGS  2
62 #define GFX9_MEC_HPD_SIZE 4096
63 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
64 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
65 
66 #define mmGCEA_PROBE_MAP                        0x070c
67 #define mmGCEA_PROBE_MAP_BASE_IDX               0
68 
69 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
74 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
75 
76 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
81 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
82 
83 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
88 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
89 
90 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
91 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
92 MODULE_FIRMWARE("amdgpu/raven_me.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
94 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
95 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
96 
97 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
103 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
104 
105 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
110 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
111 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
115 
116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
120 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
121 
122 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
127 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
128 
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
133 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
134 
135 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
136 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
137 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
138 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
139 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
140 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
141 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
142 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
143 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
144 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
145 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
146 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
147 
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
149 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
151 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
152 
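/* GC status/debug registers captured when dumping GFX9 hardware state for
 * debugging (register offsets are resolved by name via SOC15_REG_ENTRY_STR).
 */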
153 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
154 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
155 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
156 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
157 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
158 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
159 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
160 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
161 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
162 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
163 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
164 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
165 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
166 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
167 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
168 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
169 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
170 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
171 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
172 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
173 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
174 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
175 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
176 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
177 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
178 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
179 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
180 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
181 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
182 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
183 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
184 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
185 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
186 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
187 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
188 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
189 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
190 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
191 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
192 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
193 	SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
194 	SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
195 	SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
196 	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
197 	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
198 	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
199 	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
200 	SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
201 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
202 	SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
203 	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
204 	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
205 	SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
206 	SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
207 	SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
208 	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
209 	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
210 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
211 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
212 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
213 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
214 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
215 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
216 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
217 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
218 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
219 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
220 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
221 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
222 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
223 	SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
224 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
225 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
226 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
227 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
228 	/* cp header registers */
229 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
230 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
231 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
232 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
233 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
234 	/* SE status registers */
235 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
236 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
237 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
238 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
239 };
240 
241 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
242 	/* compute queue registers */
243 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
244 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
245 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
246 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
247 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
248 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
249 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
250 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
251 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
252 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
253 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
254 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
255 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
256 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
257 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
258 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
259 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
260 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
261 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
262 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
263 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
264 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
265 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
266 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
267 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
268 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
269 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
270 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
271 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
272 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
273 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
274 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
275 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
276 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
277 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
278 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
279 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
280 };
281 
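/* GFX sub-blocks addressable through the RAS TA interface; the *_INDEX_START /
 * *_INDEX_END values bracket the sub-ranges used when injecting or querying
 * errors for a given block.
 */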
282 enum ta_ras_gfx_subblock {
283 	/*CPC*/
284 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
285 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
286 	TA_RAS_BLOCK__GFX_CPC_UCODE,
287 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
288 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
289 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
290 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
291 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
292 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
293 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
294 	/* CPF*/
295 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
296 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
297 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
298 	TA_RAS_BLOCK__GFX_CPF_TAG,
299 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
300 	/* CPG*/
301 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
302 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
303 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
304 	TA_RAS_BLOCK__GFX_CPG_TAG,
305 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
306 	/* GDS*/
307 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
308 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
309 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
310 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
311 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
312 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
313 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
314 	/* SPI*/
315 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
316 	/* SQ*/
317 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
318 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
319 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
320 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
321 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
322 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
323 	/* SQC (3 ranges)*/
324 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
325 	/* SQC range 0*/
326 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
327 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
328 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
329 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
330 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
331 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
332 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
333 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
334 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
335 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
336 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
337 	/* SQC range 1*/
338 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
339 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
340 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
341 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
342 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
343 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
344 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
345 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
346 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
347 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
348 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
349 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
350 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
351 	/* SQC range 2*/
352 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
353 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
354 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
355 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
356 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
357 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
358 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
359 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
360 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
361 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
362 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
363 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
364 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
365 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
366 	/* TA*/
367 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
368 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
369 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
370 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
371 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
372 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
373 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
374 	/* TCA*/
375 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
376 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
377 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
378 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
379 	/* TCC (5 sub-ranges)*/
380 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
381 	/* TCC range 0*/
382 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
383 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
384 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
385 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
386 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
387 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
388 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
389 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
390 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
391 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
392 	/* TCC range 1*/
393 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
394 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
395 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
396 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
397 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
398 	/* TCC range 2*/
399 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
400 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
401 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
402 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
403 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
404 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
405 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
406 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
407 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
408 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
409 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
410 	/* TCC range 3*/
411 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
412 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
413 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
414 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
415 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
416 	/* TCC range 4*/
417 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
418 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
419 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
420 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
421 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
422 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
423 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
424 	/* TCI*/
425 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
426 	/* TCP*/
427 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
428 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
429 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
430 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
431 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
432 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
433 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
434 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
435 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
436 	/* TD*/
437 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
438 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
439 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
440 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
441 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
442 	/* EA (3 sub-ranges)*/
443 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
444 	/* EA range 0*/
445 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
446 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
447 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
448 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
449 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
450 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
451 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
452 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
453 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
454 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
455 	/* EA range 1*/
456 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
457 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
458 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
459 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
460 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
461 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
462 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
463 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
464 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
465 	/* EA range 2*/
466 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
467 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
468 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
469 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
470 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
471 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
472 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
473 	/* UTC VM L2 bank*/
474 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
475 	/* UTC VM walker*/
476 	TA_RAS_BLOCK__UTC_VML2_WALKER,
477 	/* UTC ATC L2 2MB cache*/
478 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
479 	/* UTC ATC L2 4KB cache*/
480 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
481 	TA_RAS_BLOCK__GFX_MAX
482 };
483 
484 struct ras_gfx_subblock {
485 	unsigned char *name;
486 	int ta_subblock;
487 	int hw_supported_error_type;
488 	int sw_supported_error_type;
489 };
490 
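/* Helper to build one ras_gfx_subblocks[] entry: flags a-d are packed into
 * hw_supported_error_type (bits 0..3) and e-h into sw_supported_error_type
 * (bit order g, e, h, f); the meaning of the individual bits (e.g. correctable
 * vs. uncorrectable support) is presumed from how the table is consumed.
 */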
491 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
492 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
493 		#subblock,                                                     \
494 		TA_RAS_BLOCK__##subblock,                                      \
495 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
496 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
497 	}
498 
499 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
500 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
510 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
511 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
512 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
513 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
514 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
515 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
516 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
517 			     0),
518 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
519 			     0),
520 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
521 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
522 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
523 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
524 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
525 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
526 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
527 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
528 			     0, 0),
529 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
530 			     0),
531 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
532 			     0, 0),
533 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
534 			     0),
535 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
536 			     0, 0),
537 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
538 			     0),
539 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
540 			     1),
541 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
542 			     0, 0, 0),
543 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
544 			     0),
545 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
546 			     0),
547 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
548 			     0),
549 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
550 			     0),
551 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
552 			     0),
553 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
554 			     0, 0),
555 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
556 			     0),
557 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
558 			     0),
559 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
560 			     0, 0, 0),
561 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
562 			     0),
563 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
564 			     0),
565 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
566 			     0),
567 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
568 			     0),
569 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
570 			     0),
571 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
572 			     0, 0),
573 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
574 			     0),
575 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
576 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
577 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
578 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
579 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
580 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
581 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
582 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
583 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
584 			     1),
585 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
586 			     1),
587 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
588 			     1),
589 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
590 			     0),
591 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
592 			     0),
593 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
594 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
595 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
596 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
597 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
598 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
599 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
600 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
601 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
602 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
603 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
604 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
605 			     0),
606 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
607 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
608 			     0),
609 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
610 			     0, 0),
611 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
612 			     0),
613 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
614 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
615 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
616 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
617 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
618 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
619 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
620 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
621 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
622 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
623 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
624 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
625 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
626 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
627 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
628 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
629 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
630 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
631 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
632 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
633 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
634 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
635 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
636 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
637 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
638 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
639 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
640 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
641 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
642 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
643 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
644 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
645 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
646 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
647 };
648 
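/* "Golden" register overrides applied at init time: each entry is
 * (hwip, instance, register, and_mask, or_value), programmed via
 * soc15_program_register_sequence(), which read-modify-writes the register
 * (clearing the masked bits, then OR-ing in the value).
 */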
649 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
650 {
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
671 };
672 
673 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
674 {
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
693 };
694 
695 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
696 {
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
708 };
709 
710 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
711 {
712 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
713 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
714 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
715 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
716 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
717 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
718 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
719 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
720 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
721 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
722 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
723 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
724 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
725 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
726 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
727 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
728 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
729 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
730 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
731 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
732 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
733 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
734 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
735 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
736 };
737 
738 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
739 {
740 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
741 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
742 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
743 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
744 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
745 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
746 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
747 };
748 
749 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
750 {
751 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
752 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
753 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
754 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
755 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
756 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
757 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
758 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
759 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
760 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
761 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
762 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
763 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
764 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
765 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
766 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
767 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
768 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
769 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
770 };
771 
772 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
773 {
774 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
775 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
776 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
777 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
778 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
779 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
780 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
781 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
782 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
783 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
784 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
785 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
786 };
787 
788 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
789 {
790 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
791 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
792 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
793 };
794 
795 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
796 {
797 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
798 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
799 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
800 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
801 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
802 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
803 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
804 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
805 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
806 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
807 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
808 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
809 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
810 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
811 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
812 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
813 };
814 
815 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
816 {
817 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
818 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
819 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
820 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
821 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
822 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
823 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
824 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
825 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
826 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
827 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
828 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
829 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
830 };
831 
832 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
833 {
834 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
835 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
836 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
837 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
838 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
839 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
840 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
841 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
842 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
843 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
844 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
845 };
846 
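/* Registers allowed to be written through the RLCG indirect access path
 * (used, e.g., when direct MMIO writes are not permitted under SR-IOV).
 */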
847 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
848 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
849 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
850 };
851 
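/* Offsets of the RLC_SRM_INDEX_CNTL_ADDR_n / _DATA_n registers relative to
 * instance 0, so the eight register pairs can be programmed by index.
 */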
852 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
853 {
854 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
855 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
856 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
857 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
858 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
859 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
860 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
861 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
862 };
863 
864 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
865 {
866 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
867 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
868 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
869 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
870 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
871 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
872 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
873 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
874 };
875 
876 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
877 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
878 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
879 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
880 
881 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
882 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
883 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
884 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
885 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
886 				struct amdgpu_cu_info *cu_info);
887 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
888 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
889 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
890 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
891 					  void *ras_error_status);
892 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
893 				     void *inject_if, uint32_t instance_mask);
894 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
895 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
896 					      unsigned int vmid);
897 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
898 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
899 
900 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
901 				uint64_t queue_mask)
902 {
903 	struct amdgpu_device *adev = kiq_ring->adev;
904 	u64 shader_mc_addr;
905 
906 	/* Cleaner shader MC address */
907 	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
908 
909 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
910 	amdgpu_ring_write(kiq_ring,
911 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
912 		/* vmid_mask:0, queue_type:0 (KIQ) */
913 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
914 	amdgpu_ring_write(kiq_ring,
915 			lower_32_bits(queue_mask));	/* queue mask lo */
916 	amdgpu_ring_write(kiq_ring,
917 			upper_32_bits(queue_mask));	/* queue mask hi */
918 	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
919 	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
920 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
921 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
922 }
923 
924 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
925 				 struct amdgpu_ring *ring)
926 {
927 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
928 	uint64_t wptr_addr = ring->wptr_gpu_addr;
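	/* engine_sel for MAP_QUEUES: 4 selects the gfx engine, 0 the compute MEC */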
929 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
930 
931 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
932 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
933 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
934 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
935 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
936 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
937 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
938 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
939 			 /*queue_type: normal compute queue */
940 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
941 			 /* alloc format: all_on_one_pipe */
942 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
943 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
944 			 /* num_queues: must be 1 */
945 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
946 	amdgpu_ring_write(kiq_ring,
947 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
948 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
949 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
950 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
951 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
952 }
953 
954 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
955 				   struct amdgpu_ring *ring,
956 				   enum amdgpu_unmap_queues_action action,
957 				   u64 gpu_addr, u64 seq)
958 {
959 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
960 
961 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
962 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
963 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
964 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
965 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
966 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
967 	amdgpu_ring_write(kiq_ring,
968 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
969 
970 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
971 		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
972 		amdgpu_ring_write(kiq_ring, 0);
973 		amdgpu_ring_write(kiq_ring, 0);
974 
975 	} else {
976 		amdgpu_ring_write(kiq_ring, 0);
977 		amdgpu_ring_write(kiq_ring, 0);
978 		amdgpu_ring_write(kiq_ring, 0);
979 	}
980 }
981 
982 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
983 				   struct amdgpu_ring *ring,
984 				   u64 addr,
985 				   u64 seq)
986 {
987 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
988 
989 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
990 	amdgpu_ring_write(kiq_ring,
991 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
992 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
993 			  PACKET3_QUERY_STATUS_COMMAND(2));
994 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
995 	amdgpu_ring_write(kiq_ring,
996 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
997 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
998 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
999 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
1000 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
1001 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
1002 }
1003 
1004 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
1005 				uint16_t pasid, uint32_t flush_type,
1006 				bool all_hub)
1007 {
1008 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
1009 	amdgpu_ring_write(kiq_ring,
1010 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
1011 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
1012 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
1013 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
1014 }
1015 
1016 
1017 static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
1018 					uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
1019 					uint32_t xcc_id, uint32_t vmid)
1020 {
1021 	struct amdgpu_device *adev = kiq_ring->adev;
1022 	unsigned i;
1023 
1024 	/* enter safe mode */
1025 	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
1026 	mutex_lock(&adev->srbm_mutex);
1027 	soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);
1028 
1029 	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1030 		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
1031 		WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
1032 		/* wait till the dequeue request takes effect */
1033 		for (i = 0; i < adev->usec_timeout; i++) {
1034 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
1035 				break;
1036 			udelay(1);
1037 		}
1038 		if (i >= adev->usec_timeout)
1039 			dev_err(adev->dev, "failed to wait for hqd deactivation\n");
1040 	} else {
1041 		dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
1042 	}
1043 
1044 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
1045 	mutex_unlock(&adev->srbm_mutex);
1046 	/* exit safe mode */
1047 	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
1048 }
1049 
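/* KIQ PM4 packet emitters for GFX9; the *_size fields below are the number of
 * ring dwords each emitter writes (matching the amdgpu_ring_write() calls
 * above) and are used to reserve space on the KIQ ring.
 */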
1050 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1051 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
1052 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
1053 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1054 	.kiq_query_status = gfx_v9_0_kiq_query_status,
1055 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1056 	.kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
1057 	.set_resources_size = 8,
1058 	.map_queues_size = 7,
1059 	.unmap_queues_size = 6,
1060 	.query_status_size = 7,
1061 	.invalidate_tlbs_size = 2,
1062 };
1063 
1064 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1065 {
1066 	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
1067 }
1068 
1069 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1070 {
1071 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1072 	case IP_VERSION(9, 0, 1):
1073 		soc15_program_register_sequence(adev,
1074 						golden_settings_gc_9_0,
1075 						ARRAY_SIZE(golden_settings_gc_9_0));
1076 		soc15_program_register_sequence(adev,
1077 						golden_settings_gc_9_0_vg10,
1078 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1079 		break;
1080 	case IP_VERSION(9, 2, 1):
1081 		soc15_program_register_sequence(adev,
1082 						golden_settings_gc_9_2_1,
1083 						ARRAY_SIZE(golden_settings_gc_9_2_1));
1084 		soc15_program_register_sequence(adev,
1085 						golden_settings_gc_9_2_1_vg12,
1086 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1087 		break;
1088 	case IP_VERSION(9, 4, 0):
1089 		soc15_program_register_sequence(adev,
1090 						golden_settings_gc_9_0,
1091 						ARRAY_SIZE(golden_settings_gc_9_0));
1092 		soc15_program_register_sequence(adev,
1093 						golden_settings_gc_9_0_vg20,
1094 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1095 		break;
1096 	case IP_VERSION(9, 4, 1):
1097 		soc15_program_register_sequence(adev,
1098 						golden_settings_gc_9_4_1_arct,
1099 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1100 		break;
1101 	case IP_VERSION(9, 2, 2):
1102 	case IP_VERSION(9, 1, 0):
1103 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1104 						ARRAY_SIZE(golden_settings_gc_9_1));
1105 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1106 			soc15_program_register_sequence(adev,
1107 							golden_settings_gc_9_1_rv2,
1108 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1109 		else
1110 			soc15_program_register_sequence(adev,
1111 							golden_settings_gc_9_1_rv1,
1112 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1113 		break;
1114 	case IP_VERSION(9, 3, 0):
1115 		soc15_program_register_sequence(adev,
1116 						golden_settings_gc_9_1_rn,
1117 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
1118 		return; /* for renoir, the common golden settings are not needed */
1119 	case IP_VERSION(9, 4, 2):
1120 		gfx_v9_4_2_init_golden_registers(adev,
1121 						 adev->smuio.funcs->get_die_id(adev));
1122 		break;
1123 	default:
1124 		break;
1125 	}
1126 
1127 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1128 	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
1129 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1130 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1131 }
1132 
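/* Emit a PACKET3_WRITE_DATA packet that writes @val to register @reg through
 * the selected CP engine, optionally requesting write confirmation (@wc).
 */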
1133 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1134 				       bool wc, uint32_t reg, uint32_t val)
1135 {
1136 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1137 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1138 				WRITE_DATA_DST_SEL(0) |
1139 				(wc ? WR_CONFIRM : 0));
1140 	amdgpu_ring_write(ring, reg);
1141 	amdgpu_ring_write(ring, 0);
1142 	amdgpu_ring_write(ring, val);
1143 }
1144 
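/* Emit a PACKET3_WAIT_REG_MEM packet that polls a register or memory location
 * until (value & @mask) == @ref, using the "equal" compare function and the
 * given poll interval.
 */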
1145 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1146 				  int mem_space, int opt, uint32_t addr0,
1147 				  uint32_t addr1, uint32_t ref, uint32_t mask,
1148 				  uint32_t inv)
1149 {
1150 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1151 	amdgpu_ring_write(ring,
1152 				 /* memory (1) or register (0) */
1153 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1154 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1155 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1156 				 WAIT_REG_MEM_ENGINE(eng_sel)));
1157 
1158 	if (mem_space)
1159 		BUG_ON(addr0 & 0x3); /* Dword align */
1160 	amdgpu_ring_write(ring, addr0);
1161 	amdgpu_ring_write(ring, addr1);
1162 	amdgpu_ring_write(ring, ref);
1163 	amdgpu_ring_write(ring, mask);
1164 	amdgpu_ring_write(ring, inv); /* poll interval */
1165 }
1166 
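/* Basic ring liveness test: push a write of 0xDEADBEEF to SCRATCH_REG0
 * through the ring and poll the register until the value shows up or the
 * timeout expires.
 */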
1167 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1168 {
1169 	struct amdgpu_device *adev = ring->adev;
1170 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1171 	uint32_t tmp = 0;
1172 	unsigned i;
1173 	int r;
1174 
1175 	WREG32(scratch, 0xCAFEDEAD);
1176 	r = amdgpu_ring_alloc(ring, 3);
1177 	if (r)
1178 		return r;
1179 
1180 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1181 	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1182 	amdgpu_ring_write(ring, 0xDEADBEEF);
1183 	amdgpu_ring_commit(ring);
1184 
1185 	for (i = 0; i < adev->usec_timeout; i++) {
1186 		tmp = RREG32(scratch);
1187 		if (tmp == 0xDEADBEEF)
1188 			break;
1189 		udelay(1);
1190 	}
1191 
1192 	if (i >= adev->usec_timeout)
1193 		r = -ETIMEDOUT;
1194 	return r;
1195 }
1196 
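/* Indirect buffer test: submit an IB that writes 0xDEADBEEF to a writeback
 * slot, wait for its fence, then verify the value landed in memory.
 */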
1197 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1198 {
1199 	struct amdgpu_device *adev = ring->adev;
1200 	struct amdgpu_ib ib;
1201 	struct dma_fence *f = NULL;
1202 
1203 	unsigned index;
1204 	uint64_t gpu_addr;
1205 	uint32_t tmp;
1206 	long r;
1207 
1208 	r = amdgpu_device_wb_get(adev, &index);
1209 	if (r)
1210 		return r;
1211 
1212 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1213 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1214 	memset(&ib, 0, sizeof(ib));
1215 
1216 	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1217 	if (r)
1218 		goto err1;
1219 
1220 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1221 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1222 	ib.ptr[2] = lower_32_bits(gpu_addr);
1223 	ib.ptr[3] = upper_32_bits(gpu_addr);
1224 	ib.ptr[4] = 0xDEADBEEF;
1225 	ib.length_dw = 5;
1226 
1227 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1228 	if (r)
1229 		goto err2;
1230 
1231 	r = dma_fence_wait_timeout(f, false, timeout);
1232 	if (r == 0) {
1233 		r = -ETIMEDOUT;
1234 		goto err2;
1235 	} else if (r < 0) {
1236 		goto err2;
1237 	}
1238 
1239 	tmp = adev->wb.wb[index];
1240 	if (tmp == 0xDEADBEEF)
1241 		r = 0;
1242 	else
1243 		r = -EINVAL;
1244 
1245 err2:
1246 	amdgpu_ib_free(adev, &ib, NULL);
1247 	dma_fence_put(f);
1248 err1:
1249 	amdgpu_device_wb_free(adev, index);
1250 	return r;
1251 }
1252 
1253 
1254 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1255 {
1256 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
1257 	amdgpu_ucode_release(&adev->gfx.me_fw);
1258 	amdgpu_ucode_release(&adev->gfx.ce_fw);
1259 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
1260 	amdgpu_ucode_release(&adev->gfx.mec_fw);
1261 	amdgpu_ucode_release(&adev->gfx.mec2_fw);
1262 
1263 	kfree(adev->gfx.rlc.register_list_format);
1264 }
1265 
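/* Record whether the ME/MEC firmware is recent enough for the firmware-assisted
 * register write-then-wait path, and warn once if the CP firmware is too old.
 */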
1266 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1267 {
1268 	adev->gfx.me_fw_write_wait = false;
1269 	adev->gfx.mec_fw_write_wait = false;
1270 
1271 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1272 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1273 	     (adev->gfx.mec_feature_version < 46) ||
1274 	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
1275 	     (adev->gfx.pfp_feature_version < 46)))
1276 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1277 
1278 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1279 	case IP_VERSION(9, 0, 1):
1280 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1281 		    (adev->gfx.me_feature_version >= 42) &&
1282 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1283 		    (adev->gfx.pfp_feature_version >= 42))
1284 			adev->gfx.me_fw_write_wait = true;
1285 
1286 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1287 		    (adev->gfx.mec_feature_version >= 42))
1288 			adev->gfx.mec_fw_write_wait = true;
1289 		break;
1290 	case IP_VERSION(9, 2, 1):
1291 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1292 		    (adev->gfx.me_feature_version >= 44) &&
1293 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1294 		    (adev->gfx.pfp_feature_version >= 44))
1295 			adev->gfx.me_fw_write_wait = true;
1296 
1297 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1298 		    (adev->gfx.mec_feature_version >= 44))
1299 			adev->gfx.mec_fw_write_wait = true;
1300 		break;
1301 	case IP_VERSION(9, 4, 0):
1302 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1303 		    (adev->gfx.me_feature_version >= 44) &&
1304 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1305 		    (adev->gfx.pfp_feature_version >= 44))
1306 			adev->gfx.me_fw_write_wait = true;
1307 
1308 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1309 		    (adev->gfx.mec_feature_version >= 44))
1310 			adev->gfx.mec_fw_write_wait = true;
1311 		break;
1312 	case IP_VERSION(9, 1, 0):
1313 	case IP_VERSION(9, 2, 2):
1314 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1315 		    (adev->gfx.me_feature_version >= 42) &&
1316 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1317 		    (adev->gfx.pfp_feature_version >= 42))
1318 			adev->gfx.me_fw_write_wait = true;
1319 
1320 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1321 		    (adev->gfx.mec_feature_version >= 42))
1322 			adev->gfx.mec_fw_write_wait = true;
1323 		break;
1324 	default:
1325 		adev->gfx.me_fw_write_wait = true;
1326 		adev->gfx.mec_fw_write_wait = true;
1327 		break;
1328 	}
1329 }
1330 
1331 struct amdgpu_gfxoff_quirk {
1332 	u16 chip_vendor;
1333 	u16 chip_device;
1334 	u16 subsys_vendor;
1335 	u16 subsys_device;
1336 	u8 revision;
1337 };
1338 
1339 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1340 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1341 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1342 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1343 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1344 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1345 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1346 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1347 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1348 	/* https://bbs.openkylin.top/t/topic/171497 */
1349 	{ 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
1350 	/* HP 705G4 DM with R5 2400G */
1351 	{ 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
1352 	{ 0, 0, 0, 0, 0 },
1353 };
1354 
1355 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1356 {
1357 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1358 
1359 	while (p && p->chip_device != 0) {
1360 		if (pdev->vendor == p->chip_vendor &&
1361 		    pdev->device == p->chip_device &&
1362 		    pdev->subsystem_vendor == p->subsys_vendor &&
1363 		    pdev->subsystem_device == p->subsys_device &&
1364 		    pdev->revision == p->revision) {
1365 			return true;
1366 		}
1367 		++p;
1368 	}
1369 	return false;
1370 }
1371 
1372 static bool is_raven_kicker(struct amdgpu_device *adev)
1373 {
1374 	if (adev->pm.fw_version >= 0x41e2b)
1375 		return true;
1376 	else
1377 		return false;
1378 }
1379 
1380 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1381 {
1382 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1383 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1384 	    (adev->gfx.me_feature_version >= 52))
1385 		return true;
1386 	else
1387 		return false;
1388 }
1389 
1390 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1391 {
1392 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1393 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1394 
1395 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1396 	case IP_VERSION(9, 0, 1):
1397 	case IP_VERSION(9, 2, 1):
1398 	case IP_VERSION(9, 4, 0):
1399 		break;
1400 	case IP_VERSION(9, 2, 2):
1401 	case IP_VERSION(9, 1, 0):
1402 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1403 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1404 		    ((!is_raven_kicker(adev) &&
1405 		      adev->gfx.rlc_fw_version < 531) ||
1406 		     (adev->gfx.rlc_feature_version < 1) ||
1407 		     !adev->gfx.rlc.is_rlc_v2_1))
1408 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1409 
1410 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1411 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1412 				AMD_PG_SUPPORT_CP |
1413 				AMD_PG_SUPPORT_RLC_SMU_HS;
1414 		break;
1415 	case IP_VERSION(9, 3, 0):
1416 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1417 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1418 				AMD_PG_SUPPORT_CP |
1419 				AMD_PG_SUPPORT_RLC_SMU_HS;
1420 		break;
1421 	default:
1422 		break;
1423 	}
1424 }
1425 
1426 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1427 					  char *chip_name)
1428 {
1429 	int err;
1430 
1431 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
1432 				   "amdgpu/%s_pfp.bin", chip_name);
1433 	if (err)
1434 		goto out;
1435 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1436 
1437 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1438 				   "amdgpu/%s_me.bin", chip_name);
1439 	if (err)
1440 		goto out;
1441 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1442 
1443 	err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1444 				   "amdgpu/%s_ce.bin", chip_name);
1445 	if (err)
1446 		goto out;
1447 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1448 
1449 out:
1450 	if (err) {
1451 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1452 		amdgpu_ucode_release(&adev->gfx.me_fw);
1453 		amdgpu_ucode_release(&adev->gfx.ce_fw);
1454 	}
1455 	return err;
1456 }
1457 
1458 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1459 				       char *chip_name)
1460 {
1461 	int err;
1462 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1463 	uint16_t version_major;
1464 	uint16_t version_minor;
1465 	uint32_t smu_version;
1466 
1467 	/*
1468 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1469 	 * instead of picasso_rlc.bin.
1470 	 * Judgment method:
1471 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1472 	 *          or revision >= 0xD8 && revision <= 0xDF
1473 	 * otherwise is PCO FP5
1474 	 * otherwise it is PCO FP5
1475 	if (!strcmp(chip_name, "picasso") &&
1476 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1477 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1478 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1479 					   "amdgpu/%s_rlc_am4.bin", chip_name);
1480 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1481 		(smu_version >= 0x41e2b))
1482 		/* SMC is loaded by the SBIOS on APUs, so the SMU version
1483 		 * can be queried directly.
1484 		 */
1485 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1486 					   "amdgpu/%s_kicker_rlc.bin", chip_name);
1487 	else
1488 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1489 					   "amdgpu/%s_rlc.bin", chip_name);
1490 	if (err)
1491 		goto out;
1492 
1493 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1494 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1495 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1496 	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1497 out:
1498 	if (err)
1499 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1500 
1501 	return err;
1502 }
1503 
1504 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1505 {
1506 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1507 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1508 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1509 		return false;
1510 
1511 	return true;
1512 }
1513 
1514 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1515 					      char *chip_name)
1516 {
1517 	int err;
1518 
1519 	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1520 		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1521 					   "amdgpu/%s_sjt_mec.bin", chip_name);
1522 	else
1523 		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1524 					   "amdgpu/%s_mec.bin", chip_name);
1525 	if (err)
1526 		goto out;
1527 
1528 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1529 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1530 
1531 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1532 		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1533 			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1534 						   "amdgpu/%s_sjt_mec2.bin", chip_name);
1535 		else
1536 			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1537 						   "amdgpu/%s_mec2.bin", chip_name);
1538 		if (!err) {
1539 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1540 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1541 		} else {
1542 			err = 0;
1543 			amdgpu_ucode_release(&adev->gfx.mec2_fw);
1544 		}
1545 	} else {
1546 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1547 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1548 	}
1549 
1550 	gfx_v9_0_check_if_need_gfxoff(adev);
1551 	gfx_v9_0_check_fw_write_wait(adev);
1552 
1553 out:
1554 	if (err)
1555 		amdgpu_ucode_release(&adev->gfx.mec_fw);
1556 	return err;
1557 }
1558 
1559 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1560 {
1561 	char ucode_prefix[30];
1562 	int r;
1563 
1564 	DRM_DEBUG("\n");
1565 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1566 
1567 	/* No CPG in Arcturus */
1568 	if (adev->gfx.num_gfx_rings) {
1569 		r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1570 		if (r)
1571 			return r;
1572 	}
1573 
1574 	r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1575 	if (r)
1576 		return r;
1577 
1578 	r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1579 	if (r)
1580 		return r;
1581 
1582 	return r;
1583 }
1584 
1585 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1586 {
1587 	u32 count = 0;
1588 	const struct cs_section_def *sect = NULL;
1589 	const struct cs_extent_def *ext = NULL;
1590 
1591 	/* begin clear state */
1592 	count += 2;
1593 	/* context control state */
1594 	count += 3;
1595 
1596 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1597 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1598 			if (sect->id == SECT_CONTEXT)
1599 				count += 2 + ext->reg_count;
1600 			else
1601 				return 0;
1602 		}
1603 	}
1604 
1605 	/* end clear state */
1606 	count += 2;
1607 	/* clear state */
1608 	count += 2;
1609 
1610 	return count;
1611 }
1612 
1613 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1614 				    volatile u32 *buffer)
1615 {
1616 	u32 count = 0, i;
1617 	const struct cs_section_def *sect = NULL;
1618 	const struct cs_extent_def *ext = NULL;
1619 
1620 	if (adev->gfx.rlc.cs_data == NULL)
1621 		return;
1622 	if (buffer == NULL)
1623 		return;
1624 
1625 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1626 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1627 
1628 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1629 	buffer[count++] = cpu_to_le32(0x80000000);
1630 	buffer[count++] = cpu_to_le32(0x80000000);
1631 
1632 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1633 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1634 			if (sect->id == SECT_CONTEXT) {
1635 				buffer[count++] =
1636 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1637 				buffer[count++] = cpu_to_le32(ext->reg_index -
1638 						PACKET3_SET_CONTEXT_REG_START);
1639 				for (i = 0; i < ext->reg_count; i++)
1640 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1641 			} else {
1642 				return;
1643 			}
1644 		}
1645 	}
1646 
1647 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1648 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1649 
1650 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1651 	buffer[count++] = cpu_to_le32(0);
1652 }
1653 
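/* Program the RLC always-on CU masks: the first always_on_cu_num CUs of each
 * SH stay active for load balancing, and the first pg_always_on_cu_num of
 * them are also kept on for power gating.
 */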
1654 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1655 {
1656 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1657 	uint32_t pg_always_on_cu_num = 2;
1658 	uint32_t always_on_cu_num;
1659 	uint32_t i, j, k;
1660 	uint32_t mask, cu_bitmap, counter;
1661 
1662 	if (adev->flags & AMD_IS_APU)
1663 		always_on_cu_num = 4;
1664 	else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1665 		always_on_cu_num = 8;
1666 	else
1667 		always_on_cu_num = 12;
1668 
1669 	mutex_lock(&adev->grbm_idx_mutex);
1670 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1671 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1672 			mask = 1;
1673 			cu_bitmap = 0;
1674 			counter = 0;
1675 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1676 
1677 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1678 				if (cu_info->bitmap[0][i][j] & mask) {
1679 					if (counter == pg_always_on_cu_num)
1680 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1681 					if (counter < always_on_cu_num)
1682 						cu_bitmap |= mask;
1683 					else
1684 						break;
1685 					counter++;
1686 				}
1687 				mask <<= 1;
1688 			}
1689 
1690 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1691 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1692 		}
1693 	}
1694 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1695 	mutex_unlock(&adev->grbm_idx_mutex);
1696 }
1697 
1698 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1699 {
1700 	uint32_t data;
1701 
1702 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1703 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1704 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1705 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1706 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1707 
1708 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1709 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1710 
1711 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1712 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1713 
1714 	mutex_lock(&adev->grbm_idx_mutex);
1715 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
1716 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1717 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1718 
1719 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1720 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1721 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1722 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1723 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1724 
1725 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1726 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1727 	data &= 0x0000FFFF;
1728 	data |= 0x00C00000;
1729 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1730 
1731 	/*
1732 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1733 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1734 	 */
1735 
1736 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1737 	 * but used for RLC_LB_CNTL configuration */
1738 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1739 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1740 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1741 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1742 	mutex_unlock(&adev->grbm_idx_mutex);
1743 
1744 	gfx_v9_0_init_always_on_cu_mask(adev);
1745 }
1746 
1747 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1748 {
1749 	uint32_t data;
1750 
1751 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1752 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1753 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1754 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1755 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1756 
1757 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1758 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1759 
1760 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1761 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1762 
1763 	mutex_lock(&adev->grbm_idx_mutex);
1764 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
1765 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1766 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1767 
1768 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1769 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1770 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1771 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1772 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1773 
1774 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1775 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1776 	data &= 0x0000FFFF;
1777 	data |= 0x00C00000;
1778 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1779 
1780 	/*
1781 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1782 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1783 	 */
1784 
1785 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1786 	 * but used for RLC_LB_CNTL configuration */
1787 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1788 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1789 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1790 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1791 	mutex_unlock(&adev->grbm_idx_mutex);
1792 
1793 	gfx_v9_0_init_always_on_cu_mask(adev);
1794 }
1795 
1796 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1797 {
1798 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1799 }
1800 
1801 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1802 {
1803 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1804 		return 5;
1805 	else
1806 		return 4;
1807 }
1808 
1809 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1810 {
1811 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1812 
1813 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1814 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1815 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1816 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1817 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1818 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1819 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1820 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1821 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1822 }
1823 
1824 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1825 {
1826 	const struct cs_section_def *cs_data;
1827 	int r;
1828 
1829 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1830 
1831 	cs_data = adev->gfx.rlc.cs_data;
1832 
1833 	if (cs_data) {
1834 		/* init clear state block */
1835 		r = amdgpu_gfx_rlc_init_csb(adev);
1836 		if (r)
1837 			return r;
1838 	}
1839 
1840 	if (adev->flags & AMD_IS_APU) {
1841 		/* TODO: double check the cp_table_size for RV */
1842 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1843 		r = amdgpu_gfx_rlc_init_cpt(adev);
1844 		if (r)
1845 			return r;
1846 	}
1847 
1848 	return 0;
1849 }
1850 
1851 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1852 {
1853 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1854 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1855 }
1856 
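/* Allocate the MEC EOP (HPD) buffer for all enabled compute rings and copy
 * the MEC firmware image into a GTT buffer object used when loading the
 * microcode.
 */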
1857 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1858 {
1859 	int r;
1860 	u32 *hpd;
1861 	const __le32 *fw_data;
1862 	unsigned fw_size;
1863 	u32 *fw;
1864 	size_t mec_hpd_size;
1865 
1866 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1867 
1868 	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1869 
1870 	/* take ownership of the relevant compute queues */
1871 	amdgpu_gfx_compute_queue_acquire(adev);
1872 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1873 	if (mec_hpd_size) {
1874 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1875 					      AMDGPU_GEM_DOMAIN_VRAM |
1876 					      AMDGPU_GEM_DOMAIN_GTT,
1877 					      &adev->gfx.mec.hpd_eop_obj,
1878 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1879 					      (void **)&hpd);
1880 		if (r) {
1881 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1882 			gfx_v9_0_mec_fini(adev);
1883 			return r;
1884 		}
1885 
1886 		memset(hpd, 0, mec_hpd_size);
1887 
1888 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1889 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1890 	}
1891 
1892 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1893 
1894 	fw_data = (const __le32 *)
1895 		(adev->gfx.mec_fw->data +
1896 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1897 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1898 
1899 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1900 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1901 				      &adev->gfx.mec.mec_fw_obj,
1902 				      &adev->gfx.mec.mec_fw_gpu_addr,
1903 				      (void **)&fw);
1904 	if (r) {
1905 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1906 		gfx_v9_0_mec_fini(adev);
1907 		return r;
1908 	}
1909 
1910 	memcpy(fw, fw_data, fw_size);
1911 
1912 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1913 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1914 
1915 	return 0;
1916 }
1917 
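/* Read one SQ indirect register for the given SIMD/wave via the
 * SQ_IND_INDEX/SQ_IND_DATA pair.
 */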
1918 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1919 {
1920 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1921 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1922 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1923 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1924 		(SQ_IND_INDEX__FORCE_READ_MASK));
1925 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1926 }
1927 
1928 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1929 			   uint32_t wave, uint32_t thread,
1930 			   uint32_t regno, uint32_t num, uint32_t *out)
1931 {
1932 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1933 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1934 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1935 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1936 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1937 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1938 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1939 	while (num--)
1940 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1941 }
1942 
1943 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1944 {
1945 	/* type 1 wave data */
1946 	dst[(*no_fields)++] = 1;
1947 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1948 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1949 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1950 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1951 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1952 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1953 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1954 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1955 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1956 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1957 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1958 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1959 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1960 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1961 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1962 }
1963 
1964 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1965 				     uint32_t wave, uint32_t start,
1966 				     uint32_t size, uint32_t *dst)
1967 {
1968 	wave_read_regs(
1969 		adev, simd, wave, 0,
1970 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1971 }
1972 
1973 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1974 				     uint32_t wave, uint32_t thread,
1975 				     uint32_t start, uint32_t size,
1976 				     uint32_t *dst)
1977 {
1978 	wave_read_regs(
1979 		adev, simd, wave, thread,
1980 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1981 }
1982 
1983 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1984 				  u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1985 {
1986 	soc15_grbm_select(adev, me, pipe, q, vm, 0);
1987 }
1988 
1989 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1990 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1991 	.select_se_sh = &gfx_v9_0_select_se_sh,
1992 	.read_wave_data = &gfx_v9_0_read_wave_data,
1993 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1994 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1995 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1996 };
1997 
1998 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
1999 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
2000 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2001 	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2002 };
2003 
2004 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2005 	.ras_block = {
2006 		.hw_ops = &gfx_v9_0_ras_ops,
2007 	},
2008 };
2009 
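/* Fill in the per-ASIC gfx configuration (hw context count, SC FIFO sizes and
 * GB_ADDR_CONFIG) and decode the address-config fields used elsewhere.
 */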
2010 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2011 {
2012 	u32 gb_addr_config;
2013 	int err;
2014 
2015 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2016 	case IP_VERSION(9, 0, 1):
2017 		adev->gfx.config.max_hw_contexts = 8;
2018 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2019 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2020 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2021 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2022 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2023 		break;
2024 	case IP_VERSION(9, 2, 1):
2025 		adev->gfx.config.max_hw_contexts = 8;
2026 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2027 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2028 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2029 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2030 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2031 		DRM_INFO("fix gfx.config for vega12\n");
2032 		break;
2033 	case IP_VERSION(9, 4, 0):
2034 		adev->gfx.ras = &gfx_v9_0_ras;
2035 		adev->gfx.config.max_hw_contexts = 8;
2036 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2037 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2038 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2039 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2040 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2041 		gb_addr_config &= ~0xf3e777ff;
2042 		gb_addr_config |= 0x22014042;
2043 		/* check vbios table if gpu info is not available */
2044 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2045 		if (err)
2046 			return err;
2047 		break;
2048 	case IP_VERSION(9, 2, 2):
2049 	case IP_VERSION(9, 1, 0):
2050 		adev->gfx.config.max_hw_contexts = 8;
2051 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2052 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2053 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2054 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2055 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2056 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2057 		else
2058 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2059 		break;
2060 	case IP_VERSION(9, 4, 1):
2061 		adev->gfx.ras = &gfx_v9_4_ras;
2062 		adev->gfx.config.max_hw_contexts = 8;
2063 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2064 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2065 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2066 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2067 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2068 		gb_addr_config &= ~0xf3e777ff;
2069 		gb_addr_config |= 0x22014042;
2070 		break;
2071 	case IP_VERSION(9, 3, 0):
2072 		adev->gfx.config.max_hw_contexts = 8;
2073 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2074 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2075 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2076 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2077 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2078 		gb_addr_config &= ~0xf3e777ff;
2079 		gb_addr_config |= 0x22010042;
2080 		break;
2081 	case IP_VERSION(9, 4, 2):
2082 		adev->gfx.ras = &gfx_v9_4_2_ras;
2083 		adev->gfx.config.max_hw_contexts = 8;
2084 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2085 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2086 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2087 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2088 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2089 		gb_addr_config &= ~0xf3e777ff;
2090 		gb_addr_config |= 0x22014042;
2091 		/* check vbios table if gpu info is not available */
2092 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2093 		if (err)
2094 			return err;
2095 		break;
2096 	default:
2097 		BUG();
2098 		break;
2099 	}
2100 
2101 	adev->gfx.config.gb_addr_config = gb_addr_config;
2102 
2103 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2104 			REG_GET_FIELD(
2105 					adev->gfx.config.gb_addr_config,
2106 					GB_ADDR_CONFIG,
2107 					NUM_PIPES);
2108 
2109 	adev->gfx.config.max_tile_pipes =
2110 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2111 
2112 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2113 			REG_GET_FIELD(
2114 					adev->gfx.config.gb_addr_config,
2115 					GB_ADDR_CONFIG,
2116 					NUM_BANKS);
2117 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2118 			REG_GET_FIELD(
2119 					adev->gfx.config.gb_addr_config,
2120 					GB_ADDR_CONFIG,
2121 					MAX_COMPRESSED_FRAGS);
2122 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2123 			REG_GET_FIELD(
2124 					adev->gfx.config.gb_addr_config,
2125 					GB_ADDR_CONFIG,
2126 					NUM_RB_PER_SE);
2127 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2128 			REG_GET_FIELD(
2129 					adev->gfx.config.gb_addr_config,
2130 					GB_ADDR_CONFIG,
2131 					NUM_SHADER_ENGINES);
2132 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2133 			REG_GET_FIELD(
2134 					adev->gfx.config.gb_addr_config,
2135 					GB_ADDR_CONFIG,
2136 					PIPE_INTERLEAVE_SIZE));
2137 
2138 	return 0;
2139 }
2140 
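/* Initialize one compute ring: derive its MEC/pipe/queue placement, doorbell
 * index and EOP address, then register it with the EOP interrupt source.
 */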
2141 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2142 				      int mec, int pipe, int queue)
2143 {
2144 	unsigned irq_type;
2145 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2146 	unsigned int hw_prio;
2147 
2150 	/* mec0 is me1 */
2151 	ring->me = mec + 1;
2152 	ring->pipe = pipe;
2153 	ring->queue = queue;
2154 
2155 	ring->ring_obj = NULL;
2156 	ring->use_doorbell = true;
2157 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2158 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2159 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2160 	ring->vm_hub = AMDGPU_GFXHUB(0);
2161 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2162 
2163 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2164 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2165 		+ ring->pipe;
2166 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2167 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2168 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2169 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2170 				hw_prio, NULL);
2171 }
2172 
2173 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2174 {
2175 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2176 	uint32_t *ptr;
2177 	uint32_t inst;
2178 
2179 	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2180 	if (!ptr) {
2181 		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2182 		adev->gfx.ip_dump_core = NULL;
2183 	} else {
2184 		adev->gfx.ip_dump_core = ptr;
2185 	}
2186 
2187 	/* Allocate memory for compute queue registers for all the instances */
2188 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2189 	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2190 		adev->gfx.mec.num_queue_per_pipe;
2191 
2192 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2193 	if (!ptr) {
2194 		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2195 		adev->gfx.ip_dump_compute_queues = NULL;
2196 	} else {
2197 		adev->gfx.ip_dump_compute_queues = ptr;
2198 	}
2199 }
2200 
2201 static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
2202 {
2203 	int i, j, k, r, ring_id;
2204 	int xcc_id = 0;
2205 	struct amdgpu_ring *ring;
2206 	struct amdgpu_device *adev = ip_block->adev;
2207 	unsigned int hw_prio;
2208 
2209 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2210 	case IP_VERSION(9, 0, 1):
2211 	case IP_VERSION(9, 2, 1):
2212 	case IP_VERSION(9, 4, 0):
2213 	case IP_VERSION(9, 2, 2):
2214 	case IP_VERSION(9, 1, 0):
2215 	case IP_VERSION(9, 4, 1):
2216 	case IP_VERSION(9, 3, 0):
2217 	case IP_VERSION(9, 4, 2):
2218 		adev->gfx.mec.num_mec = 2;
2219 		break;
2220 	default:
2221 		adev->gfx.mec.num_mec = 1;
2222 		break;
2223 	}
2224 
2225 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2226 	case IP_VERSION(9, 4, 2):
2227 		adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
2228 		adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
2229 		if (adev->gfx.mec_fw_version >= 88) {
2230 			adev->gfx.enable_cleaner_shader = true;
2231 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
2232 			if (r) {
2233 				adev->gfx.enable_cleaner_shader = false;
2234 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
2235 			}
2236 		}
2237 		break;
2238 	default:
2239 		adev->gfx.enable_cleaner_shader = false;
2240 		break;
2241 	}
2242 
2243 	adev->gfx.mec.num_pipe_per_mec = 4;
2244 	adev->gfx.mec.num_queue_per_pipe = 8;
2245 
2246 	/* EOP Event */
2247 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2248 	if (r)
2249 		return r;
2250 
2251 	/* Bad opcode Event */
2252 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
2253 			      GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
2254 			      &adev->gfx.bad_op_irq);
2255 	if (r)
2256 		return r;
2257 
2258 	/* Privileged reg */
2259 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2260 			      &adev->gfx.priv_reg_irq);
2261 	if (r)
2262 		return r;
2263 
2264 	/* Privileged inst */
2265 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2266 			      &adev->gfx.priv_inst_irq);
2267 	if (r)
2268 		return r;
2269 
2270 	/* ECC error */
2271 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2272 			      &adev->gfx.cp_ecc_error_irq);
2273 	if (r)
2274 		return r;
2275 
2276 	/* FUE error */
2277 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2278 			      &adev->gfx.cp_ecc_error_irq);
2279 	if (r)
2280 		return r;
2281 
2282 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2283 
2284 	if (adev->gfx.rlc.funcs) {
2285 		if (adev->gfx.rlc.funcs->init) {
2286 			r = adev->gfx.rlc.funcs->init(adev);
2287 			if (r) {
2288 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2289 				return r;
2290 			}
2291 		}
2292 	}
2293 
2294 	r = gfx_v9_0_mec_init(adev);
2295 	if (r) {
2296 		DRM_ERROR("Failed to init MEC BOs!\n");
2297 		return r;
2298 	}
2299 
2300 	/* set up the gfx ring */
2301 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2302 		ring = &adev->gfx.gfx_ring[i];
2303 		ring->ring_obj = NULL;
2304 		if (!i)
2305 			sprintf(ring->name, "gfx");
2306 		else
2307 			sprintf(ring->name, "gfx_%d", i);
2308 		ring->use_doorbell = true;
2309 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2310 
2311 		/* disable scheduler on the real ring */
2312 		ring->no_scheduler = adev->gfx.mcbp;
2313 		ring->vm_hub = AMDGPU_GFXHUB(0);
2314 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2315 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2316 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2317 		if (r)
2318 			return r;
2319 	}
2320 
2321 	/* set up the software rings */
2322 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2323 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2324 			ring = &adev->gfx.sw_gfx_ring[i];
2325 			ring->ring_obj = NULL;
2326 			sprintf(ring->name, amdgpu_sw_ring_name(i));
2327 			ring->use_doorbell = true;
2328 			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2329 			ring->is_sw_ring = true;
2330 			hw_prio = amdgpu_sw_ring_priority(i);
2331 			ring->vm_hub = AMDGPU_GFXHUB(0);
2332 			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2333 					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2334 					     NULL);
2335 			if (r)
2336 				return r;
2337 			ring->wptr = 0;
2338 		}
2339 
2340 		/* init the muxer and add software rings */
2341 		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2342 					 GFX9_NUM_SW_GFX_RINGS);
2343 		if (r) {
2344 			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2345 			return r;
2346 		}
2347 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2348 			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2349 							&adev->gfx.sw_gfx_ring[i]);
2350 			if (r) {
2351 				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2352 				return r;
2353 			}
2354 		}
2355 	}
2356 
2357 	/* set up the compute queues - allocate horizontally across pipes */
2358 	ring_id = 0;
2359 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2360 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2361 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2362 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2363 								     k, j))
2364 					continue;
2365 
2366 				r = gfx_v9_0_compute_ring_init(adev,
2367 							       ring_id,
2368 							       i, k, j);
2369 				if (r)
2370 					return r;
2371 
2372 				ring_id++;
2373 			}
2374 		}
2375 	}
2376 
2377 	/* TODO: Add queue reset mask when FW fully supports it */
2378 	adev->gfx.gfx_supported_reset =
2379 		amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
2380 	adev->gfx.compute_supported_reset =
2381 		amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
2382 
2383 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2384 	if (r) {
2385 		DRM_ERROR("Failed to init KIQ BOs!\n");
2386 		return r;
2387 	}
2388 
2389 	r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2390 	if (r)
2391 		return r;
2392 
2393 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2394 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2395 	if (r)
2396 		return r;
2397 
2398 	adev->gfx.ce_ram_size = 0x8000;
2399 
2400 	r = gfx_v9_0_gpu_early_init(adev);
2401 	if (r)
2402 		return r;
2403 
2404 	if (amdgpu_gfx_ras_sw_init(adev)) {
2405 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2406 		return -EINVAL;
2407 	}
2408 
2409 	gfx_v9_0_alloc_ip_dump(adev);
2410 
2411 	r = amdgpu_gfx_sysfs_init(adev);
2412 	if (r)
2413 		return r;
2414 
2415 	return 0;
2416 }
2417 
2418 
2419 static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
2420 {
2421 	int i;
2422 	struct amdgpu_device *adev = ip_block->adev;
2423 
2424 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2425 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2426 			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2427 		amdgpu_ring_mux_fini(&adev->gfx.muxer);
2428 	}
2429 
2430 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2431 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2432 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2433 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2434 
2435 	amdgpu_gfx_mqd_sw_fini(adev, 0);
2436 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2437 	amdgpu_gfx_kiq_fini(adev, 0);
2438 
2439 	amdgpu_gfx_cleaner_shader_sw_fini(adev);
2440 
2441 	gfx_v9_0_mec_fini(adev);
2442 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2443 				&adev->gfx.rlc.clear_state_gpu_addr,
2444 				(void **)&adev->gfx.rlc.cs_ptr);
2445 	if (adev->flags & AMD_IS_APU) {
2446 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2447 				&adev->gfx.rlc.cp_table_gpu_addr,
2448 				(void **)&adev->gfx.rlc.cp_table_ptr);
2449 	}
2450 	gfx_v9_0_free_microcode(adev);
2451 
2452 	amdgpu_gfx_sysfs_fini(adev);
2453 
2454 	kfree(adev->gfx.ip_dump_core);
2455 	kfree(adev->gfx.ip_dump_compute_queues);
2456 
2457 	return 0;
2458 }
2459 
2460 
2461 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2462 {
2463 	/* TODO */
2464 }
2465 
2466 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2467 			   u32 instance, int xcc_id)
2468 {
2469 	u32 data;
2470 
2471 	if (instance == 0xffffffff)
2472 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2473 	else
2474 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2475 
2476 	if (se_num == 0xffffffff)
2477 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2478 	else
2479 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2480 
2481 	if (sh_num == 0xffffffff)
2482 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2483 	else
2484 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2485 
2486 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2487 }
2488 
2489 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2490 {
2491 	u32 data, mask;
2492 
2493 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2494 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2495 
2496 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2497 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2498 
2499 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2500 					 adev->gfx.config.max_sh_per_se);
2501 
2502 	return (~data) & mask;
2503 }
2504 
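/* Walk every shader engine/array and accumulate the bitmap of active render
 * backends into adev->gfx.config.
 */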
2505 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2506 {
2507 	int i, j;
2508 	u32 data;
2509 	u32 active_rbs = 0;
2510 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2511 					adev->gfx.config.max_sh_per_se;
2512 
2513 	mutex_lock(&adev->grbm_idx_mutex);
2514 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2515 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2516 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2517 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2518 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2519 					       rb_bitmap_width_per_sh);
2520 		}
2521 	}
2522 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2523 	mutex_unlock(&adev->grbm_idx_mutex);
2524 
2525 	adev->gfx.config.backend_enable_mask = active_rbs;
2526 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2527 }
2528 
2529 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2530 				uint32_t first_vmid,
2531 				uint32_t last_vmid)
2532 {
2533 	uint32_t data;
2534 	uint32_t trap_config_vmid_mask = 0;
2535 	int i;
2536 
2537 	/* Calculate trap config vmid mask */
2538 	for (i = first_vmid; i < last_vmid; i++)
2539 		trap_config_vmid_mask |= (1 << i);
2540 
2541 	data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2542 			VMID_SEL, trap_config_vmid_mask);
2543 	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2544 			TRAP_EN, 1);
2545 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2546 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2547 
2548 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2549 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2550 }
2551 
2552 #define DEFAULT_SH_MEM_BASES	(0x6000)
2553 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2554 {
2555 	int i;
2556 	uint32_t sh_mem_config;
2557 	uint32_t sh_mem_bases;
2558 
2559 	/*
2560 	 * Configure apertures:
2561 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2562 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2563 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2564 	 */
2565 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2566 
2567 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2568 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2569 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2570 
2571 	mutex_lock(&adev->srbm_mutex);
2572 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2573 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2574 		/* CP and shaders */
2575 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2576 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2577 	}
2578 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2579 	mutex_unlock(&adev->srbm_mutex);
2580 
2581 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2582 	   access. These should be enabled by FW for target VMIDs. */
2583 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2584 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2585 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2586 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2587 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2588 	}
2589 }
2590 
2591 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2592 {
2593 	int vmid;
2594 
2595 	/*
2596 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2597 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2598 	 * the driver can enable them for graphics. VMID0 should maintain
2599 	 * access so that HWS firmware can save/restore entries.
2600 	 */
2601 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2602 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2603 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2604 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2605 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2606 	}
2607 }
2608 
2609 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2610 {
2611 	uint32_t tmp;
2612 
2613 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2614 	case IP_VERSION(9, 4, 1):
2615 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2616 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2617 				!READ_ONCE(adev->barrier_has_auto_waitcnt));
2618 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2619 		break;
2620 	default:
2621 		break;
2622 	}
2623 }
2624 
2625 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2626 {
2627 	u32 tmp;
2628 	int i;
2629 
2630 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2631 
2632 	gfx_v9_0_tiling_mode_table_init(adev);
2633 
2634 	if (adev->gfx.num_gfx_rings)
2635 		gfx_v9_0_setup_rb(adev);
2636 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2637 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2638 
2639 	/* XXX SH_MEM regs */
2640 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2641 	mutex_lock(&adev->srbm_mutex);
2642 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2643 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2644 		/* CP and shaders */
2645 		if (i == 0) {
2646 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2647 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2648 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2649 					    !!adev->gmc.noretry);
2650 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2651 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2652 		} else {
2653 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2654 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2655 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2656 					    !!adev->gmc.noretry);
2657 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2658 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2659 				(adev->gmc.private_aperture_start >> 48));
2660 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2661 				(adev->gmc.shared_aperture_start >> 48));
2662 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2663 		}
2664 	}
2665 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2666 
2667 	mutex_unlock(&adev->srbm_mutex);
2668 
2669 	gfx_v9_0_init_compute_vmid(adev);
2670 	gfx_v9_0_init_gds_vmid(adev);
2671 	gfx_v9_0_init_sq_config(adev);
2672 }
2673 
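/* Poll until the RLC serdes links report idle: first the per-CU masters for
 * every SE/SH combination, then the non-CU (SE/GC/TC) masters, giving up
 * after adev->usec_timeout microseconds per check.
 */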
2674 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2675 {
2676 	u32 i, j, k;
2677 	u32 mask;
2678 
2679 	mutex_lock(&adev->grbm_idx_mutex);
2680 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2681 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2682 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2683 			for (k = 0; k < adev->usec_timeout; k++) {
2684 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2685 					break;
2686 				udelay(1);
2687 			}
2688 			if (k == adev->usec_timeout) {
2689 				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2690 						      0xffffffff, 0xffffffff, 0);
2691 				mutex_unlock(&adev->grbm_idx_mutex);
2692 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2693 					 i, j);
2694 				return;
2695 			}
2696 		}
2697 	}
2698 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2699 	mutex_unlock(&adev->grbm_idx_mutex);
2700 
2701 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2702 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2703 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2704 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2705 	for (k = 0; k < adev->usec_timeout; k++) {
2706 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2707 			break;
2708 		udelay(1);
2709 	}
2710 }
2711 
2712 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2713 					       bool enable)
2714 {
2715 	u32 tmp;
2716 
2717 	/* These interrupts should be enabled to drive DS clock */
2718 
2719 	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2720 
2721 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2722 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2723 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2724 	if (adev->gfx.num_gfx_rings)
2725 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2726 
2727 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2728 }
2729 
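/* Point the RLC at the clear-state indirect buffer (CSIB): fill the buffer
 * from the golden clear-state data and program its GPU address and size so
 * the clear state can be reloaded (e.g. by the CP/RLC).
 */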
2730 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2731 {
2732 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2733 	/* csib */
2734 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2735 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2736 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2737 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2738 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2739 			adev->gfx.rlc.clear_state_size);
2740 }
2741 
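/* Walk the RLC register_list_format blob starting after the direct register
 * portion. Each indirect list appears to be a sequence of entries terminated
 * by a 0xFFFFFFFF marker; the parser records the start offset of every list
 * and collects the unique indirect register offsets it references
 * (best-effort description of the format, inferred from the loop below).
 */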
2742 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2743 				int indirect_offset,
2744 				int list_size,
2745 				int *unique_indirect_regs,
2746 				int unique_indirect_reg_count,
2747 				int *indirect_start_offsets,
2748 				int *indirect_start_offsets_count,
2749 				int max_start_offsets_count)
2750 {
2751 	int idx;
2752 
2753 	for (; indirect_offset < list_size; indirect_offset++) {
2754 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2755 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2756 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2757 
2758 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2759 			indirect_offset += 2;
2760 
2761 			/* look for the matching index */
2762 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2763 				if (unique_indirect_regs[idx] ==
2764 					register_list_format[indirect_offset] ||
2765 					!unique_indirect_regs[idx])
2766 					break;
2767 			}
2768 
2769 			BUG_ON(idx >= unique_indirect_reg_count);
2770 
2771 			if (!unique_indirect_regs[idx])
2772 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2773 
2774 			indirect_offset++;
2775 		}
2776 	}
2777 }
2778 
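/* Program the RLC save/restore machine: copy the register restore table into
 * SRM ARAM, load the (direct + indirect) register list format into the GPM
 * scratch area, and program the unique indirect register index/data pairs.
 * This list is what lets the RLC save and restore GFX state for gfxoff.
 */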
2779 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2780 {
2781 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2782 	int unique_indirect_reg_count = 0;
2783 
2784 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2785 	int indirect_start_offsets_count = 0;
2786 
2787 	int list_size = 0;
2788 	int i = 0, j = 0;
2789 	u32 tmp = 0;
2790 
2791 	u32 *register_list_format =
2792 		kmemdup(adev->gfx.rlc.register_list_format,
2793 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2794 	if (!register_list_format)
2795 		return -ENOMEM;
2796 
2797 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2798 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2799 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2800 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2801 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2802 				    unique_indirect_regs,
2803 				    unique_indirect_reg_count,
2804 				    indirect_start_offsets,
2805 				    &indirect_start_offsets_count,
2806 				    ARRAY_SIZE(indirect_start_offsets));
2807 
2808 	/* enable auto inc in case it is disabled */
2809 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2810 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2811 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2812 
2813 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2814 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2815 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2816 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2817 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2818 			adev->gfx.rlc.register_restore[i]);
2819 
2820 	/* load indirect register */
2821 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2822 		adev->gfx.rlc.reg_list_format_start);
2823 
2824 	/* direct register portion */
2825 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2826 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2827 			register_list_format[i]);
2828 
2829 	/* indirect register portion */
2830 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2831 		if (register_list_format[i] == 0xFFFFFFFF) {
2832 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2833 			continue;
2834 		}
2835 
2836 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2837 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2838 
2839 		for (j = 0; j < unique_indirect_reg_count; j++) {
2840 			if (register_list_format[i] == unique_indirect_regs[j]) {
2841 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2842 				break;
2843 			}
2844 		}
2845 
2846 		BUG_ON(j >= unique_indirect_reg_count);
2847 
2848 		i++;
2849 	}
2850 
2851 	/* set save/restore list size */
2852 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2853 	list_size = list_size >> 1;
2854 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2855 		adev->gfx.rlc.reg_restore_list_size);
2856 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2857 
2858 	/* write the starting offsets to RLC scratch ram */
2859 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2860 		adev->gfx.rlc.starting_offsets_start);
2861 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2862 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2863 		       indirect_start_offsets[i]);
2864 
2865 	/* load unique indirect regs*/
2866 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2867 		if (unique_indirect_regs[i] != 0) {
2868 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2869 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2870 			       unique_indirect_regs[i] & 0x3FFFF);
2871 
2872 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2873 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2874 			       unique_indirect_regs[i] >> 20);
2875 		}
2876 	}
2877 
2878 	kfree(register_list_format);
2879 	return 0;
2880 }
2881 
2882 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2883 {
2884 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2885 }
2886 
2887 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2888 					     bool enable)
2889 {
2890 	uint32_t data = 0;
2891 	uint32_t default_data = 0;
2892 
2893 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2894 	if (enable) {
2895 		/* enable GFXIP control over CGPG */
2896 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2897 		if (default_data != data)
2898 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2899 
2900 		/* update status */
2901 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2902 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2903 		if (default_data != data)
2904 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2905 	} else {
2906 		/* restore GFXIP control over GCPG */
2907 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2908 		if (default_data != data)
2909 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2910 	}
2911 }
2912 
2913 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2914 {
2915 	uint32_t data = 0;
2916 
2917 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2918 			      AMD_PG_SUPPORT_GFX_SMG |
2919 			      AMD_PG_SUPPORT_GFX_DMG)) {
2920 		/* init IDLE_POLL_COUNT = 60 */
2921 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2922 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2923 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2924 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2925 
2926 		/* init RLC PG Delay */
2927 		data = 0;
2928 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2929 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2930 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2931 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2932 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2933 
2934 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2935 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2936 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2937 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2938 
2939 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2940 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2941 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2942 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2943 
2944 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2945 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2946 
2947 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2948 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2949 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2950 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2951 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2952 	}
2953 }
2954 
2955 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2956 						bool enable)
2957 {
2958 	uint32_t data = 0;
2959 	uint32_t default_data = 0;
2960 
2961 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2962 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2963 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2964 			     enable ? 1 : 0);
2965 	if (default_data != data)
2966 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2967 }
2968 
2969 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2970 						bool enable)
2971 {
2972 	uint32_t data = 0;
2973 	uint32_t default_data = 0;
2974 
2975 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2976 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2977 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2978 			     enable ? 1 : 0);
2979 	if (default_data != data)
2980 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2981 }
2982 
2983 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2984 					bool enable)
2985 {
2986 	uint32_t data = 0;
2987 	uint32_t default_data = 0;
2988 
2989 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2990 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2991 			     CP_PG_DISABLE,
2992 			     enable ? 0 : 1);
2993 	if (default_data != data)
2994 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2995 }
2996 
2997 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2998 						bool enable)
2999 {
3000 	uint32_t data, default_data;
3001 
3002 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3003 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3004 			     GFX_POWER_GATING_ENABLE,
3005 			     enable ? 1 : 0);
3006 	if (default_data != data)
3007 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3008 }
3009 
3010 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3011 						bool enable)
3012 {
3013 	uint32_t data, default_data;
3014 
3015 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3016 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3017 			     GFX_PIPELINE_PG_ENABLE,
3018 			     enable ? 1 : 0);
3019 	if (default_data != data)
3020 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3021 
3022 	if (!enable)
3023 		/* read any GFX register to wake up GFX */
3024 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3025 }
3026 
3027 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3028 						       bool enable)
3029 {
3030 	uint32_t data, default_data;
3031 
3032 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3033 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3034 			     STATIC_PER_CU_PG_ENABLE,
3035 			     enable ? 1 : 0);
3036 	if (default_data != data)
3037 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3038 }
3039 
3040 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3041 						bool enable)
3042 {
3043 	uint32_t data, default_data;
3044 
3045 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3046 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3047 			     DYN_PER_CU_PG_ENABLE,
3048 			     enable ? 1 : 0);
3049 	if (default_data != data)
3050 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3051 }
3052 
3053 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3054 {
3055 	gfx_v9_0_init_csb(adev);
3056 
3057 	/*
3058 	 * Rlc save restore list is workable since v2_1.
3059 	 * And it's needed by gfxoff feature.
3060 	 */
3061 	if (adev->gfx.rlc.is_rlc_v2_1) {
3062 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
3063 			    IP_VERSION(9, 2, 1) ||
3064 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
3065 			gfx_v9_1_init_rlc_save_restore_list(adev);
3066 		gfx_v9_0_enable_save_restore_machine(adev);
3067 	}
3068 
3069 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3070 			      AMD_PG_SUPPORT_GFX_SMG |
3071 			      AMD_PG_SUPPORT_GFX_DMG |
3072 			      AMD_PG_SUPPORT_CP |
3073 			      AMD_PG_SUPPORT_GDS |
3074 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3075 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3076 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
3077 		gfx_v9_0_init_gfx_power_gating(adev);
3078 	}
3079 }
3080 
3081 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3082 {
3083 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3084 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3085 	gfx_v9_0_wait_for_rlc_serdes(adev);
3086 }
3087 
3088 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3089 {
3090 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3091 	udelay(50);
3092 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3093 	udelay(50);
3094 }
3095 
3096 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3097 {
3098 #ifdef AMDGPU_RLC_DEBUG_RETRY
3099 	u32 rlc_ucode_ver;
3100 #endif
3101 
3102 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3103 	udelay(50);
3104 
3105 	/* on APUs (e.g. Carrizo), enable the CP interrupt only after the CP is initialized */
3106 	if (!(adev->flags & AMD_IS_APU)) {
3107 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3108 		udelay(50);
3109 	}
3110 
3111 #ifdef AMDGPU_RLC_DEBUG_RETRY
3112 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3113 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3114 	if (rlc_ucode_ver == 0x108) {
3115 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3116 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3117 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3118 		 * default is 0x9C4 to create a 100us interval */
3119 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3120 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3121 		 * to disable the page fault retry interrupts, default is
3122 		 * 0x100 (256) */
3123 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3124 	}
3125 #endif
3126 }
3127 
3128 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3129 {
3130 	const struct rlc_firmware_header_v2_0 *hdr;
3131 	const __le32 *fw_data;
3132 	unsigned i, fw_size;
3133 
3134 	if (!adev->gfx.rlc_fw)
3135 		return -EINVAL;
3136 
3137 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3138 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3139 
3140 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3141 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3142 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3143 
3144 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3145 			RLCG_UCODE_LOADING_START_ADDRESS);
3146 	for (i = 0; i < fw_size; i++)
3147 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3148 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3149 
3150 	return 0;
3151 }
3152 
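/* RLC bring-up order used below: stop the RLC, disable coarse-grain clock
 * gating, initialize power gating and the save/restore list, load the RLC
 * microcode when not using PSP loading, configure LBPW where applicable,
 * then start the RLC again.
 */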
3153 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3154 {
3155 	int r;
3156 
3157 	if (amdgpu_sriov_vf(adev)) {
3158 		gfx_v9_0_init_csb(adev);
3159 		return 0;
3160 	}
3161 
3162 	adev->gfx.rlc.funcs->stop(adev);
3163 
3164 	/* disable CG */
3165 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3166 
3167 	gfx_v9_0_init_pg(adev);
3168 
3169 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3170 		/* legacy rlc firmware loading */
3171 		r = gfx_v9_0_rlc_load_microcode(adev);
3172 		if (r)
3173 			return r;
3174 	}
3175 
3176 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3177 	case IP_VERSION(9, 2, 2):
3178 	case IP_VERSION(9, 1, 0):
3179 		gfx_v9_0_init_lbpw(adev);
3180 		if (amdgpu_lbpw == 0)
3181 			gfx_v9_0_enable_lbpw(adev, false);
3182 		else
3183 			gfx_v9_0_enable_lbpw(adev, true);
3184 		break;
3185 	case IP_VERSION(9, 4, 0):
3186 		gfx_v9_4_init_lbpw(adev);
3187 		if (amdgpu_lbpw > 0)
3188 			gfx_v9_0_enable_lbpw(adev, true);
3189 		else
3190 			gfx_v9_0_enable_lbpw(adev, false);
3191 		break;
3192 	default:
3193 		break;
3194 	}
3195 
3196 	gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
3197 
3198 	adev->gfx.rlc.funcs->start(adev);
3199 
3200 	return 0;
3201 }
3202 
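/* Enable or halt the graphics CP micro engines. When disabling, the PFP, CE
 * and ME are halted, both pipes of each engine are reset and their
 * instruction caches are invalidated; when enabling, all of those bits are
 * cleared in a single CP_ME_CNTL write.
 */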
3203 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3204 {
3205 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3206 
3207 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1);
3208 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1);
3209 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1);
3210 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1);
3211 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1);
3212 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1);
3213 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1);
3214 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1);
3215 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1);
3216 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3217 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3218 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3219 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3220 	udelay(50);
3221 }
3222 
3223 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3224 {
3225 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3226 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3227 	const struct gfx_firmware_header_v1_0 *me_hdr;
3228 	const __le32 *fw_data;
3229 	unsigned i, fw_size;
3230 
3231 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3232 		return -EINVAL;
3233 
3234 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3235 		adev->gfx.pfp_fw->data;
3236 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3237 		adev->gfx.ce_fw->data;
3238 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3239 		adev->gfx.me_fw->data;
3240 
3241 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3242 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3243 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3244 
3245 	gfx_v9_0_cp_gfx_enable(adev, false);
3246 
3247 	/* PFP */
3248 	fw_data = (const __le32 *)
3249 		(adev->gfx.pfp_fw->data +
3250 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3251 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3252 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3253 	for (i = 0; i < fw_size; i++)
3254 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3255 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3256 
3257 	/* CE */
3258 	fw_data = (const __le32 *)
3259 		(adev->gfx.ce_fw->data +
3260 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3261 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3262 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3263 	for (i = 0; i < fw_size; i++)
3264 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3265 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3266 
3267 	/* ME */
3268 	fw_data = (const __le32 *)
3269 		(adev->gfx.me_fw->data +
3270 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3271 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3272 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3273 	for (i = 0; i < fw_size; i++)
3274 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3275 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3276 
3277 	return 0;
3278 }
3279 
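/* Initialize the graphics context over the gfx ring: submit the clear-state
 * preamble, the SECT_CONTEXT register extents from gfx9_cs_data, a
 * CLEAR_STATE packet and the CE partition bases, then commit the ring.
 * On APU S3 resume without firmware involvement this CSB resubmit is skipped.
 */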
3280 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3281 {
3282 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3283 	const struct cs_section_def *sect = NULL;
3284 	const struct cs_extent_def *ext = NULL;
3285 	int r, i, tmp;
3286 
3287 	/* init the CP */
3288 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3289 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3290 
3291 	gfx_v9_0_cp_gfx_enable(adev, true);
3292 
3293 	/* For now, limit this quirk to the APU gfx9 series; the APU
3294 	 * gfx10/gfx11 parts are confirmed not to need this update.
3295 	 */
3296 	if (adev->flags & AMD_IS_APU &&
3297 			adev->in_s3 && !pm_resume_via_firmware()) {
3298 		DRM_INFO("Will skip the CSB packet resubmit\n");
3299 		return 0;
3300 	}
3301 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3302 	if (r) {
3303 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3304 		return r;
3305 	}
3306 
3307 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3308 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3309 
3310 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3311 	amdgpu_ring_write(ring, 0x80000000);
3312 	amdgpu_ring_write(ring, 0x80000000);
3313 
3314 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3315 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3316 			if (sect->id == SECT_CONTEXT) {
3317 				amdgpu_ring_write(ring,
3318 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3319 					       ext->reg_count));
3320 				amdgpu_ring_write(ring,
3321 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3322 				for (i = 0; i < ext->reg_count; i++)
3323 					amdgpu_ring_write(ring, ext->extent[i]);
3324 			}
3325 		}
3326 	}
3327 
3328 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3329 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3330 
3331 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3332 	amdgpu_ring_write(ring, 0);
3333 
3334 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3335 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3336 	amdgpu_ring_write(ring, 0x8000);
3337 	amdgpu_ring_write(ring, 0x8000);
3338 
3339 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3340 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3341 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3342 	amdgpu_ring_write(ring, tmp);
3343 	amdgpu_ring_write(ring, 0);
3344 
3345 	amdgpu_ring_commit(ring);
3346 
3347 	return 0;
3348 }
3349 
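/* Program gfx ring buffer 0: write pointer delay, VMID 0, ring size and
 * block size, rptr/wptr write-back addresses, ring base, and the doorbell
 * offset/range, then start the ring with the clear-state submission above.
 */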
3350 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3351 {
3352 	struct amdgpu_ring *ring;
3353 	u32 tmp;
3354 	u32 rb_bufsz;
3355 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3356 
3357 	/* Set the write pointer delay */
3358 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3359 
3360 	/* set the RB to use vmid 0 */
3361 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3362 
3363 	/* Set ring buffer size */
3364 	ring = &adev->gfx.gfx_ring[0];
3365 	rb_bufsz = order_base_2(ring->ring_size / 8);
3366 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3367 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3368 #ifdef __BIG_ENDIAN
3369 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3370 #endif
3371 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3372 
3373 	/* Initialize the ring buffer's write pointers */
3374 	ring->wptr = 0;
3375 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3376 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3377 
3378 	/* set the wb address whether it's enabled or not */
3379 	rptr_addr = ring->rptr_gpu_addr;
3380 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3381 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3382 
3383 	wptr_gpu_addr = ring->wptr_gpu_addr;
3384 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3385 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3386 
3387 	mdelay(1);
3388 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3389 
3390 	rb_addr = ring->gpu_addr >> 8;
3391 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3392 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3393 
3394 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3395 	if (ring->use_doorbell) {
3396 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3397 				    DOORBELL_OFFSET, ring->doorbell_index);
3398 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3399 				    DOORBELL_EN, 1);
3400 	} else {
3401 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3402 	}
3403 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3404 
3405 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3406 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3407 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3408 
3409 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3410 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3411 
3412 
3413 	/* start the ring */
3414 	gfx_v9_0_cp_gfx_start(adev);
3415 
3416 	return 0;
3417 }
3418 
3419 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3420 {
3421 	if (enable) {
3422 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3423 	} else {
3424 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3425 				 (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
3426 				  CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
3427 				  CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
3428 				  CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
3429 				  CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
3430 				  CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
3431 				  CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
3432 				  CP_MEC_CNTL__MEC_ME1_HALT_MASK |
3433 				  CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3434 		adev->gfx.kiq[0].ring.sched.ready = false;
3435 	}
3436 	udelay(50);
3437 }
3438 
3439 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3440 {
3441 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3442 	const __le32 *fw_data;
3443 	unsigned i;
3444 	u32 tmp;
3445 
3446 	if (!adev->gfx.mec_fw)
3447 		return -EINVAL;
3448 
3449 	gfx_v9_0_cp_compute_enable(adev, false);
3450 
3451 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3452 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3453 
3454 	fw_data = (const __le32 *)
3455 		(adev->gfx.mec_fw->data +
3456 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3457 	tmp = 0;
3458 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3459 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3460 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3461 
3462 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3463 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3464 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3465 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3466 
3467 	/* MEC1 */
3468 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3469 			 mec_hdr->jt_offset);
3470 	for (i = 0; i < mec_hdr->jt_size; i++)
3471 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3472 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3473 
3474 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3475 			adev->gfx.mec_fw_version);
3476 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3477 
3478 	return 0;
3479 }
3480 
3481 /* KIQ functions */
3482 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3483 {
3484 	uint32_t tmp;
3485 	struct amdgpu_device *adev = ring->adev;
3486 
3487 	/* tell RLC which is KIQ queue */
3488 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3489 	tmp &= 0xffffff00;
3490 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3491 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
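	/* the second write presumably sets the queue-valid/enable bit (0x80)
	 * after the me/pipe/queue id has been latched
	 */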
3492 	tmp |= 0x80;
3493 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3494 }
3495 
3496 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3497 {
3498 	struct amdgpu_device *adev = ring->adev;
3499 
3500 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3501 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3502 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3503 			mqd->cp_hqd_queue_priority =
3504 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3505 		}
3506 	}
3507 }
3508 
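/* Fill in the memory queue descriptor (MQD) for a compute or KIQ ring. The
 * CP later consumes this structure (via MAP_QUEUES or the direct register
 * writes in gfx_v9_0_kiq_init_register()) to program the hardware queue
 * descriptor registers.
 */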
3509 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3510 {
3511 	struct amdgpu_device *adev = ring->adev;
3512 	struct v9_mqd *mqd = ring->mqd_ptr;
3513 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3514 	uint32_t tmp;
3515 
3516 	mqd->header = 0xC0310800;
3517 	mqd->compute_pipelinestat_enable = 0x00000001;
3518 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3519 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3520 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3521 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3522 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3523 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3524 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3525 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3526 	mqd->compute_misc_reserved = 0x00000003;
3527 
3528 	mqd->dynamic_cu_mask_addr_lo =
3529 		lower_32_bits(ring->mqd_gpu_addr
3530 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3531 	mqd->dynamic_cu_mask_addr_hi =
3532 		upper_32_bits(ring->mqd_gpu_addr
3533 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3534 
3535 	eop_base_addr = ring->eop_gpu_addr >> 8;
3536 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3537 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3538 
3539 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3540 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3541 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3542 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3543 
3544 	mqd->cp_hqd_eop_control = tmp;
3545 
3546 	/* enable doorbell? */
3547 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3548 
3549 	if (ring->use_doorbell) {
3550 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3551 				    DOORBELL_OFFSET, ring->doorbell_index);
3552 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3553 				    DOORBELL_EN, 1);
3554 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3555 				    DOORBELL_SOURCE, 0);
3556 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3557 				    DOORBELL_HIT, 0);
3558 	} else {
3559 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3560 					 DOORBELL_EN, 0);
3561 	}
3562 
3563 	mqd->cp_hqd_pq_doorbell_control = tmp;
3564 
3565 	/* disable the queue if it's active */
3566 	ring->wptr = 0;
3567 	mqd->cp_hqd_dequeue_request = 0;
3568 	mqd->cp_hqd_pq_rptr = 0;
3569 	mqd->cp_hqd_pq_wptr_lo = 0;
3570 	mqd->cp_hqd_pq_wptr_hi = 0;
3571 
3572 	/* set the pointer to the MQD */
3573 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3574 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3575 
3576 	/* set MQD vmid to 0 */
3577 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3578 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3579 	mqd->cp_mqd_control = tmp;
3580 
3581 	/* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3582 	hqd_gpu_addr = ring->gpu_addr >> 8;
3583 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3584 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3585 
3586 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3587 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3588 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3589 			    (order_base_2(ring->ring_size / 4) - 1));
3590 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3591 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3592 #ifdef __BIG_ENDIAN
3593 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3594 #endif
3595 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3596 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3597 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3598 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3599 	mqd->cp_hqd_pq_control = tmp;
3600 
3601 	/* set the wb address whether it's enabled or not */
3602 	wb_gpu_addr = ring->rptr_gpu_addr;
3603 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3604 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3605 		upper_32_bits(wb_gpu_addr) & 0xffff;
3606 
3607 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3608 	wb_gpu_addr = ring->wptr_gpu_addr;
3609 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3610 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3611 
3612 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3613 	ring->wptr = 0;
3614 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3615 
3616 	/* set the vmid for the queue */
3617 	mqd->cp_hqd_vmid = 0;
3618 
3619 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3620 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3621 	mqd->cp_hqd_persistent_state = tmp;
3622 
3623 	/* set MIN_IB_AVAIL_SIZE */
3624 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3625 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3626 	mqd->cp_hqd_ib_control = tmp;
3627 
3628 	/* set static priority for a queue/ring */
3629 	gfx_v9_0_mqd_set_priority(ring, mqd);
3630 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3631 
3632 	/* the map_queues packet doesn't need to activate the queue,
3633 	 * so only the KIQ needs to set this field.
3634 	 */
3635 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3636 		mqd->cp_hqd_active = 1;
3637 
3638 	return 0;
3639 }
3640 
3641 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3642 {
3643 	struct amdgpu_device *adev = ring->adev;
3644 	struct v9_mqd *mqd = ring->mqd_ptr;
3645 	int j;
3646 
3647 	/* disable wptr polling */
3648 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3649 
3650 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3651 	       mqd->cp_hqd_eop_base_addr_lo);
3652 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3653 	       mqd->cp_hqd_eop_base_addr_hi);
3654 
3655 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3656 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3657 	       mqd->cp_hqd_eop_control);
3658 
3659 	/* enable doorbell? */
3660 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3661 	       mqd->cp_hqd_pq_doorbell_control);
3662 
3663 	/* disable the queue if it's active */
3664 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3665 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3666 		for (j = 0; j < adev->usec_timeout; j++) {
3667 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3668 				break;
3669 			udelay(1);
3670 		}
3671 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3672 		       mqd->cp_hqd_dequeue_request);
3673 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3674 		       mqd->cp_hqd_pq_rptr);
3675 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3676 		       mqd->cp_hqd_pq_wptr_lo);
3677 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3678 		       mqd->cp_hqd_pq_wptr_hi);
3679 	}
3680 
3681 	/* set the pointer to the MQD */
3682 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3683 	       mqd->cp_mqd_base_addr_lo);
3684 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3685 	       mqd->cp_mqd_base_addr_hi);
3686 
3687 	/* set MQD vmid to 0 */
3688 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3689 	       mqd->cp_mqd_control);
3690 
3691 	/* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3692 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3693 	       mqd->cp_hqd_pq_base_lo);
3694 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3695 	       mqd->cp_hqd_pq_base_hi);
3696 
3697 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3698 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3699 	       mqd->cp_hqd_pq_control);
3700 
3701 	/* set the wb address whether it's enabled or not */
3702 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3703 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3704 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3705 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3706 
3707 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3708 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3709 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3710 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3711 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3712 
3713 	/* enable the doorbell if requested */
3714 	if (ring->use_doorbell) {
3715 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3716 					(adev->doorbell_index.kiq * 2) << 2);
3717 		/* If GC has entered CGPG, ringing a doorbell beyond the first page
3718 		 * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work
3719 		 * around this issue. This change has to be aligned with a firmware
3720 		 * update.
3721 		 */
3722 		if (check_if_enlarge_doorbell_range(adev))
3723 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3724 					(adev->doorbell.size - 4));
3725 		else
3726 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3727 					(adev->doorbell_index.userqueue_end * 2) << 2);
3728 	}
3729 
3730 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3731 	       mqd->cp_hqd_pq_doorbell_control);
3732 
3733 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3734 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3735 	       mqd->cp_hqd_pq_wptr_lo);
3736 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3737 	       mqd->cp_hqd_pq_wptr_hi);
3738 
3739 	/* set the vmid for the queue */
3740 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3741 
3742 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3743 	       mqd->cp_hqd_persistent_state);
3744 
3745 	/* activate the queue */
3746 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3747 	       mqd->cp_hqd_active);
3748 
3749 	if (ring->use_doorbell)
3750 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3751 
3752 	return 0;
3753 }
3754 
3755 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3756 {
3757 	struct amdgpu_device *adev = ring->adev;
3758 	int j;
3759 
3760 	/* disable the queue if it's active */
3761 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3762 
3763 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3764 
3765 		for (j = 0; j < adev->usec_timeout; j++) {
3766 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3767 				break;
3768 			udelay(1);
3769 		}
3770 
3771 		if (j == adev->usec_timeout) {
3772 			DRM_DEBUG("KIQ dequeue request failed.\n");
3773 
3774 			/* Manual disable if dequeue request times out */
3775 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3776 		}
3777 
3778 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3779 		      0);
3780 	}
3781 
3782 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3783 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3784 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3785 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3786 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3787 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3788 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3789 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3790 
3791 	return 0;
3792 }
3793 
3794 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3795 {
3796 	struct amdgpu_device *adev = ring->adev;
3797 	struct v9_mqd *mqd = ring->mqd_ptr;
3798 	struct v9_mqd *tmp_mqd;
3799 
3800 	gfx_v9_0_kiq_setting(ring);
3801 
3802 	/* The GPU could be in a bad state during probe and the driver may
3803 	 * trigger a reset after loading the SMU; in that case the MQD has not
3804 	 * been initialized and the driver needs to re-init it.
3805 	 * Check mqd->cp_hqd_pq_control since this value should not be 0.
3806 	 */
3807 	tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3808 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3809 		/* for GPU_RESET case , reset MQD to a clean status */
3810 		if (adev->gfx.kiq[0].mqd_backup)
3811 			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3812 
3813 		/* reset ring buffer */
3814 		ring->wptr = 0;
3815 		amdgpu_ring_clear_ring(ring);
3816 
3817 		mutex_lock(&adev->srbm_mutex);
3818 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3819 		gfx_v9_0_kiq_init_register(ring);
3820 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3821 		mutex_unlock(&adev->srbm_mutex);
3822 	} else {
3823 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3824 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3825 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3826 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3827 			amdgpu_ring_clear_ring(ring);
3828 		mutex_lock(&adev->srbm_mutex);
3829 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3830 		gfx_v9_0_mqd_init(ring);
3831 		gfx_v9_0_kiq_init_register(ring);
3832 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3833 		mutex_unlock(&adev->srbm_mutex);
3834 
3835 		if (adev->gfx.kiq[0].mqd_backup)
3836 			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3837 	}
3838 
3839 	return 0;
3840 }
3841 
3842 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
3843 {
3844 	struct amdgpu_device *adev = ring->adev;
3845 	struct v9_mqd *mqd = ring->mqd_ptr;
3846 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3847 	struct v9_mqd *tmp_mqd;
3848 
3849 	/* Same as the KIQ init above: the driver needs to re-init the MQD if
3850 	 * mqd->cp_hqd_pq_control has not been initialized before.
3851 	 */
3852 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3853 
3854 	if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
3855 	    (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
3856 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3857 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3858 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3859 		mutex_lock(&adev->srbm_mutex);
3860 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3861 		gfx_v9_0_mqd_init(ring);
3862 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3863 		mutex_unlock(&adev->srbm_mutex);
3864 
3865 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3866 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3867 	} else {
3868 		/* restore MQD to a clean status */
3869 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3870 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3871 		/* reset ring buffer */
3872 		ring->wptr = 0;
3873 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3874 		amdgpu_ring_clear_ring(ring);
3875 	}
3876 
3877 	return 0;
3878 }
3879 
3880 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3881 {
3882 	struct amdgpu_ring *ring;
3883 	int r;
3884 
3885 	ring = &adev->gfx.kiq[0].ring;
3886 
3887 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3888 	if (unlikely(r != 0))
3889 		return r;
3890 
3891 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3892 	if (unlikely(r != 0)) {
3893 		amdgpu_bo_unreserve(ring->mqd_obj);
3894 		return r;
3895 	}
3896 
3897 	gfx_v9_0_kiq_init_queue(ring);
3898 	amdgpu_bo_kunmap(ring->mqd_obj);
3899 	ring->mqd_ptr = NULL;
3900 	amdgpu_bo_unreserve(ring->mqd_obj);
3901 	return 0;
3902 }
3903 
3904 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3905 {
3906 	struct amdgpu_ring *ring = NULL;
3907 	int r = 0, i;
3908 
3909 	gfx_v9_0_cp_compute_enable(adev, true);
3910 
3911 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3912 		ring = &adev->gfx.compute_ring[i];
3913 
3914 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3915 		if (unlikely(r != 0))
3916 			goto done;
3917 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3918 		if (!r) {
3919 			r = gfx_v9_0_kcq_init_queue(ring, false);
3920 			amdgpu_bo_kunmap(ring->mqd_obj);
3921 			ring->mqd_ptr = NULL;
3922 		}
3923 		amdgpu_bo_unreserve(ring->mqd_obj);
3924 		if (r)
3925 			goto done;
3926 	}
3927 
3928 	r = amdgpu_gfx_enable_kcq(adev, 0);
3929 done:
3930 	return r;
3931 }
3932 
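/* CP bring-up order used below: optionally load the CP microcode (legacy,
 * non-PSP path), make sure gfx and compute CP are halted, resume the KIQ
 * first so it can map the other queues, then the gfx ring, then the KCQs,
 * and finally run ring tests and re-enable the GUI idle interrupt.
 */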
3933 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3934 {
3935 	int r, i;
3936 	struct amdgpu_ring *ring;
3937 
3938 	if (!(adev->flags & AMD_IS_APU))
3939 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3940 
3941 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3942 		if (adev->gfx.num_gfx_rings) {
3943 			/* legacy firmware loading */
3944 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3945 			if (r)
3946 				return r;
3947 		}
3948 
3949 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3950 		if (r)
3951 			return r;
3952 	}
3953 
3954 	if (adev->gfx.num_gfx_rings)
3955 		gfx_v9_0_cp_gfx_enable(adev, false);
3956 	gfx_v9_0_cp_compute_enable(adev, false);
3957 
3958 	r = gfx_v9_0_kiq_resume(adev);
3959 	if (r)
3960 		return r;
3961 
3962 	if (adev->gfx.num_gfx_rings) {
3963 		r = gfx_v9_0_cp_gfx_resume(adev);
3964 		if (r)
3965 			return r;
3966 	}
3967 
3968 	r = gfx_v9_0_kcq_resume(adev);
3969 	if (r)
3970 		return r;
3971 
3972 	if (adev->gfx.num_gfx_rings) {
3973 		ring = &adev->gfx.gfx_ring[0];
3974 		r = amdgpu_ring_test_helper(ring);
3975 		if (r)
3976 			return r;
3977 	}
3978 
3979 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3980 		ring = &adev->gfx.compute_ring[i];
3981 		amdgpu_ring_test_helper(ring);
3982 	}
3983 
3984 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3985 
3986 	return 0;
3987 }
3988 
3989 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3990 {
3991 	u32 tmp;
3992 
3993 	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
3994 	    amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
3995 		return;
3996 
3997 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3998 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3999 				adev->df.hash_status.hash_64k);
4000 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
4001 				adev->df.hash_status.hash_2m);
4002 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
4003 				adev->df.hash_status.hash_1g);
4004 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
4005 }
4006 
4007 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
4008 {
4009 	if (adev->gfx.num_gfx_rings)
4010 		gfx_v9_0_cp_gfx_enable(adev, enable);
4011 	gfx_v9_0_cp_compute_enable(adev, enable);
4012 }
4013 
4014 static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
4015 {
4016 	int r;
4017 	struct amdgpu_device *adev = ip_block->adev;
4018 
4019 	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
4020 				       adev->gfx.cleaner_shader_ptr);
4021 
4022 	if (!amdgpu_sriov_vf(adev))
4023 		gfx_v9_0_init_golden_registers(adev);
4024 
4025 	gfx_v9_0_constants_init(adev);
4026 
4027 	gfx_v9_0_init_tcp_config(adev);
4028 
4029 	r = adev->gfx.rlc.funcs->resume(adev);
4030 	if (r)
4031 		return r;
4032 
4033 	r = gfx_v9_0_cp_resume(adev);
4034 	if (r)
4035 		return r;
4036 
4037 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4038 		gfx_v9_4_2_set_power_brake_sequence(adev);
4039 
4040 	return r;
4041 }
4042 
4043 static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
4044 {
4045 	struct amdgpu_device *adev = ip_block->adev;
4046 
4047 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4048 		amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4049 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4050 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4051 	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
4052 
4053 	/* DF freeze and kcq disable will fail */
4054 	if (!amdgpu_ras_intr_triggered())
4055 		/* disable KCQ to avoid CPC touch memory not valid anymore */
4056 		amdgpu_gfx_disable_kcq(adev, 0);
4057 
4058 	if (amdgpu_sriov_vf(adev)) {
4059 		gfx_v9_0_cp_gfx_enable(adev, false);
4060 		/* Polling must be disabled for SR-IOV when the hw has finished;
4061 		 * otherwise the CPC engine may keep fetching a WB address that is
4062 		 * already invalid after the sw side has finished, triggering a DMAR
4063 		 * read error on the hypervisor side.
4064 		 */
4065 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4066 		return 0;
4067 	}
4068 
4069 	/* Use the deinitialization sequence from CAIL when unbinding the device
4070 	 * from the driver, otherwise the KIQ hangs when binding back.
4071 	 */
4072 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4073 		mutex_lock(&adev->srbm_mutex);
4074 		soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
4075 				adev->gfx.kiq[0].ring.pipe,
4076 				adev->gfx.kiq[0].ring.queue, 0, 0);
4077 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
4078 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
4079 		mutex_unlock(&adev->srbm_mutex);
4080 	}
4081 
4082 	gfx_v9_0_cp_enable(adev, false);
4083 
4084 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4085 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4086 	    (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
4087 		dev_dbg(adev->dev, "Skipping RLC halt\n");
4088 		return 0;
4089 	}
4090 
4091 	adev->gfx.rlc.funcs->stop(adev);
4092 	return 0;
4093 }
4094 
4095 static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block)
4096 {
4097 	return gfx_v9_0_hw_fini(ip_block);
4098 }
4099 
4100 static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block)
4101 {
4102 	return gfx_v9_0_hw_init(ip_block);
4103 }
4104 
4105 static bool gfx_v9_0_is_idle(void *handle)
4106 {
4107 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4108 
4109 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4110 				GRBM_STATUS, GUI_ACTIVE))
4111 		return false;
4112 	else
4113 		return true;
4114 }
4115 
4116 static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4117 {
4118 	unsigned i;
4119 	struct amdgpu_device *adev = ip_block->adev;
4120 
4121 	for (i = 0; i < adev->usec_timeout; i++) {
4122 		if (gfx_v9_0_is_idle(adev))
4123 			return 0;
4124 		udelay(1);
4125 	}
4126 	return -ETIMEDOUT;
4127 }
4128 
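/* Soft reset path: derive the GRBM_SOFT_RESET bits from the busy bits in
 * GRBM_STATUS/GRBM_STATUS2, stop the RLC and CP engines, then pulse the
 * selected reset bits with short delays before releasing them.
 */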
4129 static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
4130 {
4131 	u32 grbm_soft_reset = 0;
4132 	u32 tmp;
4133 	struct amdgpu_device *adev = ip_block->adev;
4134 
4135 	/* GRBM_STATUS */
4136 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4137 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4138 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4139 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4140 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4141 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4142 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4143 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4144 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4145 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4146 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4147 	}
4148 
4149 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4150 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4151 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4152 	}
4153 
4154 	/* GRBM_STATUS2 */
4155 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4156 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4157 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4158 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4159 
4160 
4161 	if (grbm_soft_reset) {
4162 		/* stop the rlc */
4163 		adev->gfx.rlc.funcs->stop(adev);
4164 
4165 		if (adev->gfx.num_gfx_rings)
4166 			/* Disable GFX parsing/prefetching */
4167 			gfx_v9_0_cp_gfx_enable(adev, false);
4168 
4169 		/* Disable MEC parsing/prefetching */
4170 		gfx_v9_0_cp_compute_enable(adev, false);
4171 
4172 		if (grbm_soft_reset) {
4173 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4174 			tmp |= grbm_soft_reset;
4175 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4176 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4177 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4178 
4179 			udelay(50);
4180 
4181 			tmp &= ~grbm_soft_reset;
4182 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4183 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4184 		}
4185 
4186 		/* Wait a little for things to settle down */
4187 		udelay(50);
4188 	}
4189 	return 0;
4190 }
4191 
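/* Read the 64-bit GPU clock counter through the KIQ: a COPY_DATA packet
 * samples the clock count into a writeback slot and the emitted fence is
 * polled until the CP has written the result.  Used by
 * gfx_v9_0_get_gpu_clock_counter() for the Vega10 SRIOV runtime case.
 */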
4192 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4193 {
4194 	signed long r, cnt = 0;
4195 	unsigned long flags;
4196 	uint32_t seq, reg_val_offs = 0;
4197 	uint64_t value = 0;
4198 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4199 	struct amdgpu_ring *ring = &kiq->ring;
4200 
4201 	BUG_ON(!ring->funcs->emit_rreg);
4202 
4203 	spin_lock_irqsave(&kiq->ring_lock, flags);
4204 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4205 		pr_err("critical bug! too many kiq readers\n");
4206 		goto failed_unlock;
4207 	}
4208 	amdgpu_ring_alloc(ring, 32);
4209 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4210 	amdgpu_ring_write(ring, 9 |	/* src: register*/
4211 				(5 << 8) |	/* dst: memory */
4212 				(1 << 16) |	/* count sel */
4213 				(1 << 20));	/* write confirm */
4214 	amdgpu_ring_write(ring, 0);
4215 	amdgpu_ring_write(ring, 0);
4216 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4217 				reg_val_offs * 4));
4218 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4219 				reg_val_offs * 4));
4220 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4221 	if (r)
4222 		goto failed_undo;
4223 
4224 	amdgpu_ring_commit(ring);
4225 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4226 
4227 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4228 
4229 	/* Don't keep waiting in the GPU reset case, because doing so may
4230 	 * block the gpu_recover() routine forever: for example, when this
4231 	 * virt_kiq_rreg is triggered from TTM, ttm_bo_lock_delayed_workqueue()
4232 	 * will never return while we keep waiting here, which leaves
4233 	 * gpu_recover() hanging.
4234 	 *
4235 	 * Also don't keep waiting when called from IRQ context.
4236 	 */
4237 	if (r < 1 && (amdgpu_in_reset(adev)))
4238 		goto failed_kiq_read;
4239 
4240 	might_sleep();
4241 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4242 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4243 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4244 	}
4245 
4246 	if (cnt > MAX_KIQ_REG_TRY)
4247 		goto failed_kiq_read;
4248 
4249 	mb();
4250 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4251 		(uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
4252 	amdgpu_device_wb_free(adev, reg_val_offs);
4253 	return value;
4254 
4255 failed_undo:
4256 	amdgpu_ring_undo(ring);
4257 failed_unlock:
4258 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4259 failed_kiq_read:
4260 	if (reg_val_offs)
4261 		amdgpu_device_wb_free(adev, reg_val_offs);
4262 	pr_err("failed to read gpu clock\n");
4263 	return ~0;
4264 }
4265 
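/* Sample the GPU clock counter.  Renoir reads the SMUIO golden TSC pair and
 * re-reads the low word if the high word carried over in between; all other
 * gfx9 parts latch the RLC clock counter (or use the KIQ path above for the
 * Vega10 SRIOV runtime case) with GFXOFF temporarily disabled.
 */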
4266 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4267 {
4268 	uint64_t clock, clock_lo, clock_hi, hi_check;
4269 
4270 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4271 	case IP_VERSION(9, 3, 0):
4272 		preempt_disable();
4273 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4274 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4275 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4276 		/* The SMUIO TSC clock runs at 100MHz, so the 32-bit low word
4277 		 * carries over roughly every 42 seconds.
4278 		 */
4279 		if (hi_check != clock_hi) {
4280 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4281 			clock_hi = hi_check;
4282 		}
4283 		preempt_enable();
4284 		clock = clock_lo | (clock_hi << 32ULL);
4285 		break;
4286 	default:
4287 		amdgpu_gfx_off_ctrl(adev, false);
4288 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4289 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4290 			    IP_VERSION(9, 0, 1) &&
4291 		    amdgpu_sriov_runtime(adev)) {
4292 			clock = gfx_v9_0_kiq_read_clock(adev);
4293 		} else {
4294 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4295 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4296 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4297 		}
4298 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4299 		amdgpu_gfx_off_ctrl(adev, true);
4300 		break;
4301 	}
4302 	return clock;
4303 }
4304 
4305 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4306 					  uint32_t vmid,
4307 					  uint32_t gds_base, uint32_t gds_size,
4308 					  uint32_t gws_base, uint32_t gws_size,
4309 					  uint32_t oa_base, uint32_t oa_size)
4310 {
4311 	struct amdgpu_device *adev = ring->adev;
4312 
4313 	/* GDS Base */
4314 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4315 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4316 				   gds_base);
4317 
4318 	/* GDS Size */
4319 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4320 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4321 				   gds_size);
4322 
4323 	/* GWS */
4324 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4325 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4326 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4327 
4328 	/* OA */
4329 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4330 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4331 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4332 }
4333 
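/* Pre-assembled GFX9 compute shader binaries used by the EDC GPR workarounds
 * below.  Each kernel writes the VGPR or SGPR file (and, in the Arcturus
 * variant, the ACC VGPRs) so that the register banks are left in a known,
 * initialized state.
 */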
4334 static const u32 vgpr_init_compute_shader[] =
4335 {
4336 	0xb07c0000, 0xbe8000ff,
4337 	0x000000f8, 0xbf110800,
4338 	0x7e000280, 0x7e020280,
4339 	0x7e040280, 0x7e060280,
4340 	0x7e080280, 0x7e0a0280,
4341 	0x7e0c0280, 0x7e0e0280,
4342 	0x80808800, 0xbe803200,
4343 	0xbf84fff5, 0xbf9c0000,
4344 	0xd28c0001, 0x0001007f,
4345 	0xd28d0001, 0x0002027e,
4346 	0x10020288, 0xb8810904,
4347 	0xb7814000, 0xd1196a01,
4348 	0x00000301, 0xbe800087,
4349 	0xbefc00c1, 0xd89c4000,
4350 	0x00020201, 0xd89cc080,
4351 	0x00040401, 0x320202ff,
4352 	0x00000800, 0x80808100,
4353 	0xbf84fff8, 0x7e020280,
4354 	0xbf810000, 0x00000000,
4355 };
4356 
4357 static const u32 sgpr_init_compute_shader[] =
4358 {
4359 	0xb07c0000, 0xbe8000ff,
4360 	0x0000005f, 0xbee50080,
4361 	0xbe812c65, 0xbe822c65,
4362 	0xbe832c65, 0xbe842c65,
4363 	0xbe852c65, 0xb77c0005,
4364 	0x80808500, 0xbf84fff8,
4365 	0xbe800080, 0xbf810000,
4366 };
4367 
4368 static const u32 vgpr_init_compute_shader_arcturus[] = {
4369 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4370 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4371 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4372 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4373 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4374 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4375 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4376 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4377 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4378 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4379 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4380 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4381 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4382 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4383 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4384 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4385 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4386 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4387 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4388 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4389 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4390 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4391 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4392 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4393 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4394 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4395 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4396 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4397 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4398 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4399 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4400 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4401 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4402 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4403 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4404 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4405 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4406 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4407 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4408 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4409 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4410 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4411 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4412 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4413 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4414 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4415 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4416 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4417 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4418 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4419 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4420 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4421 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4422 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4423 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4424 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4425 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4426 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4427 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4428 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4429 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4430 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4431 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4432 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4433 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4434 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4435 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4436 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4437 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4438 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4439 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4440 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4441 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4442 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4443 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4444 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4445 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4446 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4447 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4448 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4449 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4450 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4451 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4452 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4453 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4454 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4455 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4456 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4457 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4458 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4459 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4460 	0xbf84fff8, 0xbf810000,
4461 };
4462 
4463 /* When the register arrays below are changed, please update gpr_reg_size and
4464  * sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds() so that all
4465  * gfx9 ASICs remain covered. */
4466 static const struct soc15_reg_entry vgpr_init_regs[] = {
4467    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4468    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4469    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4470    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4471    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4472    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4474    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4475    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4476    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4477    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4481 };
4482 
4483 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4484    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4485    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4486    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4487    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4488    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4489    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4490    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4491    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4492    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4493    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4494    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4495    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4496    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4497    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4498 };
4499 
4500 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4501    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4502    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4503    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4504    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4505    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4506    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4507    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4508    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4509    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4510    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4511    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4512    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4513    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4514    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4515 };
4516 
4517 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4518    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4519    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4520    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4521    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4522    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4523    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4524    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4525    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4526    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4527    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4528    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4529    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4530    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4531    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4532 };
4533 
4534 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4535    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4536    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4537    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4538    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4539    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4540    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4541    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4542    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4543    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4544    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4545    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4546    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4547    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4548    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4549    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4550    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4551    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4552    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4553    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4554    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4555    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4556    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4557    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4558    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4559    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4560    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4561    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4562    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4563    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4564    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4565    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4566    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4567    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4568 };
4569 
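/* GDS variant of the EDC workaround: map all of GDS to VMID0 and use a
 * DMA_DATA packet to clear the whole aperture, then poll until the compute
 * ring has consumed the packet before GDS_VMID0_SIZE is restored to 0.
 */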
4570 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4571 {
4572 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4573 	int i, r;
4574 
4575 	/* only supported when RAS is enabled */
4576 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4577 		return 0;
4578 
4579 	r = amdgpu_ring_alloc(ring, 7);
4580 	if (r) {
4581 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4582 			ring->name, r);
4583 		return r;
4584 	}
4585 
4586 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4587 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4588 
4589 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4590 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4591 				PACKET3_DMA_DATA_DST_SEL(1) |
4592 				PACKET3_DMA_DATA_SRC_SEL(2) |
4593 				PACKET3_DMA_DATA_ENGINE(0)));
4594 	amdgpu_ring_write(ring, 0);
4595 	amdgpu_ring_write(ring, 0);
4596 	amdgpu_ring_write(ring, 0);
4597 	amdgpu_ring_write(ring, 0);
4598 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4599 				adev->gds.gds_size);
4600 
4601 	amdgpu_ring_commit(ring);
4602 
4603 	for (i = 0; i < adev->usec_timeout; i++) {
4604 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4605 			break;
4606 		udelay(1);
4607 	}
4608 
4609 	if (i >= adev->usec_timeout)
4610 		r = -ETIMEDOUT;
4611 
4612 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4613 
4614 	return r;
4615 }
4616 
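/* GPR variant of the EDC workaround: build a single IB that performs three
 * compute dispatches (one with the VGPR init shader, two with the SGPR init
 * shader on different CU masks), each preceded by the matching COMPUTE_*
 * register state and followed by a CS partial flush event.
 */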
4617 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4618 {
4619 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4620 	struct amdgpu_ib ib;
4621 	struct dma_fence *f = NULL;
4622 	int r, i;
4623 	unsigned total_size, vgpr_offset, sgpr_offset;
4624 	u64 gpu_addr;
4625 
4626 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4627 						adev->gfx.config.max_cu_per_sh *
4628 						adev->gfx.config.max_sh_per_se;
4629 	int sgpr_work_group_size = 5;
4630 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4631 	int vgpr_init_shader_size;
4632 	const u32 *vgpr_init_shader_ptr;
4633 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4634 
4635 	/* only supported when RAS is enabled */
4636 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4637 		return 0;
4638 
4639 	/* bail if the compute ring is not ready */
4640 	if (!ring->sched.ready)
4641 		return 0;
4642 
4643 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4644 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4645 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4646 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4647 	} else {
4648 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4649 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4650 		vgpr_init_regs_ptr = vgpr_init_regs;
4651 	}
4652 
4653 	total_size =
4654 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4655 	total_size +=
4656 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4657 	total_size +=
4658 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4659 	total_size = ALIGN(total_size, 256);
4660 	vgpr_offset = total_size;
4661 	total_size += ALIGN(vgpr_init_shader_size, 256);
4662 	sgpr_offset = total_size;
4663 	total_size += sizeof(sgpr_init_compute_shader);
4664 
4665 	/* allocate an indirect buffer to put the commands in */
4666 	memset(&ib, 0, sizeof(ib));
4667 	r = amdgpu_ib_get(adev, NULL, total_size,
4668 					AMDGPU_IB_POOL_DIRECT, &ib);
4669 	if (r) {
4670 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4671 		return r;
4672 	}
4673 
4674 	/* load the compute shaders */
4675 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4676 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4677 
4678 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4679 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4680 
4681 	/* init the ib length to 0 */
4682 	ib.length_dw = 0;
4683 
4684 	/* VGPR */
4685 	/* write the register state for the compute dispatch */
4686 	for (i = 0; i < gpr_reg_size; i++) {
4687 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4688 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4689 								- PACKET3_SET_SH_REG_START;
4690 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4691 	}
4692 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4693 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4694 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4695 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4696 							- PACKET3_SET_SH_REG_START;
4697 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4698 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4699 
4700 	/* write dispatch packet */
4701 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4702 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4703 	ib.ptr[ib.length_dw++] = 1; /* y */
4704 	ib.ptr[ib.length_dw++] = 1; /* z */
4705 	ib.ptr[ib.length_dw++] =
4706 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4707 
4708 	/* write CS partial flush packet */
4709 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4710 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4711 
4712 	/* SGPR1 */
4713 	/* write the register state for the compute dispatch */
4714 	for (i = 0; i < gpr_reg_size; i++) {
4715 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4716 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4717 								- PACKET3_SET_SH_REG_START;
4718 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4719 	}
4720 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4721 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4722 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4723 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4724 							- PACKET3_SET_SH_REG_START;
4725 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4726 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4727 
4728 	/* write dispatch packet */
4729 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4730 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4731 	ib.ptr[ib.length_dw++] = 1; /* y */
4732 	ib.ptr[ib.length_dw++] = 1; /* z */
4733 	ib.ptr[ib.length_dw++] =
4734 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4735 
4736 	/* write CS partial flush packet */
4737 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4738 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4739 
4740 	/* SGPR2 */
4741 	/* write the register state for the compute dispatch */
4742 	for (i = 0; i < gpr_reg_size; i++) {
4743 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4744 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4745 								- PACKET3_SET_SH_REG_START;
4746 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4747 	}
4748 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4749 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4750 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4751 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4752 							- PACKET3_SET_SH_REG_START;
4753 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4754 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4755 
4756 	/* write dispatch packet */
4757 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4758 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4759 	ib.ptr[ib.length_dw++] = 1; /* y */
4760 	ib.ptr[ib.length_dw++] = 1; /* z */
4761 	ib.ptr[ib.length_dw++] =
4762 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4763 
4764 	/* write CS partial flush packet */
4765 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4766 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4767 
4768 	/* schedule the IB on the ring */
4769 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4770 	if (r) {
4771 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4772 		goto fail;
4773 	}
4774 
4775 	/* wait for the GPU to finish processing the IB */
4776 	r = dma_fence_wait(f, false);
4777 	if (r) {
4778 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4779 		goto fail;
4780 	}
4781 
4782 fail:
4783 	amdgpu_ib_free(adev, &ib, NULL);
4784 	dma_fence_put(f);
4785 
4786 	return r;
4787 }
4788 
4789 static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
4790 {
4791 	struct amdgpu_device *adev = ip_block->adev;
4792 
4793 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4794 
4795 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4796 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4797 		adev->gfx.num_gfx_rings = 0;
4798 	else
4799 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4800 	adev->gfx.xcc_mask = 1;
4801 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4802 					  AMDGPU_MAX_COMPUTE_RINGS);
4803 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4804 	gfx_v9_0_set_ring_funcs(adev);
4805 	gfx_v9_0_set_irq_funcs(adev);
4806 	gfx_v9_0_set_gds_init(adev);
4807 	gfx_v9_0_set_rlc_funcs(adev);
4808 
4809 	/* init rlcg reg access ctrl */
4810 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4811 
4812 	return gfx_v9_0_init_microcode(adev);
4813 }
4814 
4815 static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block)
4816 {
4817 	struct amdgpu_device *adev = ip_block->adev;
4818 	int r;
4819 
4820 	/*
4821 	 * Temporary workaround: on several cards the CP firmware fails to
4822 	 * update the read pointer while CPDMA writes the GDS clearing
4823 	 * operation during the suspend/resume sequence, so limit this
4824 	 * operation to the cold boot sequence.
4825 	 */
4826 	if ((!adev->in_suspend) &&
4827 	    (adev->gds.gds_size)) {
4828 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4829 		if (r)
4830 			return r;
4831 	}
4832 
4833 	/* requires IBs so do in late init after IB pool is initialized */
4834 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4835 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4836 	else
4837 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4838 
4839 	if (r)
4840 		return r;
4841 
4842 	if (adev->gfx.ras &&
4843 	    adev->gfx.ras->enable_watchdog_timer)
4844 		adev->gfx.ras->enable_watchdog_timer(adev);
4845 
4846 	return 0;
4847 }
4848 
4849 static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block)
4850 {
4851 	struct amdgpu_device *adev = ip_block->adev;
4852 	int r;
4853 
4854 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4855 	if (r)
4856 		return r;
4857 
4858 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4859 	if (r)
4860 		return r;
4861 
4862 	r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
4863 	if (r)
4864 		return r;
4865 
4866 	r = gfx_v9_0_ecc_late_init(ip_block);
4867 	if (r)
4868 		return r;
4869 
4870 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4871 		gfx_v9_4_2_debug_trap_config_init(adev,
4872 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4873 	else
4874 		gfx_v9_0_debug_trap_config_init(adev,
4875 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4876 
4877 	return 0;
4878 }
4879 
4880 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4881 {
4882 	uint32_t rlc_setting;
4883 
4884 	/* if RLC is not enabled, do nothing */
4885 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4886 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4887 		return false;
4888 
4889 	return true;
4890 }
4891 
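/* Enter RLC safe mode: write the CMD bit together with the MESSAGE field to
 * RLC_SAFE_MODE, then poll until the RLC firmware clears CMD to acknowledge
 * the request.
 */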
4892 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4893 {
4894 	uint32_t data;
4895 	unsigned i;
4896 
4897 	data = RLC_SAFE_MODE__CMD_MASK;
4898 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4899 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4900 
4901 	/* wait for RLC_SAFE_MODE */
4902 	for (i = 0; i < adev->usec_timeout; i++) {
4903 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4904 			break;
4905 		udelay(1);
4906 	}
4907 }
4908 
4909 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4910 {
4911 	uint32_t data;
4912 
4913 	data = RLC_SAFE_MODE__CMD_MASK;
4914 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4915 }
4916 
4917 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4918 						bool enable)
4919 {
4920 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4921 
4922 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4923 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4924 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4925 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4926 	} else {
4927 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4928 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4929 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4930 	}
4931 
4932 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4933 }
4934 
4935 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4936 						bool enable)
4937 {
4938 	/* TODO: double check if we need to perform under safe mode */
4939 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4940 
4941 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4942 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4943 	else
4944 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4945 
4946 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4947 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4948 	else
4949 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4950 
4951 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4952 }
4953 
4954 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4955 						      bool enable)
4956 {
4957 	uint32_t data, def;
4958 
4959 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4960 
4961 	/* It is disabled by HW by default */
4962 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4963 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4964 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4965 
4966 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4967 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4968 
4969 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4970 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4971 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4972 
4973 		/* only for Vega10 & Raven1 */
4974 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4975 
4976 		if (def != data)
4977 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4978 
4979 		/* MGLS is a global flag to control all MGLS in GFX */
4980 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4981 			/* 2 - RLC memory Light sleep */
4982 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4983 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4984 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4985 				if (def != data)
4986 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4987 			}
4988 			/* 3 - CP memory Light sleep */
4989 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4990 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4991 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4992 				if (def != data)
4993 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4994 			}
4995 		}
4996 	} else {
4997 		/* 1 - MGCG_OVERRIDE */
4998 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4999 
5000 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
5001 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
5002 
5003 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5004 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5005 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
5006 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
5007 
5008 		if (def != data)
5009 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5010 
5011 		/* 2 - disable MGLS in RLC */
5012 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
5013 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5014 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5015 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
5016 		}
5017 
5018 		/* 3 - disable MGLS in CP */
5019 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
5020 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5021 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5022 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
5023 		}
5024 	}
5025 
5026 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5027 }
5028 
5029 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
5030 					   bool enable)
5031 {
5032 	uint32_t data, def;
5033 
5034 	if (!adev->gfx.num_gfx_rings)
5035 		return;
5036 
5037 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5038 
5039 	/* Enable 3D CGCG/CGLS */
5040 	if (enable) {
5041 		/* write cmd to clear cgcg/cgls ov */
5042 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5043 		/* unset CGCG override */
5044 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5045 		/* update CGCG and CGLS override bits */
5046 		if (def != data)
5047 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5048 
5049 		/* enable 3Dcgcg FSM(0x0000363f) */
5050 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5051 
5052 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5053 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5054 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5055 		else
5056 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5057 
5058 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5059 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5060 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5061 		if (def != data)
5062 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5063 
5064 		/* set IDLE_POLL_COUNT(0x00900100) */
5065 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5066 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5067 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5068 		if (def != data)
5069 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5070 	} else {
5071 		/* Disable CGCG/CGLS */
5072 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5073 		/* disable cgcg, cgls should be disabled */
5074 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5075 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5076 		/* disable cgcg and cgls in FSM */
5077 		if (def != data)
5078 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5079 	}
5080 
5081 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5082 }
5083 
5084 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5085 						      bool enable)
5086 {
5087 	uint32_t def, data;
5088 
5089 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5090 
5091 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5092 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5093 		/* unset CGCG override */
5094 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5095 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5096 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5097 		else
5098 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5099 		/* update CGCG and CGLS override bits */
5100 		if (def != data)
5101 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5102 
5103 		/* enable cgcg FSM(0x0000363F) */
5104 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5105 
5106 		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
5107 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5108 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5109 		else
5110 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5111 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5112 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5113 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5114 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5115 		if (def != data)
5116 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5117 
5118 		/* set IDLE_POLL_COUNT(0x00900100) */
5119 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5120 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5121 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5122 		if (def != data)
5123 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5124 	} else {
5125 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5126 		/* reset CGCG/CGLS bits */
5127 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5128 		/* disable cgcg and cgls in FSM */
5129 		if (def != data)
5130 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5131 	}
5132 
5133 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5134 }
5135 
5136 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5137 					    bool enable)
5138 {
5139 	if (enable) {
5140 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5141 		 * ===  MGCG + MGLS ===
5142 		 */
5143 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5144 		/* ===  CGCG /CGLS for GFX 3D Only === */
5145 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5146 		/* ===  CGCG + CGLS === */
5147 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5148 	} else {
5149 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5150 		 * ===  CGCG + CGLS ===
5151 		 */
5152 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5153 		/* ===  CGCG /CGLS for GFX 3D Only === */
5154 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5155 		/* ===  MGCG + MGLS === */
5156 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5157 	}
5158 	return 0;
5159 }
5160 
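/* Select which VMID the RLC SPM (streaming performance monitor) unit uses
 * for its memory accesses.  On SRIOV pp_one_vf configurations the register
 * is accessed through the NO_KIQ variants, bypassing the KIQ.
 */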
5161 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5162 					      unsigned int vmid)
5163 {
5164 	u32 reg, data;
5165 
5166 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5167 	if (amdgpu_sriov_is_pp_one_vf(adev))
5168 		data = RREG32_NO_KIQ(reg);
5169 	else
5170 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5171 
5172 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5173 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5174 
5175 	if (amdgpu_sriov_is_pp_one_vf(adev))
5176 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5177 	else
5178 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5179 }
5180 
5181 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5182 {
5183 	amdgpu_gfx_off_ctrl(adev, false);
5184 
5185 	gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5186 
5187 	amdgpu_gfx_off_ctrl(adev, true);
5188 }
5189 
5190 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5191 					uint32_t offset,
5192 					struct soc15_reg_rlcg *entries, int arr_size)
5193 {
5194 	int i;
5195 	uint32_t reg;
5196 
5197 	if (!entries)
5198 		return false;
5199 
5200 	for (i = 0; i < arr_size; i++) {
5201 		const struct soc15_reg_rlcg *entry;
5202 
5203 		entry = &entries[i];
5204 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5205 		if (offset == reg)
5206 			return true;
5207 	}
5208 
5209 	return false;
5210 }
5211 
5212 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5213 {
5214 	return gfx_v9_0_check_rlcg_range(adev, offset,
5215 					(void *)rlcg_access_gc_9_0,
5216 					ARRAY_SIZE(rlcg_access_gc_9_0));
5217 }
5218 
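/* gfx9 RLC callback table; the common amdgpu RLC helpers (for example
 * amdgpu_gfx_rlc_enter_safe_mode()/amdgpu_gfx_rlc_exit_safe_mode() used
 * above) dispatch through these hooks.
 */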
5219 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5220 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5221 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5222 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5223 	.init = gfx_v9_0_rlc_init,
5224 	.get_csb_size = gfx_v9_0_get_csb_size,
5225 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5226 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5227 	.resume = gfx_v9_0_rlc_resume,
5228 	.stop = gfx_v9_0_rlc_stop,
5229 	.reset = gfx_v9_0_rlc_reset,
5230 	.start = gfx_v9_0_rlc_start,
5231 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5232 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5233 };
5234 
5235 static int gfx_v9_0_set_powergating_state(void *handle,
5236 					  enum amd_powergating_state state)
5237 {
5238 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5239 	bool enable = (state == AMD_PG_STATE_GATE);
5240 
5241 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5242 	case IP_VERSION(9, 2, 2):
5243 	case IP_VERSION(9, 1, 0):
5244 	case IP_VERSION(9, 3, 0):
5245 		if (!enable)
5246 			amdgpu_gfx_off_ctrl(adev, false);
5247 
5248 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5249 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5250 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5251 		} else {
5252 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5253 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5254 		}
5255 
5256 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5257 			gfx_v9_0_enable_cp_power_gating(adev, true);
5258 		else
5259 			gfx_v9_0_enable_cp_power_gating(adev, false);
5260 
5261 		/* update gfx cgpg state */
5262 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5263 
5264 		/* update mgcg state */
5265 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5266 
5267 		if (enable)
5268 			amdgpu_gfx_off_ctrl(adev, true);
5269 		break;
5270 	case IP_VERSION(9, 2, 1):
5271 		amdgpu_gfx_off_ctrl(adev, enable);
5272 		break;
5273 	default:
5274 		break;
5275 	}
5276 
5277 	return 0;
5278 }
5279 
5280 static int gfx_v9_0_set_clockgating_state(void *handle,
5281 					  enum amd_clockgating_state state)
5282 {
5283 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5284 
5285 	if (amdgpu_sriov_vf(adev))
5286 		return 0;
5287 
5288 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5289 	case IP_VERSION(9, 0, 1):
5290 	case IP_VERSION(9, 2, 1):
5291 	case IP_VERSION(9, 4, 0):
5292 	case IP_VERSION(9, 2, 2):
5293 	case IP_VERSION(9, 1, 0):
5294 	case IP_VERSION(9, 4, 1):
5295 	case IP_VERSION(9, 3, 0):
5296 	case IP_VERSION(9, 4, 2):
5297 		gfx_v9_0_update_gfx_clock_gating(adev,
5298 						 state == AMD_CG_STATE_GATE);
5299 		break;
5300 	default:
5301 		break;
5302 	}
5303 	return 0;
5304 }
5305 
5306 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5307 {
5308 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5309 	int data;
5310 
5311 	if (amdgpu_sriov_vf(adev))
5312 		*flags = 0;
5313 
5314 	/* AMD_CG_SUPPORT_GFX_MGCG */
5315 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5316 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5317 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5318 
5319 	/* AMD_CG_SUPPORT_GFX_CGCG */
5320 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5321 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5322 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5323 
5324 	/* AMD_CG_SUPPORT_GFX_CGLS */
5325 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5326 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5327 
5328 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5329 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5330 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5331 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5332 
5333 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5334 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5335 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5336 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5337 
5338 	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5339 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5340 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5341 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5342 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5343 
5344 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5345 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5346 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5347 	}
5348 }
5349 
5350 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5351 {
5352 	return *ring->rptr_cpu_addr; /* gfx9 uses a 32-bit rptr */
5353 }
5354 
5355 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5356 {
5357 	struct amdgpu_device *adev = ring->adev;
5358 	u64 wptr;
5359 
5360 	/* XXX check if swapping is necessary on BE */
5361 	if (ring->use_doorbell) {
5362 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5363 	} else {
5364 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5365 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5366 	}
5367 
5368 	return wptr;
5369 }
5370 
5371 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5372 {
5373 	struct amdgpu_device *adev = ring->adev;
5374 
5375 	if (ring->use_doorbell) {
5376 		/* XXX check if swapping is necessary on BE */
5377 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5378 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5379 	} else {
5380 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5381 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5382 	}
5383 }
5384 
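/* Emit an HDP flush: a wait_reg_mem sequence kicks the NBIO HDP flush
 * request register and then polls the done register for the matching bit.
 * The ref_and_mask bit is chosen per CP engine (PFP for gfx, per-pipe MEC
 * bits for compute).
 */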
5385 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5386 {
5387 	struct amdgpu_device *adev = ring->adev;
5388 	u32 ref_and_mask, reg_mem_engine;
5389 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5390 
5391 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5392 		switch (ring->me) {
5393 		case 1:
5394 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5395 			break;
5396 		case 2:
5397 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5398 			break;
5399 		default:
5400 			return;
5401 		}
5402 		reg_mem_engine = 0;
5403 	} else {
5404 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5405 		reg_mem_engine = 1; /* pfp */
5406 	}
5407 
5408 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5409 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5410 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5411 			      ref_and_mask, ref_and_mask, 0x20);
5412 }
5413 
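/* Emit an indirect buffer on the gfx ring.  CE IBs use the
 * INDIRECT_BUFFER_CONST opcode and DE IBs the plain INDIRECT_BUFFER opcode;
 * for preemptible IBs the control word also carries the PRE_ENB/PRE_RESUME
 * bits and the DE metadata is emitted so state can be restored after a
 * preemption.
 */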
5414 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5415 					struct amdgpu_job *job,
5416 					struct amdgpu_ib *ib,
5417 					uint32_t flags)
5418 {
5419 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5420 	u32 header, control = 0;
5421 
5422 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5423 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5424 	else
5425 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5426 
5427 	control |= ib->length_dw | (vmid << 24);
5428 
5429 	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5430 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5431 
5432 		if (flags & AMDGPU_IB_PREEMPTED)
5433 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5434 
5435 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5436 			gfx_v9_0_ring_emit_de_meta(ring,
5437 						   (!amdgpu_sriov_vf(ring->adev) &&
5438 						   flags & AMDGPU_IB_PREEMPTED) ?
5439 						   true : false,
5440 						   job->gds_size > 0 && job->gds_base != 0);
5441 	}
5442 
5443 	amdgpu_ring_write(ring, header);
5444 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5445 	amdgpu_ring_write(ring,
5446 #ifdef __BIG_ENDIAN
5447 		(2 << 0) |
5448 #endif
5449 		lower_32_bits(ib->gpu_addr));
5450 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5451 	amdgpu_ring_ib_on_emit_cntl(ring);
5452 	amdgpu_ring_write(ring, control);
5453 }
5454 
5455 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5456 				     unsigned offset)
5457 {
5458 	u32 control = ring->ring[offset];
5459 
5460 	control |= INDIRECT_BUFFER_PRE_RESUME(1);
5461 	ring->ring[offset] = control;
5462 }
5463 
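/* When resuming a preempted gfx IB the CE metadata payload saved in the CSA
 * (or MES context) is copied back over the packet in the ring at the
 * recorded offset; the copy is split in two where it would wrap past the end
 * of the ring buffer (buf_mask).
 */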
5464 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5465 					unsigned offset)
5466 {
5467 	struct amdgpu_device *adev = ring->adev;
5468 	void *ce_payload_cpu_addr;
5469 	uint64_t payload_offset, payload_size;
5470 
5471 	payload_size = sizeof(struct v9_ce_ib_state);
5472 
5473 	if (ring->is_mes_queue) {
5474 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5475 					  gfx[0].gfx_meta_data) +
5476 			offsetof(struct v9_gfx_meta_data, ce_payload);
5477 		ce_payload_cpu_addr =
5478 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5479 	} else {
5480 		payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5481 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5482 	}
5483 
5484 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5485 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5486 	} else {
5487 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5488 		       (ring->buf_mask + 1 - offset) << 2);
5489 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5490 		memcpy((void *)&ring->ring[0],
5491 		       ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5492 		       payload_size);
5493 	}
5494 }
5495 
5496 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5497 					unsigned offset)
5498 {
5499 	struct amdgpu_device *adev = ring->adev;
5500 	void *de_payload_cpu_addr;
5501 	uint64_t payload_offset, payload_size;
5502 
5503 	payload_size = sizeof(struct v9_de_ib_state);
5504 
5505 	if (ring->is_mes_queue) {
5506 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5507 					  gfx[0].gfx_meta_data) +
5508 			offsetof(struct v9_gfx_meta_data, de_payload);
5509 		de_payload_cpu_addr =
5510 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5511 	} else {
5512 		payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5513 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5514 	}
5515 
5516 	((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5517 		IB_COMPLETION_STATUS_PREEMPTED;
5518 
5519 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5520 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5521 	} else {
5522 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5523 		       (ring->buf_mask + 1 - offset) << 2);
5524 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5525 		memcpy((void *)&ring->ring[0],
5526 		       de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5527 		       payload_size);
5528 	}
5529 }
5530 
5531 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5532 					  struct amdgpu_job *job,
5533 					  struct amdgpu_ib *ib,
5534 					  uint32_t flags)
5535 {
5536 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5537 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5538 
5539 	/* Currently, there is a high likelihood of a wave ID mismatch
5540 	 * between ME and GDS, leading to a HW deadlock, because ME generates
5541 	 * different wave IDs than GDS expects. This happens randomly
5542 	 * when at least 5 compute pipes use GDS ordered append.
5543 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5544 	 * Those are probably bugs elsewhere in the kernel driver.
5545 	 *
5546 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets the wave ID counters in ME
5547 	 * and GDS to 0 for this ring (me/pipe).
5548 	 */
5549 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5550 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5551 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5552 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5553 	}
5554 
5555 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5556 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5557 	amdgpu_ring_write(ring,
5558 #ifdef __BIG_ENDIAN
5559 				(2 << 0) |
5560 #endif
5561 				lower_32_bits(ib->gpu_addr));
5562 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5563 	amdgpu_ring_write(ring, control);
5564 }
5565 
5566 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5567 				     u64 seq, unsigned flags)
5568 {
5569 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5570 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5571 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5572 	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5573 	uint32_t dw2 = 0;
5574 
5575 	/* RELEASE_MEM - flush caches, send int */
5576 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5577 
5578 	if (writeback) {
5579 		dw2 = EOP_TC_NC_ACTION_EN;
5580 	} else {
5581 		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5582 				EOP_TC_MD_ACTION_EN;
5583 	}
5584 	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5585 				EVENT_INDEX(5);
5586 	if (exec)
5587 		dw2 |= EOP_EXEC;
5588 
5589 	amdgpu_ring_write(ring, dw2);
5590 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5591 
5592 	/*
5593 	 * The address should be Qword aligned for a 64bit write and Dword
5594 	 * aligned when only the low 32 bits of data are sent (data high is discarded).
5595 	 */
5596 	if (write64bit)
5597 		BUG_ON(addr & 0x7);
5598 	else
5599 		BUG_ON(addr & 0x3);
5600 	amdgpu_ring_write(ring, lower_32_bits(addr));
5601 	amdgpu_ring_write(ring, upper_32_bits(addr));
5602 	amdgpu_ring_write(ring, lower_32_bits(seq));
5603 	amdgpu_ring_write(ring, upper_32_bits(seq));
5604 	amdgpu_ring_write(ring, 0);
5605 }
5606 
5607 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5608 {
5609 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5610 	uint32_t seq = ring->fence_drv.sync_seq;
5611 	uint64_t addr = ring->fence_drv.gpu_addr;
5612 
5613 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5614 			      lower_32_bits(addr), upper_32_bits(addr),
5615 			      seq, 0xffffffff, 4);
5616 }
5617 
5618 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5619 					unsigned vmid, uint64_t pd_addr)
5620 {
5621 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5622 
5623 	/* compute doesn't have PFP */
5624 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5625 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5626 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5627 		amdgpu_ring_write(ring, 0x0);
5628 	}
5629 }
5630 
5631 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5632 {
5633 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5634 }
5635 
5636 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5637 {
5638 	u64 wptr;
5639 
5640 	/* XXX check if swapping is necessary on BE */
5641 	if (ring->use_doorbell)
5642 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5643 	else
5644 		BUG();
5645 	return wptr;
5646 }
5647 
5648 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5649 {
5650 	struct amdgpu_device *adev = ring->adev;
5651 
5652 	/* XXX check if swapping is necessary on BE */
5653 	if (ring->use_doorbell) {
5654 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5655 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5656 	} else {
5657 		BUG(); /* only DOORBELL method supported on gfx9 now */
5658 	}
5659 }
5660 
5661 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5662 					 u64 seq, unsigned int flags)
5663 {
5664 	struct amdgpu_device *adev = ring->adev;
5665 
5666 	/* we only allocate 32bit for each seq wb address */
5667 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5668 
5669 	/* write fence seq to the "addr" */
5670 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5671 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5672 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5673 	amdgpu_ring_write(ring, lower_32_bits(addr));
5674 	amdgpu_ring_write(ring, upper_32_bits(addr));
5675 	amdgpu_ring_write(ring, lower_32_bits(seq));
5676 
5677 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5678 		/* set register to trigger INT */
5679 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5680 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5681 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5682 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5683 		amdgpu_ring_write(ring, 0);
5684 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5685 	}
5686 }
5687 
5688 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5689 {
5690 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5691 	amdgpu_ring_write(ring, 0);
5692 }
5693 
5694 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5695 {
5696 	struct amdgpu_device *adev = ring->adev;
5697 	struct v9_ce_ib_state ce_payload = {0};
5698 	uint64_t offset, ce_payload_gpu_addr;
5699 	void *ce_payload_cpu_addr;
5700 	int cnt;
5701 
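	/* PACKET3 count field = dwords following the header minus one; the
	 * WRITE_DATA body below is 3 control/address dwords plus the CE
	 * payload, hence (payload dwords) + 4 - 2.
	 */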
5702 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5703 
5704 	if (ring->is_mes_queue) {
5705 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5706 				  gfx[0].gfx_meta_data) +
5707 			offsetof(struct v9_gfx_meta_data, ce_payload);
5708 		ce_payload_gpu_addr =
5709 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5710 		ce_payload_cpu_addr =
5711 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5712 	} else {
5713 		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5714 		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5715 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5716 	}
5717 
5718 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5719 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5720 				 WRITE_DATA_DST_SEL(8) |
5721 				 WR_CONFIRM) |
5722 				 WRITE_DATA_CACHE_POLICY(0));
5723 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5724 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5725 
5726 	amdgpu_ring_ib_on_emit_ce(ring);
5727 
5728 	if (resume)
5729 		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5730 					   sizeof(ce_payload) >> 2);
5731 	else
5732 		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5733 					   sizeof(ce_payload) >> 2);
5734 }
5735 
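/* Mid-command-buffer preemption of the gfx ring: set the preempt
 * condition-exec flag to false, emit a trailing fence on the gfx ring,
 * ask the KIQ to preempt the queue (PREEMPT_QUEUES_NO_UNMAP), poll for
 * the trailing fence to signal, then clear CP_VMID_PREEMPT and restore
 * the condition-exec flag.
 */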
5736 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5737 {
5738 	int i, r = 0;
5739 	struct amdgpu_device *adev = ring->adev;
5740 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5741 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5742 	unsigned long flags;
5743 
5744 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5745 		return -EINVAL;
5746 
5747 	spin_lock_irqsave(&kiq->ring_lock, flags);
5748 
5749 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5750 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5751 		return -ENOMEM;
5752 	}
5753 
5754 	/* assert preemption condition */
5755 	amdgpu_ring_set_preempt_cond_exec(ring, false);
5756 
5757 	ring->trail_seq += 1;
5758 	amdgpu_ring_alloc(ring, 13);
5759 	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5760 				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5761 
5762 	/* assert IB preemption, emit the trailing fence */
5763 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5764 				   ring->trail_fence_gpu_addr,
5765 				   ring->trail_seq);
5766 
5767 	amdgpu_ring_commit(kiq_ring);
5768 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5769 
5770 	/* poll the trailing fence */
5771 	for (i = 0; i < adev->usec_timeout; i++) {
5772 		if (ring->trail_seq ==
5773 			le32_to_cpu(*ring->trail_fence_cpu_addr))
5774 			break;
5775 		udelay(1);
5776 	}
5777 
5778 	if (i >= adev->usec_timeout) {
5779 		r = -EINVAL;
5780 		DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
5781 	}
5782 
5783 	/* reset the CP_VMID_PREEMPT after trailing fence */
5784 	amdgpu_ring_emit_wreg(ring,
5785 			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5786 			      0x0);
5787 	amdgpu_ring_commit(ring);
5788 
5789 	/* deassert preemption condition */
5790 	amdgpu_ring_set_preempt_cond_exec(ring, true);
5791 	return r;
5792 }
5793 
5794 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5795 {
5796 	struct amdgpu_device *adev = ring->adev;
5797 	struct v9_de_ib_state de_payload = {0};
5798 	uint64_t offset, gds_addr, de_payload_gpu_addr;
5799 	void *de_payload_cpu_addr;
5800 	int cnt;
5801 
5802 	if (ring->is_mes_queue) {
5803 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5804 				  gfx[0].gfx_meta_data) +
5805 			offsetof(struct v9_gfx_meta_data, de_payload);
5806 		de_payload_gpu_addr =
5807 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5808 		de_payload_cpu_addr =
5809 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5810 
5811 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5812 				  gfx[0].gds_backup) +
5813 			offsetof(struct v9_gfx_meta_data, de_payload);
5814 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5815 	} else {
5816 		offset = offsetof(struct v9_gfx_meta_data, de_payload);
5817 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5818 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5819 
5820 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5821 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5822 				 PAGE_SIZE);
5823 	}
5824 
5825 	if (usegds) {
5826 		de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5827 		de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5828 	}
5829 
5830 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5831 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5832 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5833 				 WRITE_DATA_DST_SEL(8) |
5834 				 WR_CONFIRM) |
5835 				 WRITE_DATA_CACHE_POLICY(0));
5836 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5837 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5838 
5839 	amdgpu_ring_ib_on_emit_de(ring);
5840 	if (resume)
5841 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5842 					   sizeof(de_payload) >> 2);
5843 	else
5844 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5845 					   sizeof(de_payload) >> 2);
5846 }
5847 
5848 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5849 				   bool secure)
5850 {
5851 	uint32_t v = secure ? FRAME_TMZ : 0;
5852 
5853 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5854 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5855 }
5856 
5857 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5858 {
5859 	uint32_t dw2 = 0;
5860 
5861 	gfx_v9_0_ring_emit_ce_meta(ring,
5862 				   (!amdgpu_sriov_vf(ring->adev) &&
5863 				   flags & AMDGPU_IB_PREEMPTED) ? true : false);
5864 
5865 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5866 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5867 		/* set load_global_config & load_global_uconfig */
5868 		dw2 |= 0x8001;
5869 		/* set load_cs_sh_regs */
5870 		dw2 |= 0x01000000;
5871 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5872 		dw2 |= 0x10002;
5873 
5874 		/* set load_ce_ram if a preamble is presented */
5875 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5876 			dw2 |= 0x10000000;
5877 	} else {
5878 		/* still load_ce_ram if this is the first time a preamble is
5879 		 * presented, even though no context switch happens.
5880 		 */
5881 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5882 			dw2 |= 0x10000000;
5883 	}
5884 
5885 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5886 	amdgpu_ring_write(ring, dw2);
5887 	amdgpu_ring_write(ring, 0);
5888 }
5889 
5890 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5891 						  uint64_t addr)
5892 {
5893 	unsigned ret;
5894 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5895 	amdgpu_ring_write(ring, lower_32_bits(addr));
5896 	amdgpu_ring_write(ring, upper_32_bits(addr));
5897 	/* discard following DWs if *cond_exec_gpu_addr==0 */
5898 	amdgpu_ring_write(ring, 0);
5899 	ret = ring->wptr & ring->buf_mask;
5900 	/* patch dummy value later */
5901 	amdgpu_ring_write(ring, 0);
5902 	return ret;
5903 }
5904 
5905 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5906 				    uint32_t reg_val_offs)
5907 {
5908 	struct amdgpu_device *adev = ring->adev;
5909 
5910 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5911 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5912 				(5 << 8) |	/* dst: memory */
5913 				(1 << 20));	/* write confirm */
5914 	amdgpu_ring_write(ring, reg);
5915 	amdgpu_ring_write(ring, 0);
5916 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5917 				reg_val_offs * 4));
5918 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5919 				reg_val_offs * 4));
5920 }
5921 
5922 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5923 				    uint32_t val)
5924 {
5925 	uint32_t cmd = 0;
5926 
5927 	switch (ring->funcs->type) {
5928 	case AMDGPU_RING_TYPE_GFX:
5929 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5930 		break;
5931 	case AMDGPU_RING_TYPE_KIQ:
5932 		cmd = (1 << 16); /* no inc addr */
5933 		break;
5934 	default:
5935 		cmd = WR_CONFIRM;
5936 		break;
5937 	}
5938 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5939 	amdgpu_ring_write(ring, cmd);
5940 	amdgpu_ring_write(ring, reg);
5941 	amdgpu_ring_write(ring, 0);
5942 	amdgpu_ring_write(ring, val);
5943 }
5944 
5945 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5946 					uint32_t val, uint32_t mask)
5947 {
5948 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5949 }
5950 
5951 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5952 						  uint32_t reg0, uint32_t reg1,
5953 						  uint32_t ref, uint32_t mask)
5954 {
5955 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5956 	struct amdgpu_device *adev = ring->adev;
5957 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5958 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5959 
5960 	if (fw_version_ok)
5961 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5962 				      ref, mask, 0x20);
5963 	else
5964 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5965 							   ref, mask);
5966 }
5967 
5968 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5969 {
5970 	struct amdgpu_device *adev = ring->adev;
5971 	uint32_t value = 0;
5972 
5973 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5974 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5975 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5976 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5977 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5978 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5979 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5980 }
5981 
5982 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5983 						 enum amdgpu_interrupt_state state)
5984 {
5985 	switch (state) {
5986 	case AMDGPU_IRQ_STATE_DISABLE:
5987 	case AMDGPU_IRQ_STATE_ENABLE:
5988 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5989 			       TIME_STAMP_INT_ENABLE,
5990 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5991 		break;
5992 	default:
5993 		break;
5994 	}
5995 }
5996 
5997 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5998 						     int me, int pipe,
5999 						     enum amdgpu_interrupt_state state)
6000 {
6001 	u32 mec_int_cntl, mec_int_cntl_reg;
6002 
6003 	/*
6004 	 * amdgpu controls only the first MEC. That's why this function only
6005 	 * handles the setting of interrupts for this specific MEC. All other
6006 	 * pipes' interrupts are set by amdkfd.
6007 	 */
6008 
6009 	if (me == 1) {
6010 		switch (pipe) {
6011 		case 0:
6012 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6013 			break;
6014 		case 1:
6015 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6016 			break;
6017 		case 2:
6018 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6019 			break;
6020 		case 3:
6021 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6022 			break;
6023 		default:
6024 			DRM_DEBUG("invalid pipe %d\n", pipe);
6025 			return;
6026 		}
6027 	} else {
6028 		DRM_DEBUG("invalid me %d\n", me);
6029 		return;
6030 	}
6031 
6032 	switch (state) {
6033 	case AMDGPU_IRQ_STATE_DISABLE:
6034 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6035 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6036 					     TIME_STAMP_INT_ENABLE, 0);
6037 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6038 		break;
6039 	case AMDGPU_IRQ_STATE_ENABLE:
6040 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6041 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6042 					     TIME_STAMP_INT_ENABLE, 1);
6043 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6044 		break;
6045 	default:
6046 		break;
6047 	}
6048 }
6049 
6050 static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev,
6051 				     int me, int pipe)
6052 {
6053 	/*
6054 	 * amdgpu controls only the first MEC. That's why this function only
6055 	 * handles the setting of interrupts for this specific MEC. All other
6056 	 * pipes' interrupts are set by amdkfd.
6057 	 */
6058 	if (me != 1)
6059 		return 0;
6060 
6061 	switch (pipe) {
6062 	case 0:
6063 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6064 	case 1:
6065 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6066 	case 2:
6067 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6068 	case 3:
6069 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6070 	default:
6071 		return 0;
6072 	}
6073 }
6074 
6075 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6076 					     struct amdgpu_irq_src *source,
6077 					     unsigned type,
6078 					     enum amdgpu_interrupt_state state)
6079 {
6080 	u32 cp_int_cntl_reg, cp_int_cntl;
6081 	int i, j;
6082 
6083 	switch (state) {
6084 	case AMDGPU_IRQ_STATE_DISABLE:
6085 	case AMDGPU_IRQ_STATE_ENABLE:
6086 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6087 			       PRIV_REG_INT_ENABLE,
6088 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6089 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6090 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6091 				/* MECs start at 1 */
6092 				cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6093 
6094 				if (cp_int_cntl_reg) {
6095 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6096 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6097 								    PRIV_REG_INT_ENABLE,
6098 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6099 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6100 				}
6101 			}
6102 		}
6103 		break;
6104 	default:
6105 		break;
6106 	}
6107 
6108 	return 0;
6109 }
6110 
6111 static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev,
6112 					   struct amdgpu_irq_src *source,
6113 					   unsigned type,
6114 					   enum amdgpu_interrupt_state state)
6115 {
6116 	u32 cp_int_cntl_reg, cp_int_cntl;
6117 	int i, j;
6118 
6119 	switch (state) {
6120 	case AMDGPU_IRQ_STATE_DISABLE:
6121 	case AMDGPU_IRQ_STATE_ENABLE:
6122 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6123 			       OPCODE_ERROR_INT_ENABLE,
6124 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6125 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6126 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6127 				/* MECs start at 1 */
6128 				cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6129 
6130 				if (cp_int_cntl_reg) {
6131 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6132 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6133 								    OPCODE_ERROR_INT_ENABLE,
6134 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6135 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6136 				}
6137 			}
6138 		}
6139 		break;
6140 	default:
6141 		break;
6142 	}
6143 
6144 	return 0;
6145 }
6146 
6147 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6148 					      struct amdgpu_irq_src *source,
6149 					      unsigned type,
6150 					      enum amdgpu_interrupt_state state)
6151 {
6152 	switch (state) {
6153 	case AMDGPU_IRQ_STATE_DISABLE:
6154 	case AMDGPU_IRQ_STATE_ENABLE:
6155 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6156 			       PRIV_INSTR_INT_ENABLE,
6157 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6158 		break;
6159 	default:
6160 		break;
6161 	}
6162 
6163 	return 0;
6164 }
6165 
6166 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
6167 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6168 			CP_ECC_ERROR_INT_ENABLE, 1)
6169 
6170 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
6171 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6172 			CP_ECC_ERROR_INT_ENABLE, 0)
6173 
6174 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
6175 					      struct amdgpu_irq_src *source,
6176 					      unsigned type,
6177 					      enum amdgpu_interrupt_state state)
6178 {
6179 	switch (state) {
6180 	case AMDGPU_IRQ_STATE_DISABLE:
6181 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6182 				CP_ECC_ERROR_INT_ENABLE, 0);
6183 		DISABLE_ECC_ON_ME_PIPE(1, 0);
6184 		DISABLE_ECC_ON_ME_PIPE(1, 1);
6185 		DISABLE_ECC_ON_ME_PIPE(1, 2);
6186 		DISABLE_ECC_ON_ME_PIPE(1, 3);
6187 		break;
6188 
6189 	case AMDGPU_IRQ_STATE_ENABLE:
6190 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6191 				CP_ECC_ERROR_INT_ENABLE, 1);
6192 		ENABLE_ECC_ON_ME_PIPE(1, 0);
6193 		ENABLE_ECC_ON_ME_PIPE(1, 1);
6194 		ENABLE_ECC_ON_ME_PIPE(1, 2);
6195 		ENABLE_ECC_ON_ME_PIPE(1, 3);
6196 		break;
6197 	default:
6198 		break;
6199 	}
6200 
6201 	return 0;
6202 }
6203 
6204 
6205 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6206 					    struct amdgpu_irq_src *src,
6207 					    unsigned type,
6208 					    enum amdgpu_interrupt_state state)
6209 {
6210 	switch (type) {
6211 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6212 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
6213 		break;
6214 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6215 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6216 		break;
6217 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6218 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6219 		break;
6220 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6221 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6222 		break;
6223 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6224 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6225 		break;
6226 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6227 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6228 		break;
6229 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6230 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6231 		break;
6232 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6233 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6234 		break;
6235 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6236 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6237 		break;
6238 	default:
6239 		break;
6240 	}
6241 	return 0;
6242 }
6243 
6244 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
6245 			    struct amdgpu_irq_src *source,
6246 			    struct amdgpu_iv_entry *entry)
6247 {
6248 	int i;
6249 	u8 me_id, pipe_id, queue_id;
6250 	struct amdgpu_ring *ring;
6251 
6252 	DRM_DEBUG("IH: CP EOP\n");
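	/* entry->ring_id encodes pipe in bits [1:0], ME in bits [3:2] and
	 * queue in bits [6:4].
	 */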
6253 	me_id = (entry->ring_id & 0x0c) >> 2;
6254 	pipe_id = (entry->ring_id & 0x03) >> 0;
6255 	queue_id = (entry->ring_id & 0x70) >> 4;
6256 
6257 	switch (me_id) {
6258 	case 0:
6259 		if (adev->gfx.num_gfx_rings) {
6260 			if (!adev->gfx.mcbp) {
6261 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6262 			} else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
6263 				/* Fence signals are handled on the software rings */
6264 				for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6265 					amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
6266 			}
6267 		}
6268 		break;
6269 	case 1:
6270 	case 2:
6271 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6272 			ring = &adev->gfx.compute_ring[i];
6273 			/* Per-queue interrupt is supported for MEC starting from VI.
6274 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6275 			 */
6276 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6277 				amdgpu_fence_process(ring);
6278 		}
6279 		break;
6280 	}
6281 	return 0;
6282 }
6283 
6284 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6285 			   struct amdgpu_iv_entry *entry)
6286 {
6287 	u8 me_id, pipe_id, queue_id;
6288 	struct amdgpu_ring *ring;
6289 	int i;
6290 
6291 	me_id = (entry->ring_id & 0x0c) >> 2;
6292 	pipe_id = (entry->ring_id & 0x03) >> 0;
6293 	queue_id = (entry->ring_id & 0x70) >> 4;
6294 
6295 	switch (me_id) {
6296 	case 0:
6297 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6298 		break;
6299 	case 1:
6300 	case 2:
6301 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6302 			ring = &adev->gfx.compute_ring[i];
6303 			if (ring->me == me_id && ring->pipe == pipe_id &&
6304 			    ring->queue == queue_id)
6305 				drm_sched_fault(&ring->sched);
6306 		}
6307 		break;
6308 	}
6309 }
6310 
6311 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6312 				 struct amdgpu_irq_src *source,
6313 				 struct amdgpu_iv_entry *entry)
6314 {
6315 	DRM_ERROR("Illegal register access in command stream\n");
6316 	gfx_v9_0_fault(adev, entry);
6317 	return 0;
6318 }
6319 
6320 static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev,
6321 			       struct amdgpu_irq_src *source,
6322 			       struct amdgpu_iv_entry *entry)
6323 {
6324 	DRM_ERROR("Illegal opcode in command stream\n");
6325 	gfx_v9_0_fault(adev, entry);
6326 	return 0;
6327 }
6328 
6329 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6330 				  struct amdgpu_irq_src *source,
6331 				  struct amdgpu_iv_entry *entry)
6332 {
6333 	DRM_ERROR("Illegal instruction in command stream\n");
6334 	gfx_v9_0_fault(adev, entry);
6335 	return 0;
6336 }
6337 
6338 
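/* Map of GFX RAS sub-blocks to their EDC counter registers: each entry
 * names the memory instance, its EDC count register, and the register
 * fields holding the SEC (single error corrected) and DED (double error
 * detected) counts; zeroed DED fields mean the block has no DED counter.
 */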
6339 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6340 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6341 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6342 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6343 	},
6344 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6345 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6346 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6347 	},
6348 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6349 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6350 	  0, 0
6351 	},
6352 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6353 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6354 	  0, 0
6355 	},
6356 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6357 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6358 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6359 	},
6360 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6361 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6362 	  0, 0
6363 	},
6364 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6365 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6366 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6367 	},
6368 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6369 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6370 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6371 	},
6372 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6373 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6374 	  0, 0
6375 	},
6376 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6377 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6378 	  0, 0
6379 	},
6380 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6381 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6382 	  0, 0
6383 	},
6384 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6385 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6386 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6387 	},
6388 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6389 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6390 	  0, 0
6391 	},
6392 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6393 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6394 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6395 	},
6396 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6397 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6398 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6399 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6400 	},
6401 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6402 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6403 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6404 	  0, 0
6405 	},
6406 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6407 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6408 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6409 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6410 	},
6411 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6412 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6413 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6414 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6415 	},
6416 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6417 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6418 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6419 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6420 	},
6421 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6422 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6423 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6424 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6425 	},
6426 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6427 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6428 	  0, 0
6429 	},
6430 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6431 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6432 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6433 	},
6434 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6435 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6436 	  0, 0
6437 	},
6438 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6439 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6440 	  0, 0
6441 	},
6442 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6443 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6444 	  0, 0
6445 	},
6446 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6447 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6448 	  0, 0
6449 	},
6450 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6451 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6452 	  0, 0
6453 	},
6454 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6455 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6456 	  0, 0
6457 	},
6458 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6459 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6460 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6461 	},
6462 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6463 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6464 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6465 	},
6466 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6467 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6468 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6469 	},
6470 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6471 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6472 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6473 	},
6474 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6475 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6476 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6477 	},
6478 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6479 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6480 	  0, 0
6481 	},
6482 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6483 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6484 	  0, 0
6485 	},
6486 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6487 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6488 	  0, 0
6489 	},
6490 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6491 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6492 	  0, 0
6493 	},
6494 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6495 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6496 	  0, 0
6497 	},
6498 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6499 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6500 	  0, 0
6501 	},
6502 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6503 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6504 	  0, 0
6505 	},
6506 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6507 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6508 	  0, 0
6509 	},
6510 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6511 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6512 	  0, 0
6513 	},
6514 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6515 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6516 	  0, 0
6517 	},
6518 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6519 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6520 	  0, 0
6521 	},
6522 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6523 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6524 	  0, 0
6525 	},
6526 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6527 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6528 	  0, 0
6529 	},
6530 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6531 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6532 	  0, 0
6533 	},
6534 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6535 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6536 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6537 	},
6538 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6539 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6540 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6541 	},
6542 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6543 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6544 	  0, 0
6545 	},
6546 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6547 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6548 	  0, 0
6549 	},
6550 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6551 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6552 	  0, 0
6553 	},
6554 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6555 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6556 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6557 	},
6558 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6559 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6560 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6561 	},
6562 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6563 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6564 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6565 	},
6566 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6567 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6568 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6569 	},
6570 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6571 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6572 	  0, 0
6573 	},
6574 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6575 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6576 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6577 	},
6578 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6579 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6580 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6581 	},
6582 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6583 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6584 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6585 	},
6586 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6587 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6588 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6589 	},
6590 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6591 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6592 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6593 	},
6594 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6595 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6596 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6597 	},
6598 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6599 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6600 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6601 	},
6602 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6603 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6604 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6605 	},
6606 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6607 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6608 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6609 	},
6610 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6611 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6612 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6613 	},
6614 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6615 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6616 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6617 	},
6618 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6619 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6620 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6621 	},
6622 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6623 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6624 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6625 	},
6626 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6627 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6628 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6629 	},
6630 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6631 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6632 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6633 	},
6634 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6635 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6636 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6637 	},
6638 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6639 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6640 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6641 	},
6642 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6643 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6644 	  0, 0
6645 	},
6646 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6647 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6648 	  0, 0
6649 	},
6650 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6651 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6652 	  0, 0
6653 	},
6654 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6655 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6656 	  0, 0
6657 	},
6658 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6659 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6660 	  0, 0
6661 	},
6662 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6663 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6664 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6665 	},
6666 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6667 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6668 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6669 	},
6670 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6671 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6672 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6673 	},
6674 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6675 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6676 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6677 	},
6678 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6679 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6680 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6681 	},
6682 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6683 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6684 	  0, 0
6685 	},
6686 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6687 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6688 	  0, 0
6689 	},
6690 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6691 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6692 	  0, 0
6693 	},
6694 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6695 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6696 	  0, 0
6697 	},
6698 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6699 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6700 	  0, 0
6701 	},
6702 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6703 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6704 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6705 	},
6706 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6707 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6708 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6709 	},
6710 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6711 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6712 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6713 	},
6714 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6715 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6716 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6717 	},
6718 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6719 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6720 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6721 	},
6722 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6723 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6724 	  0, 0
6725 	},
6726 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6727 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6728 	  0, 0
6729 	},
6730 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6731 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6732 	  0, 0
6733 	},
6734 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6735 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6736 	  0, 0
6737 	},
6738 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6739 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6740 	  0, 0
6741 	},
6742 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6743 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6744 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6745 	},
6746 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6747 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6748 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6749 	},
6750 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6751 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6752 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6753 	},
6754 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6755 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6756 	  0, 0
6757 	},
6758 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6759 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6760 	  0, 0
6761 	},
6762 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6763 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6764 	  0, 0
6765 	},
6766 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6767 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6768 	  0, 0
6769 	},
6770 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6771 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6772 	  0, 0
6773 	},
6774 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6775 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6776 	  0, 0
6777 	}
6778 };
6779 
6780 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6781 				     void *inject_if, uint32_t instance_mask)
6782 {
6783 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6784 	int ret;
6785 	struct ta_ras_trigger_error_input block_info = { 0 };
6786 
6787 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6788 		return -EINVAL;
6789 
6790 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6791 		return -EINVAL;
6792 
6793 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6794 		return -EPERM;
6795 
6796 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6797 	      info->head.type)) {
6798 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6799 			ras_gfx_subblocks[info->head.sub_block_index].name,
6800 			info->head.type);
6801 		return -EPERM;
6802 	}
6803 
6804 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6805 	      info->head.type)) {
6806 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6807 			ras_gfx_subblocks[info->head.sub_block_index].name,
6808 			info->head.type);
6809 		return -EPERM;
6810 	}
6811 
6812 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6813 	block_info.sub_block_index =
6814 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6815 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6816 	block_info.address = info->address;
6817 	block_info.value = info->value;
6818 
6819 	mutex_lock(&adev->grbm_idx_mutex);
6820 	ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6821 	mutex_unlock(&adev->grbm_idx_mutex);
6822 
6823 	return ret;
6824 }
6825 
6826 static const char * const vml2_mems[] = {
6827 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6828 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6829 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6830 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6831 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6832 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6833 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6834 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6835 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6836 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6837 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6838 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6839 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6840 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6841 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6842 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6843 };
6844 
6845 static const char * const vml2_walker_mems[] = {
6846 	"UTC_VML2_CACHE_PDE0_MEM0",
6847 	"UTC_VML2_CACHE_PDE0_MEM1",
6848 	"UTC_VML2_CACHE_PDE1_MEM0",
6849 	"UTC_VML2_CACHE_PDE1_MEM1",
6850 	"UTC_VML2_CACHE_PDE2_MEM0",
6851 	"UTC_VML2_CACHE_PDE2_MEM1",
6852 	"UTC_VML2_RDIF_LOG_FIFO",
6853 };
6854 
6855 static const char * const atc_l2_cache_2m_mems[] = {
6856 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6857 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6858 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6859 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6860 };
6861 
6862 static const char *atc_l2_cache_4k_mems[] = {
6863 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6864 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6865 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6866 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6867 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6868 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6869 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6870 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6871 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6872 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6873 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6874 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6875 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6876 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6877 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6878 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6879 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6880 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6881 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6882 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6883 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6884 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6885 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6886 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6887 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6888 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6889 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6890 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6891 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6892 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6893 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6894 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6895 };
6896 
6897 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6898 					 struct ras_err_data *err_data)
6899 {
6900 	uint32_t i, data;
6901 	uint32_t sec_count, ded_count;
6902 
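	/* Clear the EDC count registers, then select each memory instance via
	 * the corresponding *_INDEX register and accumulate its SEC/DED counts
	 * into err_data.
	 */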
6903 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6904 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6905 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6906 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6907 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6908 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6909 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6910 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6911 
6912 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6913 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6914 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6915 
6916 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6917 		if (sec_count) {
6918 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6919 				"SEC %d\n", i, vml2_mems[i], sec_count);
6920 			err_data->ce_count += sec_count;
6921 		}
6922 
6923 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6924 		if (ded_count) {
6925 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6926 				"DED %d\n", i, vml2_mems[i], ded_count);
6927 			err_data->ue_count += ded_count;
6928 		}
6929 	}
6930 
6931 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6932 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6933 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6934 
6935 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6936 						SEC_COUNT);
6937 		if (sec_count) {
6938 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6939 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6940 			err_data->ce_count += sec_count;
6941 		}
6942 
6943 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6944 						DED_COUNT);
6945 		if (ded_count) {
6946 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6947 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6948 			err_data->ue_count += ded_count;
6949 		}
6950 	}
6951 
6952 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6953 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6954 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6955 
6956 		sec_count = (data & 0x00006000L) >> 0xd;
6957 		if (sec_count) {
6958 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6959 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6960 				sec_count);
6961 			err_data->ce_count += sec_count;
6962 		}
6963 	}
6964 
6965 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6966 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6967 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6968 
6969 		sec_count = (data & 0x00006000L) >> 0xd;
6970 		if (sec_count) {
6971 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6972 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6973 				sec_count);
6974 			err_data->ce_count += sec_count;
6975 		}
6976 
6977 		ded_count = (data & 0x00018000L) >> 0xf;
6978 		if (ded_count) {
6979 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6980 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6981 				ded_count);
6982 			err_data->ue_count += ded_count;
6983 		}
6984 	}
6985 
6986 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6987 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6988 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6989 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6990 
6991 	return 0;
6992 }
6993 
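/*
 * gfx_v9_0_ras_error_count - decode one EDC counter register value
 *
 * Matches @reg against the gfx_v9_0_ras_fields table and, for every field
 * belonging to this register, extracts the SEC/DED counts from @value,
 * logs them per SE/instance and adds them to @sec_count and @ded_count.
 */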
6994 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6995 	const struct soc15_reg_entry *reg,
6996 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6997 	uint32_t *sec_count, uint32_t *ded_count)
6998 {
6999 	uint32_t i;
7000 	uint32_t sec_cnt, ded_cnt;
7001 
7002 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
7003 		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
7004 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
7005 			gfx_v9_0_ras_fields[i].inst != reg->inst)
7006 			continue;
7007 
7008 		sec_cnt = (value &
7009 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
7010 				gfx_v9_0_ras_fields[i].sec_count_shift;
7011 		if (sec_cnt) {
7012 			dev_info(adev->dev, "GFX SubBlock %s, "
7013 				"Instance[%d][%d], SEC %d\n",
7014 				gfx_v9_0_ras_fields[i].name,
7015 				se_id, inst_id,
7016 				sec_cnt);
7017 			*sec_count += sec_cnt;
7018 		}
7019 
7020 		ded_cnt = (value &
7021 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
7022 				gfx_v9_0_ras_fields[i].ded_count_shift;
7023 		if (ded_cnt) {
7024 			dev_info(adev->dev, "GFX SubBlock %s, "
7025 				"Instance[%d][%d], DED %d\n",
7026 				gfx_v9_0_ras_fields[i].name,
7027 				se_id, inst_id,
7028 				ded_cnt);
7029 			*ded_count += ded_cnt;
7030 		}
7031 	}
7032 
7033 	return 0;
7034 }
7035 
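/*
 * gfx_v9_0_reset_ras_error_count - clear all GFX EDC error counters
 *
 * Reads back every gfx_v9_0_edc_counter_regs entry for each SE/instance
 * (the counters clear on read) and then zeroes the VML2 and ATC L2 ECC/EDC
 * counters, touching each indexed instance once.
 */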
7036 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
7037 {
7038 	int i, j, k;
7039 
7040 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7041 		return;
7042 
7043 	/* read back registers to clear the counters */
7044 	mutex_lock(&adev->grbm_idx_mutex);
7045 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7046 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7047 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7048 				amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
7049 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7050 			}
7051 		}
7052 	}
7053 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
7054 	mutex_unlock(&adev->grbm_idx_mutex);
7055 
7056 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7057 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
7058 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7059 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
7060 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7061 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
7062 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7063 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
7064 
7065 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
7066 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
7067 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
7068 	}
7069 
7070 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
7071 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
7072 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
7073 	}
7074 
7075 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
7076 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
7077 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
7078 	}
7079 
7080 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
7081 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
7082 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
7083 	}
7084 
7085 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7086 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7087 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7088 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7089 }
7090 
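/*
 * gfx_v9_0_query_ras_error_count - report correctable/uncorrectable GFX errors
 *
 * Iterates over all EDC counter registers for every SE/instance, decodes the
 * raw values through gfx_v9_0_ras_error_count(), stores the totals in the
 * ras_err_data passed via @ras_error_status and finally appends the UTC
 * (VML2/ATC) counts from gfx_v9_0_query_utc_edc_status().
 */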
7091 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
7092 					  void *ras_error_status)
7093 {
7094 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
7095 	uint32_t sec_count = 0, ded_count = 0;
7096 	uint32_t i, j, k;
7097 	uint32_t reg_value;
7098 
7099 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7100 		return;
7101 
7102 	err_data->ue_count = 0;
7103 	err_data->ce_count = 0;
7104 
7105 	mutex_lock(&adev->grbm_idx_mutex);
7106 
7107 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7108 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7109 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7110 				amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
7111 				reg_value =
7112 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7113 				if (reg_value)
7114 					gfx_v9_0_ras_error_count(adev,
7115 						&gfx_v9_0_edc_counter_regs[i],
7116 						j, k, reg_value,
7117 						&sec_count, &ded_count);
7118 			}
7119 		}
7120 	}
7121 
7122 	err_data->ce_count += sec_count;
7123 	err_data->ue_count += ded_count;
7124 
7125 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7126 	mutex_unlock(&adev->grbm_idx_mutex);
7127 
7128 	gfx_v9_0_query_utc_edc_status(adev, err_data);
7129 }
7130 
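/*
 * gfx_v9_0_emit_mem_sync - emit a full cache flush/invalidate on the ring
 *
 * Emits a single ACQUIRE_MEM packet covering the whole address range that
 * invalidates the SH instruction and scalar caches, TCL1 and TC, and writes
 * back the TC.  The resulting dword stream is:
 *   PACKET3(ACQUIRE_MEM, 5), cp_coher_cntl, 0xffffffff, 0xffffff, 0, 0, 0xA
 */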
7131 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
7132 {
7133 	const unsigned int cp_coher_cntl =
7134 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
7135 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
7136 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
7137 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
7138 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
7139 
7140 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
7141 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
7142 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
7143 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
7144 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
7145 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
7146 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
7147 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
7148 }
7149 
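/*
 * gfx_v9_0_emit_wave_limit_cs - throttle one compute (CS) pipe
 *
 * Writes mmSPI_WCL_PIPE_PERCENT_CS<pipe> through the ring: when @enable is
 * true the multiplier is dropped to 1 so the pipe gets a minimal share of
 * wave slots, otherwise the default value is restored.
 */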
7150 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
7151 					uint32_t pipe, bool enable)
7152 {
7153 	struct amdgpu_device *adev = ring->adev;
7154 	uint32_t val;
7155 	uint32_t wcl_cs_reg;
7156 
7157 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
7158 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
7159 
7160 	switch (pipe) {
7161 	case 0:
7162 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
7163 		break;
7164 	case 1:
7165 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
7166 		break;
7167 	case 2:
7168 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
7169 		break;
7170 	case 3:
7171 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
7172 		break;
7173 	default:
7174 		DRM_DEBUG("invalid pipe %d\n", pipe);
7175 		return;
7176 	}
7177 
7178 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
7179 
7180 }
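
/*
 * gfx_v9_0_emit_wave_limit - limit gfx/compute waves around a high priority
 * compute job
 *
 * When @enable is true, caps mmSPI_WCL_PIPE_PERCENT_GFX at 0x1f and also
 * throttles every other CS pipe of the first ME via
 * gfx_v9_0_emit_wave_limit_cs(); when false, the defaults are restored.
 */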
7181 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
7182 {
7183 	struct amdgpu_device *adev = ring->adev;
7184 	uint32_t val;
7185 	int i;
7186 
7187 
7188 	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to limit
7189 	 * the number of gfx waves. Setting the low 5 bits (0x1f) makes sure gfx
7190 	 * only gets around 25% of gpu resources.
7191 	 */
7192 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
7193 	amdgpu_ring_emit_wreg(ring,
7194 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
7195 			      val);
7196 
7197 	/* Restrict waves for normal/low priority compute queues as well
7198 	 * to get the best QoS for high priority compute jobs.
7199 	 *
7200 	 * amdgpu controls only the 1st ME (CS pipes 0-3).
7201 	 */
7202 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
7203 		if (i != ring->pipe)
7204 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
7205 
7206 	}
7207 }
7208 
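/*
 * gfx_v9_ring_insert_nop - pad the ring with @num_nop NOP dwords
 *
 * A single NOP is written directly.  Otherwise a PACKET3_NOP header with a
 * count of up to 0x3ffe is emitted and the remaining num_nop - 1 dwords are
 * filled by amdgpu_ring_insert_nop(); e.g. num_nop = 8 emits
 * PACKET3(NOP, 6) followed by 7 plain NOP dwords.
 */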
7209 static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
7210 {
7211 	/* Header itself is a NOP packet */
7212 	if (num_nop == 1) {
7213 		amdgpu_ring_write(ring, ring->funcs->nop);
7214 		return;
7215 	}
7216 
7217 	/* Max HW optimization up to 0x3ffe, followed by the remaining NOPs one at a time */
7218 	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
7219 
7220 	/* Header is at index 0, followed by num_nop - 1 NOP packets */
7221 	amdgpu_ring_insert_nop(ring, num_nop - 1);
7222 }
7223 
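/*
 * gfx_v9_0_reset_kgq - reset a hung kernel gfx queue
 *
 * Uses the KIQ ring to write mmCP_VMID_RESET with the bit for @vmid set,
 * then, on the gfx ring itself, re-signals the last fence, waits for
 * CP_VMID_RESET to read back as zero and clears the register before
 * re-testing the ring.  Not supported under SR-IOV.
 */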
7224 static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
7225 {
7226 	struct amdgpu_device *adev = ring->adev;
7227 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7228 	struct amdgpu_ring *kiq_ring = &kiq->ring;
7229 	unsigned long flags;
7230 	u32 tmp;
7231 	int r;
7232 
7233 	if (amdgpu_sriov_vf(adev))
7234 		return -EINVAL;
7235 
7236 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7237 		return -EINVAL;
7238 
7239 	spin_lock_irqsave(&kiq->ring_lock, flags);
7240 
7241 	if (amdgpu_ring_alloc(kiq_ring, 5)) {
7242 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
7243 		return -ENOMEM;
7244 	}
7245 
7246 	tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
7247 	gfx_v9_0_ring_emit_wreg(kiq_ring,
7248 				 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
7249 	amdgpu_ring_commit(kiq_ring);
7250 
7251 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
7252 
7253 	r = amdgpu_ring_test_ring(kiq_ring);
7254 	if (r)
7255 		return r;
7256 
7257 	if (amdgpu_ring_alloc(ring, 7 + 7 + 5))
7258 		return -ENOMEM;
7259 	gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
7260 				 ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
7261 	gfx_v9_0_ring_emit_reg_wait(ring,
7262 				    SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff);
7263 	gfx_v9_0_ring_emit_wreg(ring,
7264 				SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0);
7265 
7266 	return amdgpu_ring_test_ring(ring);
7267 }
7268 
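/*
 * gfx_v9_0_reset_kcq - reset a hung kernel compute queue
 *
 * Unmaps the queue through the KIQ with RESET_QUEUES, waits for the HQD to
 * deactivate, re-initializes the MQD via gfx_v9_0_kcq_init_queue() and then
 * remaps the queue through the KIQ before re-testing the ring.  Not
 * supported under SR-IOV.
 */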
7269 static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
7270 			      unsigned int vmid)
7271 {
7272 	struct amdgpu_device *adev = ring->adev;
7273 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7274 	struct amdgpu_ring *kiq_ring = &kiq->ring;
7275 	unsigned long flags;
7276 	int i, r;
7277 
7278 	if (amdgpu_sriov_vf(adev))
7279 		return -EINVAL;
7280 
7281 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7282 		return -EINVAL;
7283 
7284 	spin_lock_irqsave(&kiq->ring_lock, flags);
7285 
7286 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
7287 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
7288 		return -ENOMEM;
7289 	}
7290 
7291 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
7292 				   0, 0);
7293 	amdgpu_ring_commit(kiq_ring);
7294 
7295 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
7296 
7297 	r = amdgpu_ring_test_ring(kiq_ring);
7298 	if (r)
7299 		return r;
7300 
7301 	/* make sure dequeue is complete */
7302 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
7303 	mutex_lock(&adev->srbm_mutex);
7304 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
7305 	for (i = 0; i < adev->usec_timeout; i++) {
7306 		if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
7307 			break;
7308 		udelay(1);
7309 	}
7310 	if (i >= adev->usec_timeout)
7311 		r = -ETIMEDOUT;
7312 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7313 	mutex_unlock(&adev->srbm_mutex);
7314 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
7315 	if (r) {
7316 		dev_err(adev->dev, "fail to wait on hqd deactivate\n");
7317 		return r;
7318 	}
7319 
7320 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
7321 	if (unlikely(r != 0)) {
7322 		dev_err(adev->dev, "fail to resv mqd_obj\n");
7323 		return r;
7324 	}
7325 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
7326 	if (!r) {
7327 		r = gfx_v9_0_kcq_init_queue(ring, true);
7328 		amdgpu_bo_kunmap(ring->mqd_obj);
7329 		ring->mqd_ptr = NULL;
7330 	}
7331 	amdgpu_bo_unreserve(ring->mqd_obj);
7332 	if (r) {
7333 		dev_err(adev->dev, "fail to reinit kcq mqd\n");
7334 		return r;
7335 	}
7336 	spin_lock_irqsave(&kiq->ring_lock, flags);
7337 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
7338 	if (r) {
7339 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
7340 		return -ENOMEM;
7341 	}
7342 	kiq->pmf->kiq_map_queues(kiq_ring, ring);
7343 	amdgpu_ring_commit(kiq_ring);
7344 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
7345 	r = amdgpu_ring_test_ring(kiq_ring);
7346 	if (r) {
7347 		DRM_ERROR("fail to remap queue\n");
7348 		return r;
7349 	}
7350 	return amdgpu_ring_test_ring(ring);
7351 }
7352 
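/*
 * gfx_v9_ip_print - print the captured GFX register state
 *
 * Dumps the core register snapshot taken by gfx_v9_ip_dump() and, when
 * available, the per mec/pipe/queue compute CP registers to the supplied
 * drm_printer (used, for instance, by the devcoredump path).
 */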
7353 static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
7354 {
7355 	struct amdgpu_device *adev = ip_block->adev;
7356 	uint32_t i, j, k, reg, index = 0;
7357 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7358 
7359 	if (!adev->gfx.ip_dump_core)
7360 		return;
7361 
7362 	for (i = 0; i < reg_count; i++)
7363 		drm_printf(p, "%-50s \t 0x%08x\n",
7364 			   gc_reg_list_9[i].reg_name,
7365 			   adev->gfx.ip_dump_core[i]);
7366 
7367 	/* print compute queue registers for all instances */
7368 	if (!adev->gfx.ip_dump_compute_queues)
7369 		return;
7370 
7371 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7372 	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7373 		   adev->gfx.mec.num_mec,
7374 		   adev->gfx.mec.num_pipe_per_mec,
7375 		   adev->gfx.mec.num_queue_per_pipe);
7376 
7377 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7378 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7379 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7380 				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7381 				for (reg = 0; reg < reg_count; reg++) {
7382 					drm_printf(p, "%-50s \t 0x%08x\n",
7383 						   gc_cp_reg_list_9[reg].reg_name,
7384 						   adev->gfx.ip_dump_compute_queues[index + reg]);
7385 				}
7386 				index += reg_count;
7387 			}
7388 		}
7389 	}
7390 
7391 }
7392 
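/*
 * gfx_v9_ip_dump - snapshot GFX registers for later printing
 *
 * With GFXOFF temporarily disabled, reads the core gc_reg_list_9 registers
 * into adev->gfx.ip_dump_core and then, under the SRBM mutex, selects each
 * mec/pipe/queue (starting at ME1, since ME0 is gfx) to capture the compute
 * CP registers into adev->gfx.ip_dump_compute_queues.
 */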
7393 static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
7394 {
7395 	struct amdgpu_device *adev = ip_block->adev;
7396 	uint32_t i, j, k, reg, index = 0;
7397 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7398 
7399 	if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
7400 		return;
7401 
7402 	amdgpu_gfx_off_ctrl(adev, false);
7403 	for (i = 0; i < reg_count; i++)
7404 		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7405 	amdgpu_gfx_off_ctrl(adev, true);
7406 
7407 	/* dump compute queue registers for all instances */
7408 	if (!adev->gfx.ip_dump_compute_queues)
7409 		return;
7410 
7411 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7412 	amdgpu_gfx_off_ctrl(adev, false);
7413 	mutex_lock(&adev->srbm_mutex);
7414 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7415 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7416 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7417 				/* ME0 is for GFX so start from 1 for CP */
7418 				soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
7419 
7420 				for (reg = 0; reg < reg_count; reg++) {
7421 					adev->gfx.ip_dump_compute_queues[index + reg] =
7422 						RREG32(SOC15_REG_ENTRY_OFFSET(
7423 							gc_cp_reg_list_9[reg]));
7424 				}
7425 				index += reg_count;
7426 			}
7427 		}
7428 	}
7429 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7430 	mutex_unlock(&adev->srbm_mutex);
7431 	amdgpu_gfx_off_ctrl(adev, true);
7432 
7433 }
7434 
7435 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
7436 {
7437 	/* Emit the cleaner shader */
7438 	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
7439 	amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
7440 }
7441 
7442 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
7443 	.name = "gfx_v9_0",
7444 	.early_init = gfx_v9_0_early_init,
7445 	.late_init = gfx_v9_0_late_init,
7446 	.sw_init = gfx_v9_0_sw_init,
7447 	.sw_fini = gfx_v9_0_sw_fini,
7448 	.hw_init = gfx_v9_0_hw_init,
7449 	.hw_fini = gfx_v9_0_hw_fini,
7450 	.suspend = gfx_v9_0_suspend,
7451 	.resume = gfx_v9_0_resume,
7452 	.is_idle = gfx_v9_0_is_idle,
7453 	.wait_for_idle = gfx_v9_0_wait_for_idle,
7454 	.soft_reset = gfx_v9_0_soft_reset,
7455 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
7456 	.set_powergating_state = gfx_v9_0_set_powergating_state,
7457 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
7458 	.dump_ip_state = gfx_v9_ip_dump,
7459 	.print_ip_state = gfx_v9_ip_print,
7460 };
7461 
7462 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
7463 	.type = AMDGPU_RING_TYPE_GFX,
7464 	.align_mask = 0xff,
7465 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7466 	.support_64bit_ptrs = true,
7467 	.secure_submission_supported = true,
7468 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
7469 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
7470 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
7471 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
7472 		5 +  /* COND_EXEC */
7473 		7 +  /* PIPELINE_SYNC */
7474 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7475 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7476 		2 + /* VM_FLUSH */
7477 		8 +  /* FENCE for VM_FLUSH */
7478 		20 + /* GDS switch */
7479 		4 + /* double SWITCH_BUFFER,
7480 		       the first COND_EXEC jumps to the place just
7481 			   prior to this double SWITCH_BUFFER  */
7482 		5 + /* COND_EXEC */
7483 		7 +	 /*	HDP_flush */
7484 		4 +	 /*	VGT_flush */
7485 		14 + /*	CE_META */
7486 		31 + /*	DE_META */
7487 		3 + /* CNTX_CTRL */
7488 		5 + /* HDP_INVL */
7489 		8 + 8 + /* FENCE x2 */
7490 		2 + /* SWITCH_BUFFER */
7491 		7 + /* gfx_v9_0_emit_mem_sync */
7492 		2, /* gfx_v9_0_ring_emit_cleaner_shader */
7493 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
7494 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7495 	.emit_fence = gfx_v9_0_ring_emit_fence,
7496 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7497 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7498 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7499 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7500 	.test_ring = gfx_v9_0_ring_test_ring,
7501 	.insert_nop = gfx_v9_ring_insert_nop,
7502 	.pad_ib = amdgpu_ring_generic_pad_ib,
7503 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
7504 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7505 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7506 	.preempt_ib = gfx_v9_0_ring_preempt_ib,
7507 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7508 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7509 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7510 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7511 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7512 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7513 	.reset = gfx_v9_0_reset_kgq,
7514 	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7515 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7516 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7517 };
7518 
7519 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
7520 	.type = AMDGPU_RING_TYPE_GFX,
7521 	.align_mask = 0xff,
7522 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7523 	.support_64bit_ptrs = true,
7524 	.secure_submission_supported = true,
7525 	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
7526 	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
7527 	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
7528 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
7529 		5 +  /* COND_EXEC */
7530 		7 +  /* PIPELINE_SYNC */
7531 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7532 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7533 		2 + /* VM_FLUSH */
7534 		8 +  /* FENCE for VM_FLUSH */
7535 		20 + /* GDS switch */
7536 		4 + /* double SWITCH_BUFFER,
7537 		     * the first COND_EXEC jumps to the place just
7538 		     * prior to this double SWITCH_BUFFER
7539 		     */
7540 		5 + /* COND_EXEC */
7541 		7 +	 /*	HDP_flush */
7542 		4 +	 /*	VGT_flush */
7543 		14 + /*	CE_META */
7544 		31 + /*	DE_META */
7545 		3 + /* CNTX_CTRL */
7546 		5 + /* HDP_INVL */
7547 		8 + 8 + /* FENCE x2 */
7548 		2 + /* SWITCH_BUFFER */
7549 		7 + /* gfx_v9_0_emit_mem_sync */
7550 		2, /* gfx_v9_0_ring_emit_cleaner_shader */
7551 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
7552 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7553 	.emit_fence = gfx_v9_0_ring_emit_fence,
7554 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7555 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7556 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7557 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7558 	.test_ring = gfx_v9_0_ring_test_ring,
7559 	.test_ib = gfx_v9_0_ring_test_ib,
7560 	.insert_nop = gfx_v9_ring_insert_nop,
7561 	.pad_ib = amdgpu_ring_generic_pad_ib,
7562 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
7563 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7564 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7565 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7566 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7567 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7568 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7569 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7570 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7571 	.patch_cntl = gfx_v9_0_ring_patch_cntl,
7572 	.patch_de = gfx_v9_0_ring_patch_de_meta,
7573 	.patch_ce = gfx_v9_0_ring_patch_ce_meta,
7574 	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7575 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7576 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7577 };
7578 
7579 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7580 	.type = AMDGPU_RING_TYPE_COMPUTE,
7581 	.align_mask = 0xff,
7582 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7583 	.support_64bit_ptrs = true,
7584 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7585 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7586 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7587 	.emit_frame_size =
7588 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7589 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7590 		5 + /* hdp invalidate */
7591 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7592 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7593 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7594 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7595 		7 + /* gfx_v9_0_emit_mem_sync */
7596 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7597 		15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7598 		2, /* gfx_v9_0_ring_emit_cleaner_shader */
7599 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7600 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
7601 	.emit_fence = gfx_v9_0_ring_emit_fence,
7602 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7603 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7604 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7605 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7606 	.test_ring = gfx_v9_0_ring_test_ring,
7607 	.test_ib = gfx_v9_0_ring_test_ib,
7608 	.insert_nop = gfx_v9_ring_insert_nop,
7609 	.pad_ib = amdgpu_ring_generic_pad_ib,
7610 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7611 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7612 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7613 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7614 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7615 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
7616 	.reset = gfx_v9_0_reset_kcq,
7617 	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7618 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7619 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7620 };
7621 
7622 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7623 	.type = AMDGPU_RING_TYPE_KIQ,
7624 	.align_mask = 0xff,
7625 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7626 	.support_64bit_ptrs = true,
7627 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7628 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7629 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7630 	.emit_frame_size =
7631 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7632 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7633 		5 + /* hdp invalidate */
7634 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7635 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7636 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7637 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7638 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7639 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7640 	.test_ring = gfx_v9_0_ring_test_ring,
7641 	.insert_nop = amdgpu_ring_insert_nop,
7642 	.pad_ib = amdgpu_ring_generic_pad_ib,
7643 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
7644 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7645 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7646 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7647 };
7648 
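/*
 * gfx_v9_0_set_ring_funcs - hook up the ring function tables
 *
 * Assigns the KIQ, gfx, software (muxed) gfx and compute ring function
 * tables defined above to their rings; the software gfx rings are only
 * used when mid command buffer preemption (mcbp) is enabled.
 */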
7649 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7650 {
7651 	int i;
7652 
7653 	adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7654 
7655 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7656 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7657 
7658 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7659 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7660 			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7661 	}
7662 
7663 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7664 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7665 }
7666 
7667 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7668 	.set = gfx_v9_0_set_eop_interrupt_state,
7669 	.process = gfx_v9_0_eop_irq,
7670 };
7671 
7672 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7673 	.set = gfx_v9_0_set_priv_reg_fault_state,
7674 	.process = gfx_v9_0_priv_reg_irq,
7675 };
7676 
7677 static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = {
7678 	.set = gfx_v9_0_set_bad_op_fault_state,
7679 	.process = gfx_v9_0_bad_op_irq,
7680 };
7681 
7682 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7683 	.set = gfx_v9_0_set_priv_inst_fault_state,
7684 	.process = gfx_v9_0_priv_inst_irq,
7685 };
7686 
7687 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7688 	.set = gfx_v9_0_set_cp_ecc_error_state,
7689 	.process = amdgpu_gfx_cp_ecc_error_irq,
7690 };
7691 
7692 
7693 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7694 {
7695 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7696 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7697 
7698 	adev->gfx.priv_reg_irq.num_types = 1;
7699 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7700 
7701 	adev->gfx.bad_op_irq.num_types = 1;
7702 	adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;
7703 
7704 	adev->gfx.priv_inst_irq.num_types = 1;
7705 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7706 
7707 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7708 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7709 }
7710 
7711 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7712 {
7713 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7714 	case IP_VERSION(9, 0, 1):
7715 	case IP_VERSION(9, 2, 1):
7716 	case IP_VERSION(9, 4, 0):
7717 	case IP_VERSION(9, 2, 2):
7718 	case IP_VERSION(9, 1, 0):
7719 	case IP_VERSION(9, 4, 1):
7720 	case IP_VERSION(9, 3, 0):
7721 	case IP_VERSION(9, 4, 2):
7722 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7723 		break;
7724 	default:
7725 		break;
7726 	}
7727 }
7728 
7729 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7730 {
7731 	/* init ASIC gds info */
7732 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7733 	case IP_VERSION(9, 0, 1):
7734 	case IP_VERSION(9, 2, 1):
7735 	case IP_VERSION(9, 4, 0):
7736 		adev->gds.gds_size = 0x10000;
7737 		break;
7738 	case IP_VERSION(9, 2, 2):
7739 	case IP_VERSION(9, 1, 0):
7740 	case IP_VERSION(9, 4, 1):
7741 		adev->gds.gds_size = 0x1000;
7742 		break;
7743 	case IP_VERSION(9, 4, 2):
7744 		/* aldebaran removed all the GDS internal memory;
7745 		 * the kernel only supports GWS opcodes such as
7746 		 * barrier and semaphore. */
7747 		adev->gds.gds_size = 0;
7748 		break;
7749 	default:
7750 		adev->gds.gds_size = 0x10000;
7751 		break;
7752 	}
7753 
7754 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7755 	case IP_VERSION(9, 0, 1):
7756 	case IP_VERSION(9, 4, 0):
7757 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7758 		break;
7759 	case IP_VERSION(9, 2, 1):
7760 		adev->gds.gds_compute_max_wave_id = 0x27f;
7761 		break;
7762 	case IP_VERSION(9, 2, 2):
7763 	case IP_VERSION(9, 1, 0):
7764 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7765 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7766 		else
7767 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7768 		break;
7769 	case IP_VERSION(9, 4, 1):
7770 		adev->gds.gds_compute_max_wave_id = 0xfff;
7771 		break;
7772 	case IP_VERSION(9, 4, 2):
7773 		/* deprecated for Aldebaran, no usage at all */
7774 		adev->gds.gds_compute_max_wave_id = 0;
7775 		break;
7776 	default:
7777 		/* this really depends on the chip */
7778 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7779 		break;
7780 	}
7781 
7782 	adev->gds.gws_size = 64;
7783 	adev->gds.oa_size = 16;
7784 }
7785 
7786 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7787 						 u32 bitmap)
7788 {
7789 	u32 data;
7790 
7791 	if (!bitmap)
7792 		return;
7793 
7794 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7795 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7796 
7797 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7798 }
7799 
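/*
 * gfx_v9_0_get_cu_active_bitmap - active CUs of the currently selected SE/SH
 *
 * Combines the fuse and user INACTIVE_CUS masks and inverts the result
 * against the max_cu_per_sh bitmask, so a set bit means the CU is usable.
 */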
7800 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7801 {
7802 	u32 data, mask;
7803 
7804 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7805 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7806 
7807 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7808 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7809 
7810 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7811 
7812 	return (~data) & mask;
7813 }
7814 
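/*
 * gfx_v9_0_get_cu_info - gather the active CU topology
 *
 * For every SE/SH, applies the user-disabled CU mask, reads the active CU
 * bitmap and counts the active CUs.  Results are packed into the 4x4
 * cu_info bitmaps (with the Arcturus 8*1 SE layout folded in as described
 * in the comment below), together with the always-on CU mask and the total
 * active CU count.
 */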
7815 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7816 				 struct amdgpu_cu_info *cu_info)
7817 {
7818 	int i, j, k, counter, active_cu_number = 0;
7819 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7820 	unsigned disable_masks[4 * 4];
7821 
7822 	if (!adev || !cu_info)
7823 		return -EINVAL;
7824 
7825 	/*
7826 	 * 16 comes from the bitmap array size 4*4, which covers all gfx9 ASICs
7827 	 */
7828 	if (adev->gfx.config.max_shader_engines *
7829 		adev->gfx.config.max_sh_per_se > 16)
7830 		return -EINVAL;
7831 
7832 	amdgpu_gfx_parse_disable_cu(disable_masks,
7833 				    adev->gfx.config.max_shader_engines,
7834 				    adev->gfx.config.max_sh_per_se);
7835 
7836 	mutex_lock(&adev->grbm_idx_mutex);
7837 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7838 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7839 			mask = 1;
7840 			ao_bitmap = 0;
7841 			counter = 0;
7842 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7843 			gfx_v9_0_set_user_cu_inactive_bitmap(
7844 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7845 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7846 
7847 			/*
7848 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
7849 			 * 4x4 size array, and it's usually suitable for Vega
7850 			 * ASICs which has 4*2 SE/SH layout.
7851 			 * ASICs which have a 4*2 SE/SH layout.
7852 			 * To mostly reduce the impact, we make it compatible
7853 			 * with current bitmap array as below:
7854 			 *    SE4,SH0 --> bitmap[0][1]
7855 			 *    SE5,SH0 --> bitmap[1][1]
7856 			 *    SE6,SH0 --> bitmap[2][1]
7857 			 *    SE7,SH0 --> bitmap[3][1]
7858 			 */
7859 			cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7860 
7861 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7862 				if (bitmap & mask) {
7863 					if (counter < adev->gfx.config.max_cu_per_sh)
7864 						ao_bitmap |= mask;
7865 					counter++;
7866 				}
7867 				mask <<= 1;
7868 			}
7869 			active_cu_number += counter;
7870 			if (i < 2 && j < 2)
7871 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7872 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7873 		}
7874 	}
7875 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7876 	mutex_unlock(&adev->grbm_idx_mutex);
7877 
7878 	cu_info->number = active_cu_number;
7879 	cu_info->ao_cu_mask = ao_cu_mask;
7880 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7881 
7882 	return 0;
7883 }
7884 
7885 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7886 {
7887 	.type = AMD_IP_BLOCK_TYPE_GFX,
7888 	.major = 9,
7889 	.minor = 0,
7890 	.rev = 0,
7891 	.funcs = &gfx_v9_0_ip_funcs,
7892 };
7893