xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 69f22c5b454f7a3d77f323ed96b4ad6ac7bbe378)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_0_cleaner_shader.h"
54 #include "gfx_v9_4_2.h"
55 
56 #include "asic_reg/pwr/pwr_10_0_offset.h"
57 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
58 #include "asic_reg/gc/gc_9_0_default.h"
59 
60 #define GFX9_NUM_GFX_RINGS     1
61 #define GFX9_NUM_SW_GFX_RINGS  2
62 #define GFX9_MEC_HPD_SIZE 4096
63 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
64 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
65 
66 #define mmGCEA_PROBE_MAP                        0x070c
67 #define mmGCEA_PROBE_MAP_BASE_IDX               0
68 
69 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
74 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
75 
76 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
81 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
82 
83 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
88 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
89 
90 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
91 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
92 MODULE_FIRMWARE("amdgpu/raven_me.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
94 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
95 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
96 
97 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
103 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
104 
105 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
110 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
111 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
115 
116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
120 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
121 
122 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
127 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
128 
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
133 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
134 
135 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
136 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
137 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
138 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
139 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
140 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
141 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
142 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
143 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
144 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
145 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
146 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
147 
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
149 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
151 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
152 
153 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
154 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
155 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
156 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
157 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
158 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
159 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
160 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
161 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
162 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
163 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
164 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
165 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
166 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
167 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
168 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
169 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
170 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
171 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
172 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
173 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
174 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
175 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
176 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
177 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
178 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
179 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
180 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
181 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
182 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
183 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
184 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
185 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
186 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
187 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
188 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
189 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
190 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
191 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
192 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
193 	SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
194 	SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
195 	SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
196 	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
197 	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
198 	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
199 	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
200 	SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
201 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
202 	SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
203 	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
204 	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
205 	SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
206 	SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
207 	SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
208 	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
209 	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
210 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
211 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
212 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
213 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
214 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
215 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
216 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
217 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
218 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
219 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
220 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
221 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
222 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
223 	SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
224 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
225 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
226 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
227 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
228 	/* cp header registers */
229 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
230 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
231 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
232 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
233 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
234 	/* SE status registers */
235 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
236 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
237 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
238 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
239 };
240 
241 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
242 	/* compute queue registers */
243 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
244 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
245 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
246 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
247 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
248 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
249 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
250 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
251 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
252 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
253 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
254 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
255 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
256 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
257 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
258 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
259 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
260 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
261 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
262 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
263 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
264 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
265 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
266 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
267 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
268 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
269 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
270 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
271 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
272 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
273 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
274 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
275 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
276 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
277 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
278 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
279 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
280 };
281 
282 enum ta_ras_gfx_subblock {
283 	/*CPC*/
284 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
285 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
286 	TA_RAS_BLOCK__GFX_CPC_UCODE,
287 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
288 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
289 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
290 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
291 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
292 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
293 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
294 	/* CPF*/
295 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
296 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
297 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
298 	TA_RAS_BLOCK__GFX_CPF_TAG,
299 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
300 	/* CPG*/
301 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
302 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
303 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
304 	TA_RAS_BLOCK__GFX_CPG_TAG,
305 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
306 	/* GDS*/
307 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
308 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
309 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
310 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
311 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
312 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
313 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
314 	/* SPI*/
315 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
316 	/* SQ*/
317 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
318 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
319 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
320 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
321 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
322 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
323 	/* SQC (3 ranges)*/
324 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
325 	/* SQC range 0*/
326 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
327 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
328 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
329 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
330 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
331 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
332 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
333 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
334 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
335 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
336 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
337 	/* SQC range 1*/
338 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
339 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
340 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
341 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
342 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
343 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
344 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
345 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
346 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
347 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
348 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
349 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
350 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
351 	/* SQC range 2*/
352 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
353 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
354 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
355 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
356 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
357 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
358 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
359 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
360 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
361 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
362 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
363 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
364 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
365 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
366 	/* TA*/
367 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
368 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
369 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
370 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
371 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
372 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
373 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
374 	/* TCA*/
375 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
376 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
377 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
378 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
379 	/* TCC (5 sub-ranges)*/
380 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
381 	/* TCC range 0*/
382 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
383 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
384 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
385 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
386 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
387 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
388 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
389 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
390 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
391 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
392 	/* TCC range 1*/
393 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
394 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
395 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
396 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
397 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
398 	/* TCC range 2*/
399 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
400 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
401 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
402 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
403 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
404 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
405 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
406 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
407 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
408 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
409 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
410 	/* TCC range 3*/
411 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
412 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
413 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
414 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
415 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
416 	/* TCC range 4*/
417 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
418 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
419 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
420 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
421 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
422 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
423 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
424 	/* TCI*/
425 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
426 	/* TCP*/
427 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
428 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
429 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
430 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
431 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
432 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
433 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
434 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
435 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
436 	/* TD*/
437 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
438 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
439 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
440 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
441 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
442 	/* EA (3 sub-ranges)*/
443 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
444 	/* EA range 0*/
445 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
446 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
447 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
448 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
449 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
450 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
451 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
452 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
453 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
454 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
455 	/* EA range 1*/
456 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
457 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
458 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
459 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
460 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
461 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
462 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
463 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
464 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
465 	/* EA range 2*/
466 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
467 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
468 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
469 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
470 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
471 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
472 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
473 	/* UTC VM L2 bank*/
474 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
475 	/* UTC VM walker*/
476 	TA_RAS_BLOCK__UTC_VML2_WALKER,
477 	/* UTC ATC L2 2MB cache*/
478 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
479 	/* UTC ATC L2 4KB cache*/
480 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
481 	TA_RAS_BLOCK__GFX_MAX
482 };
483 
484 struct ras_gfx_subblock {
485 	unsigned char *name;
486 	int ta_subblock;
487 	int hw_supported_error_type;
488 	int sw_supported_error_type;
489 };
490 
491 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
492 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
493 		#subblock,                                                     \
494 		TA_RAS_BLOCK__##subblock,                                      \
495 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
496 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
497 	}
498 
499 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
500 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
510 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
511 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
512 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
513 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
514 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
515 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
516 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
517 			     0),
518 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
519 			     0),
520 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
521 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
522 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
523 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
524 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
525 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
526 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
527 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
528 			     0, 0),
529 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
530 			     0),
531 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
532 			     0, 0),
533 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
534 			     0),
535 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
536 			     0, 0),
537 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
538 			     0),
539 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
540 			     1),
541 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
542 			     0, 0, 0),
543 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
544 			     0),
545 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
546 			     0),
547 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
548 			     0),
549 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
550 			     0),
551 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
552 			     0),
553 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
554 			     0, 0),
555 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
556 			     0),
557 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
558 			     0),
559 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
560 			     0, 0, 0),
561 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
562 			     0),
563 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
564 			     0),
565 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
566 			     0),
567 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
568 			     0),
569 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
570 			     0),
571 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
572 			     0, 0),
573 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
574 			     0),
575 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
576 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
577 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
578 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
579 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
580 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
581 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
582 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
583 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
584 			     1),
585 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
586 			     1),
587 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
588 			     1),
589 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
590 			     0),
591 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
592 			     0),
593 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
594 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
595 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
596 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
597 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
598 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
599 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
600 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
601 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
602 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
603 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
604 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
605 			     0),
606 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
607 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
608 			     0),
609 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
610 			     0, 0),
611 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
612 			     0),
613 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
614 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
615 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
616 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
617 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
618 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
619 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
620 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
621 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
622 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
623 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
624 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
625 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
626 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
627 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
628 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
629 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
630 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
631 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
632 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
633 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
634 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
635 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
636 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
637 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
638 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
639 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
640 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
641 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
642 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
643 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
644 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
645 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
646 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
647 };
648 
649 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
650 {
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
671 };
672 
673 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
674 {
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
693 };
694 
695 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
696 {
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
708 };
709 
710 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
711 {
712 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
713 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
714 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
715 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
716 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
717 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
718 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
719 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
720 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
721 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
722 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
723 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
724 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
725 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
726 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
727 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
728 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
729 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
730 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
731 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
732 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
733 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
734 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
735 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
736 };
737 
738 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
739 {
740 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
741 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
742 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
743 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
744 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
745 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
746 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
747 };
748 
749 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
750 {
751 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
752 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
753 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
754 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
755 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
756 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
757 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
758 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
759 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
760 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
761 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
762 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
763 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
764 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
765 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
766 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
767 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
768 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
769 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
770 };
771 
772 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
773 {
774 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
775 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
776 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
777 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
778 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
779 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
780 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
781 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
782 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
783 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
784 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
785 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
786 };
787 
788 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
789 {
790 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
791 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
792 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
793 };
794 
795 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
796 {
797 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
798 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
799 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
800 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
801 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
802 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
803 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
804 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
805 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
806 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
807 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
808 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
809 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
810 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
811 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
812 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
813 };
814 
815 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
816 {
817 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
818 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
819 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
820 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
821 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
822 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
823 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
824 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
825 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
826 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
827 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
828 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
829 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
830 };
831 
832 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
833 {
834 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
835 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
836 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
837 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
838 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
839 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
840 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
841 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
842 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
843 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
844 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
845 };
846 
847 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
848 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
849 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
850 };
851 
852 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
853 {
854 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
855 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
856 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
857 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
858 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
859 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
860 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
861 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
862 };
863 
864 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
865 {
866 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
867 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
868 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
869 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
870 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
871 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
872 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
873 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
874 };
875 
876 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
877 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
878 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
879 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
880 
881 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
882 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
883 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
884 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
885 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
886 				struct amdgpu_cu_info *cu_info);
887 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
888 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
889 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
890 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
891 					  void *ras_error_status);
892 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
893 				     void *inject_if, uint32_t instance_mask);
894 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
895 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
896 					      unsigned int vmid);
897 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
898 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
899 
900 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
901 				uint64_t queue_mask)
902 {
903 	struct amdgpu_device *adev = kiq_ring->adev;
904 	u64 shader_mc_addr;
905 
906 	/* Cleaner shader MC address */
907 	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
908 
909 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
910 	amdgpu_ring_write(kiq_ring,
911 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
912 		/* vmid_mask:0* queue_type:0 (KIQ) */
913 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
914 	amdgpu_ring_write(kiq_ring,
915 			lower_32_bits(queue_mask));	/* queue mask lo */
916 	amdgpu_ring_write(kiq_ring,
917 			upper_32_bits(queue_mask));	/* queue mask hi */
918 	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
919 	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
920 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
921 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
922 }
923 
924 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
925 				 struct amdgpu_ring *ring)
926 {
927 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
928 	uint64_t wptr_addr = ring->wptr_gpu_addr;
929 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
930 
931 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
932 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
933 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
934 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
935 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
936 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
937 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
938 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
939 			 /*queue_type: normal compute queue */
940 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
941 			 /* alloc format: all_on_one_pipe */
942 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
943 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
944 			 /* num_queues: must be 1 */
945 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
946 	amdgpu_ring_write(kiq_ring,
947 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
948 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
949 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
950 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
951 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
952 }
953 
954 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
955 				   struct amdgpu_ring *ring,
956 				   enum amdgpu_unmap_queues_action action,
957 				   u64 gpu_addr, u64 seq)
958 {
959 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
960 
961 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
962 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
963 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
964 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
965 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
966 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
967 	amdgpu_ring_write(kiq_ring,
968 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
969 
970 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
971 		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
972 		amdgpu_ring_write(kiq_ring, 0);
973 		amdgpu_ring_write(kiq_ring, 0);
974 
975 	} else {
976 		amdgpu_ring_write(kiq_ring, 0);
977 		amdgpu_ring_write(kiq_ring, 0);
978 		amdgpu_ring_write(kiq_ring, 0);
979 	}
980 }
981 
982 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
983 				   struct amdgpu_ring *ring,
984 				   u64 addr,
985 				   u64 seq)
986 {
987 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
988 
989 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
990 	amdgpu_ring_write(kiq_ring,
991 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
992 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
993 			  PACKET3_QUERY_STATUS_COMMAND(2));
994 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
995 	amdgpu_ring_write(kiq_ring,
996 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
997 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
998 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
999 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
1000 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
1001 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
1002 }
1003 
1004 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
1005 				uint16_t pasid, uint32_t flush_type,
1006 				bool all_hub)
1007 {
1008 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
1009 	amdgpu_ring_write(kiq_ring,
1010 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
1011 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
1012 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
1013 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
1014 }
1015 
1016 
1017 static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
1018 					uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
1019 					uint32_t xcc_id, uint32_t vmid)
1020 {
1021 	struct amdgpu_device *adev = kiq_ring->adev;
1022 	unsigned i;
1023 
1024 	/* enter save mode */
1025 	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
1026 	mutex_lock(&adev->srbm_mutex);
1027 	soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);
1028 
1029 	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1030 		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
1031 		WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
1032 		/* wait till dequeue take effects */
1033 		for (i = 0; i < adev->usec_timeout; i++) {
1034 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
1035 				break;
1036 			udelay(1);
1037 		}
1038 		if (i >= adev->usec_timeout)
1039 			dev_err(adev->dev, "fail to wait on hqd deactive\n");
1040 	} else {
1041 		dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
1042 	}
1043 
1044 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
1045 	mutex_unlock(&adev->srbm_mutex);
1046 	/* exit safe mode */
1047 	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
1048 }
1049 
1050 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1051 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
1052 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
1053 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1054 	.kiq_query_status = gfx_v9_0_kiq_query_status,
1055 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1056 	.kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
1057 	.set_resources_size = 8,
1058 	.map_queues_size = 7,
1059 	.unmap_queues_size = 6,
1060 	.query_status_size = 7,
1061 	.invalidate_tlbs_size = 2,
1062 };
1063 
1064 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1065 {
1066 	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
1067 }
1068 
1069 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1070 {
1071 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1072 	case IP_VERSION(9, 0, 1):
1073 		soc15_program_register_sequence(adev,
1074 						golden_settings_gc_9_0,
1075 						ARRAY_SIZE(golden_settings_gc_9_0));
1076 		soc15_program_register_sequence(adev,
1077 						golden_settings_gc_9_0_vg10,
1078 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1079 		break;
1080 	case IP_VERSION(9, 2, 1):
1081 		soc15_program_register_sequence(adev,
1082 						golden_settings_gc_9_2_1,
1083 						ARRAY_SIZE(golden_settings_gc_9_2_1));
1084 		soc15_program_register_sequence(adev,
1085 						golden_settings_gc_9_2_1_vg12,
1086 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1087 		break;
1088 	case IP_VERSION(9, 4, 0):
1089 		soc15_program_register_sequence(adev,
1090 						golden_settings_gc_9_0,
1091 						ARRAY_SIZE(golden_settings_gc_9_0));
1092 		soc15_program_register_sequence(adev,
1093 						golden_settings_gc_9_0_vg20,
1094 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1095 		break;
1096 	case IP_VERSION(9, 4, 1):
1097 		soc15_program_register_sequence(adev,
1098 						golden_settings_gc_9_4_1_arct,
1099 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1100 		break;
1101 	case IP_VERSION(9, 2, 2):
1102 	case IP_VERSION(9, 1, 0):
1103 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1104 						ARRAY_SIZE(golden_settings_gc_9_1));
1105 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1106 			soc15_program_register_sequence(adev,
1107 							golden_settings_gc_9_1_rv2,
1108 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1109 		else
1110 			soc15_program_register_sequence(adev,
1111 							golden_settings_gc_9_1_rv1,
1112 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1113 		break;
1114 	 case IP_VERSION(9, 3, 0):
1115 		soc15_program_register_sequence(adev,
1116 						golden_settings_gc_9_1_rn,
1117 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
1118 		return; /* for renoir, don't need common goldensetting */
1119 	case IP_VERSION(9, 4, 2):
1120 		gfx_v9_4_2_init_golden_registers(adev,
1121 						 adev->smuio.funcs->get_die_id(adev));
1122 		break;
1123 	default:
1124 		break;
1125 	}
1126 
1127 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1128 	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
1129 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1130 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1131 }
1132 
1133 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1134 				       bool wc, uint32_t reg, uint32_t val)
1135 {
1136 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1137 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1138 				WRITE_DATA_DST_SEL(0) |
1139 				(wc ? WR_CONFIRM : 0));
1140 	amdgpu_ring_write(ring, reg);
1141 	amdgpu_ring_write(ring, 0);
1142 	amdgpu_ring_write(ring, val);
1143 }
1144 
1145 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1146 				  int mem_space, int opt, uint32_t addr0,
1147 				  uint32_t addr1, uint32_t ref, uint32_t mask,
1148 				  uint32_t inv)
1149 {
1150 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1151 	amdgpu_ring_write(ring,
1152 				 /* memory (1) or register (0) */
1153 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1154 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1155 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1156 				 WAIT_REG_MEM_ENGINE(eng_sel)));
1157 
1158 	if (mem_space)
1159 		BUG_ON(addr0 & 0x3); /* Dword align */
1160 	amdgpu_ring_write(ring, addr0);
1161 	amdgpu_ring_write(ring, addr1);
1162 	amdgpu_ring_write(ring, ref);
1163 	amdgpu_ring_write(ring, mask);
1164 	amdgpu_ring_write(ring, inv); /* poll interval */
1165 }
1166 
1167 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1168 {
1169 	struct amdgpu_device *adev = ring->adev;
1170 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1171 	uint32_t tmp = 0;
1172 	unsigned i;
1173 	int r;
1174 
1175 	WREG32(scratch, 0xCAFEDEAD);
1176 	r = amdgpu_ring_alloc(ring, 3);
1177 	if (r)
1178 		return r;
1179 
1180 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1181 	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1182 	amdgpu_ring_write(ring, 0xDEADBEEF);
1183 	amdgpu_ring_commit(ring);
1184 
1185 	for (i = 0; i < adev->usec_timeout; i++) {
1186 		tmp = RREG32(scratch);
1187 		if (tmp == 0xDEADBEEF)
1188 			break;
1189 		udelay(1);
1190 	}
1191 
1192 	if (i >= adev->usec_timeout)
1193 		r = -ETIMEDOUT;
1194 	return r;
1195 }
1196 
1197 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1198 {
1199 	struct amdgpu_device *adev = ring->adev;
1200 	struct amdgpu_ib ib;
1201 	struct dma_fence *f = NULL;
1202 
1203 	unsigned index;
1204 	uint64_t gpu_addr;
1205 	uint32_t tmp;
1206 	long r;
1207 
1208 	r = amdgpu_device_wb_get(adev, &index);
1209 	if (r)
1210 		return r;
1211 
1212 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1213 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1214 	memset(&ib, 0, sizeof(ib));
1215 
1216 	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1217 	if (r)
1218 		goto err1;
1219 
1220 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1221 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1222 	ib.ptr[2] = lower_32_bits(gpu_addr);
1223 	ib.ptr[3] = upper_32_bits(gpu_addr);
1224 	ib.ptr[4] = 0xDEADBEEF;
1225 	ib.length_dw = 5;
1226 
1227 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1228 	if (r)
1229 		goto err2;
1230 
1231 	r = dma_fence_wait_timeout(f, false, timeout);
1232 	if (r == 0) {
1233 		r = -ETIMEDOUT;
1234 		goto err2;
1235 	} else if (r < 0) {
1236 		goto err2;
1237 	}
1238 
1239 	tmp = adev->wb.wb[index];
1240 	if (tmp == 0xDEADBEEF)
1241 		r = 0;
1242 	else
1243 		r = -EINVAL;
1244 
1245 err2:
1246 	amdgpu_ib_free(adev, &ib, NULL);
1247 	dma_fence_put(f);
1248 err1:
1249 	amdgpu_device_wb_free(adev, index);
1250 	return r;
1251 }
1252 
1253 
1254 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1255 {
1256 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
1257 	amdgpu_ucode_release(&adev->gfx.me_fw);
1258 	amdgpu_ucode_release(&adev->gfx.ce_fw);
1259 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
1260 	amdgpu_ucode_release(&adev->gfx.mec_fw);
1261 	amdgpu_ucode_release(&adev->gfx.mec2_fw);
1262 
1263 	kfree(adev->gfx.rlc.register_list_format);
1264 }
1265 
1266 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1267 {
1268 	adev->gfx.me_fw_write_wait = false;
1269 	adev->gfx.mec_fw_write_wait = false;
1270 
1271 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1272 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1273 	     (adev->gfx.mec_feature_version < 46) ||
1274 	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
1275 	     (adev->gfx.pfp_feature_version < 46)))
1276 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1277 
1278 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1279 	case IP_VERSION(9, 0, 1):
1280 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1281 		    (adev->gfx.me_feature_version >= 42) &&
1282 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1283 		    (adev->gfx.pfp_feature_version >= 42))
1284 			adev->gfx.me_fw_write_wait = true;
1285 
1286 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1287 		    (adev->gfx.mec_feature_version >= 42))
1288 			adev->gfx.mec_fw_write_wait = true;
1289 		break;
1290 	case IP_VERSION(9, 2, 1):
1291 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1292 		    (adev->gfx.me_feature_version >= 44) &&
1293 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1294 		    (adev->gfx.pfp_feature_version >= 44))
1295 			adev->gfx.me_fw_write_wait = true;
1296 
1297 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1298 		    (adev->gfx.mec_feature_version >= 44))
1299 			adev->gfx.mec_fw_write_wait = true;
1300 		break;
1301 	case IP_VERSION(9, 4, 0):
1302 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1303 		    (adev->gfx.me_feature_version >= 44) &&
1304 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1305 		    (adev->gfx.pfp_feature_version >= 44))
1306 			adev->gfx.me_fw_write_wait = true;
1307 
1308 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1309 		    (adev->gfx.mec_feature_version >= 44))
1310 			adev->gfx.mec_fw_write_wait = true;
1311 		break;
1312 	case IP_VERSION(9, 1, 0):
1313 	case IP_VERSION(9, 2, 2):
1314 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1315 		    (adev->gfx.me_feature_version >= 42) &&
1316 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1317 		    (adev->gfx.pfp_feature_version >= 42))
1318 			adev->gfx.me_fw_write_wait = true;
1319 
1320 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1321 		    (adev->gfx.mec_feature_version >= 42))
1322 			adev->gfx.mec_fw_write_wait = true;
1323 		break;
1324 	default:
1325 		adev->gfx.me_fw_write_wait = true;
1326 		adev->gfx.mec_fw_write_wait = true;
1327 		break;
1328 	}
1329 }
1330 
1331 struct amdgpu_gfxoff_quirk {
1332 	u16 chip_vendor;
1333 	u16 chip_device;
1334 	u16 subsys_vendor;
1335 	u16 subsys_device;
1336 	u8 revision;
1337 };
1338 
1339 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1340 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1341 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1342 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1343 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1344 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1345 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1346 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1347 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1348 	/* https://bbs.openkylin.top/t/topic/171497 */
1349 	{ 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
1350 	/* HP 705G4 DM with R5 2400G */
1351 	{ 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
1352 	{ 0, 0, 0, 0, 0 },
1353 };
1354 
1355 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1356 {
1357 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1358 
1359 	while (p && p->chip_device != 0) {
1360 		if (pdev->vendor == p->chip_vendor &&
1361 		    pdev->device == p->chip_device &&
1362 		    pdev->subsystem_vendor == p->subsys_vendor &&
1363 		    pdev->subsystem_device == p->subsys_device &&
1364 		    pdev->revision == p->revision) {
1365 			return true;
1366 		}
1367 		++p;
1368 	}
1369 	return false;
1370 }
1371 
1372 static bool is_raven_kicker(struct amdgpu_device *adev)
1373 {
1374 	if (adev->pm.fw_version >= 0x41e2b)
1375 		return true;
1376 	else
1377 		return false;
1378 }
1379 
1380 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1381 {
1382 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1383 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1384 	    (adev->gfx.me_feature_version >= 52))
1385 		return true;
1386 	else
1387 		return false;
1388 }
1389 
1390 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1391 {
1392 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1393 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1394 
1395 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1396 	case IP_VERSION(9, 0, 1):
1397 	case IP_VERSION(9, 2, 1):
1398 	case IP_VERSION(9, 4, 0):
1399 		break;
1400 	case IP_VERSION(9, 2, 2):
1401 	case IP_VERSION(9, 1, 0):
1402 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1403 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1404 		    ((!is_raven_kicker(adev) &&
1405 		      adev->gfx.rlc_fw_version < 531) ||
1406 		     (adev->gfx.rlc_feature_version < 1) ||
1407 		     !adev->gfx.rlc.is_rlc_v2_1))
1408 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1409 
1410 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1411 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1412 				AMD_PG_SUPPORT_CP |
1413 				AMD_PG_SUPPORT_RLC_SMU_HS;
1414 		break;
1415 	case IP_VERSION(9, 3, 0):
1416 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1417 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1418 				AMD_PG_SUPPORT_CP |
1419 				AMD_PG_SUPPORT_RLC_SMU_HS;
1420 		break;
1421 	default:
1422 		break;
1423 	}
1424 }
1425 
1426 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1427 					  char *chip_name)
1428 {
1429 	int err;
1430 
1431 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
1432 				   "amdgpu/%s_pfp.bin", chip_name);
1433 	if (err)
1434 		goto out;
1435 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1436 
1437 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1438 				   "amdgpu/%s_me.bin", chip_name);
1439 	if (err)
1440 		goto out;
1441 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1442 
1443 	err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1444 				   "amdgpu/%s_ce.bin", chip_name);
1445 	if (err)
1446 		goto out;
1447 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1448 
1449 out:
1450 	if (err) {
1451 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1452 		amdgpu_ucode_release(&adev->gfx.me_fw);
1453 		amdgpu_ucode_release(&adev->gfx.ce_fw);
1454 	}
1455 	return err;
1456 }
1457 
1458 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1459 				       char *chip_name)
1460 {
1461 	int err;
1462 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1463 	uint16_t version_major;
1464 	uint16_t version_minor;
1465 	uint32_t smu_version;
1466 
1467 	/*
1468 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1469 	 * instead of picasso_rlc.bin.
1470 	 * Judgment method:
1471 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1472 	 *          or revision >= 0xD8 && revision <= 0xDF
1473 	 * otherwise is PCO FP5
1474 	 */
1475 	if (!strcmp(chip_name, "picasso") &&
1476 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1477 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1478 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1479 					   "amdgpu/%s_rlc_am4.bin", chip_name);
1480 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1481 		(smu_version >= 0x41e2b))
1482 		/**
1483 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1484 		*/
1485 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1486 					   "amdgpu/%s_kicker_rlc.bin", chip_name);
1487 	else
1488 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1489 					   "amdgpu/%s_rlc.bin", chip_name);
1490 	if (err)
1491 		goto out;
1492 
1493 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1494 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1495 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1496 	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1497 out:
1498 	if (err)
1499 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1500 
1501 	return err;
1502 }
1503 
1504 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1505 {
1506 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1507 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1508 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1509 		return false;
1510 
1511 	return true;
1512 }
1513 
1514 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1515 					      char *chip_name)
1516 {
1517 	int err;
1518 
1519 	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1520 		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1521 					   "amdgpu/%s_sjt_mec.bin", chip_name);
1522 	else
1523 		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1524 					   "amdgpu/%s_mec.bin", chip_name);
1525 	if (err)
1526 		goto out;
1527 
1528 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1529 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1530 
1531 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1532 		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1533 			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1534 						   "amdgpu/%s_sjt_mec2.bin", chip_name);
1535 		else
1536 			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1537 						   "amdgpu/%s_mec2.bin", chip_name);
1538 		if (!err) {
1539 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1540 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1541 		} else {
1542 			err = 0;
1543 			amdgpu_ucode_release(&adev->gfx.mec2_fw);
1544 		}
1545 	} else {
1546 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1547 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1548 	}
1549 
1550 	gfx_v9_0_check_if_need_gfxoff(adev);
1551 	gfx_v9_0_check_fw_write_wait(adev);
1552 
1553 out:
1554 	if (err)
1555 		amdgpu_ucode_release(&adev->gfx.mec_fw);
1556 	return err;
1557 }
1558 
1559 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1560 {
1561 	char ucode_prefix[30];
1562 	int r;
1563 
1564 	DRM_DEBUG("\n");
1565 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1566 
1567 	/* No CPG in Arcturus */
1568 	if (adev->gfx.num_gfx_rings) {
1569 		r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1570 		if (r)
1571 			return r;
1572 	}
1573 
1574 	r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1575 	if (r)
1576 		return r;
1577 
1578 	r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1579 	if (r)
1580 		return r;
1581 
1582 	return r;
1583 }
1584 
1585 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1586 {
1587 	u32 count = 0;
1588 	const struct cs_section_def *sect = NULL;
1589 	const struct cs_extent_def *ext = NULL;
1590 
1591 	/* begin clear state */
1592 	count += 2;
1593 	/* context control state */
1594 	count += 3;
1595 
1596 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1597 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1598 			if (sect->id == SECT_CONTEXT)
1599 				count += 2 + ext->reg_count;
1600 			else
1601 				return 0;
1602 		}
1603 	}
1604 
1605 	/* end clear state */
1606 	count += 2;
1607 	/* clear state */
1608 	count += 2;
1609 
1610 	return count;
1611 }
1612 
1613 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1614 				    volatile u32 *buffer)
1615 {
1616 	u32 count = 0, i;
1617 	const struct cs_section_def *sect = NULL;
1618 	const struct cs_extent_def *ext = NULL;
1619 
1620 	if (adev->gfx.rlc.cs_data == NULL)
1621 		return;
1622 	if (buffer == NULL)
1623 		return;
1624 
1625 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1626 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1627 
1628 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1629 	buffer[count++] = cpu_to_le32(0x80000000);
1630 	buffer[count++] = cpu_to_le32(0x80000000);
1631 
1632 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1633 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1634 			if (sect->id == SECT_CONTEXT) {
1635 				buffer[count++] =
1636 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1637 				buffer[count++] = cpu_to_le32(ext->reg_index -
1638 						PACKET3_SET_CONTEXT_REG_START);
1639 				for (i = 0; i < ext->reg_count; i++)
1640 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1641 			} else {
1642 				return;
1643 			}
1644 		}
1645 	}
1646 
1647 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1648 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1649 
1650 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1651 	buffer[count++] = cpu_to_le32(0);
1652 }
1653 
1654 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1655 {
1656 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1657 	uint32_t pg_always_on_cu_num = 2;
1658 	uint32_t always_on_cu_num;
1659 	uint32_t i, j, k;
1660 	uint32_t mask, cu_bitmap, counter;
1661 
1662 	if (adev->flags & AMD_IS_APU)
1663 		always_on_cu_num = 4;
1664 	else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1665 		always_on_cu_num = 8;
1666 	else
1667 		always_on_cu_num = 12;
1668 
1669 	mutex_lock(&adev->grbm_idx_mutex);
1670 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1671 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1672 			mask = 1;
1673 			cu_bitmap = 0;
1674 			counter = 0;
1675 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1676 
1677 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1678 				if (cu_info->bitmap[0][i][j] & mask) {
1679 					if (counter == pg_always_on_cu_num)
1680 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1681 					if (counter < always_on_cu_num)
1682 						cu_bitmap |= mask;
1683 					else
1684 						break;
1685 					counter++;
1686 				}
1687 				mask <<= 1;
1688 			}
1689 
1690 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1691 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1692 		}
1693 	}
1694 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1695 	mutex_unlock(&adev->grbm_idx_mutex);
1696 }
1697 
1698 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1699 {
1700 	uint32_t data;
1701 
1702 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1703 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1704 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1705 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1706 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1707 
1708 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1709 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1710 
1711 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1712 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1713 
1714 	mutex_lock(&adev->grbm_idx_mutex);
1715 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1716 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1717 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1718 
1719 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1720 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1721 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1722 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1723 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1724 
1725 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1726 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1727 	data &= 0x0000FFFF;
1728 	data |= 0x00C00000;
1729 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1730 
1731 	/*
1732 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1733 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1734 	 */
1735 
1736 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1737 	 * but used for RLC_LB_CNTL configuration */
1738 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1739 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1740 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1741 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1742 	mutex_unlock(&adev->grbm_idx_mutex);
1743 
1744 	gfx_v9_0_init_always_on_cu_mask(adev);
1745 }
1746 
1747 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1748 {
1749 	uint32_t data;
1750 
1751 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1752 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1753 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1754 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1755 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1756 
1757 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1758 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1759 
1760 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1761 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1762 
1763 	mutex_lock(&adev->grbm_idx_mutex);
1764 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1765 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1766 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1767 
1768 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1769 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1770 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1771 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1772 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1773 
1774 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1775 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1776 	data &= 0x0000FFFF;
1777 	data |= 0x00C00000;
1778 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1779 
1780 	/*
1781 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1782 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1783 	 */
1784 
1785 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1786 	 * but used for RLC_LB_CNTL configuration */
1787 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1788 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1789 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1790 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1791 	mutex_unlock(&adev->grbm_idx_mutex);
1792 
1793 	gfx_v9_0_init_always_on_cu_mask(adev);
1794 }
1795 
1796 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1797 {
1798 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1799 }
1800 
1801 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1802 {
1803 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1804 		return 5;
1805 	else
1806 		return 4;
1807 }
1808 
1809 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1810 {
1811 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1812 
1813 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1814 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1815 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1816 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1817 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1818 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1819 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1820 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1821 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1822 }
1823 
1824 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1825 {
1826 	const struct cs_section_def *cs_data;
1827 	int r;
1828 
1829 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1830 
1831 	cs_data = adev->gfx.rlc.cs_data;
1832 
1833 	if (cs_data) {
1834 		/* init clear state block */
1835 		r = amdgpu_gfx_rlc_init_csb(adev);
1836 		if (r)
1837 			return r;
1838 	}
1839 
1840 	if (adev->flags & AMD_IS_APU) {
1841 		/* TODO: double check the cp_table_size for RV */
1842 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1843 		r = amdgpu_gfx_rlc_init_cpt(adev);
1844 		if (r)
1845 			return r;
1846 	}
1847 
1848 	return 0;
1849 }
1850 
1851 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1852 {
1853 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1854 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1855 }
1856 
1857 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1858 {
1859 	int r;
1860 	u32 *hpd;
1861 	const __le32 *fw_data;
1862 	unsigned fw_size;
1863 	u32 *fw;
1864 	size_t mec_hpd_size;
1865 
1866 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1867 
1868 	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1869 
1870 	/* take ownership of the relevant compute queues */
1871 	amdgpu_gfx_compute_queue_acquire(adev);
1872 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1873 	if (mec_hpd_size) {
1874 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1875 					      AMDGPU_GEM_DOMAIN_VRAM |
1876 					      AMDGPU_GEM_DOMAIN_GTT,
1877 					      &adev->gfx.mec.hpd_eop_obj,
1878 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1879 					      (void **)&hpd);
1880 		if (r) {
1881 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1882 			gfx_v9_0_mec_fini(adev);
1883 			return r;
1884 		}
1885 
1886 		memset(hpd, 0, mec_hpd_size);
1887 
1888 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1889 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1890 	}
1891 
1892 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1893 
1894 	fw_data = (const __le32 *)
1895 		(adev->gfx.mec_fw->data +
1896 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1897 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1898 
1899 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1900 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1901 				      &adev->gfx.mec.mec_fw_obj,
1902 				      &adev->gfx.mec.mec_fw_gpu_addr,
1903 				      (void **)&fw);
1904 	if (r) {
1905 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1906 		gfx_v9_0_mec_fini(adev);
1907 		return r;
1908 	}
1909 
1910 	memcpy(fw, fw_data, fw_size);
1911 
1912 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1913 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1914 
1915 	return 0;
1916 }
1917 
1918 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1919 {
1920 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1921 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1922 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1923 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1924 		(SQ_IND_INDEX__FORCE_READ_MASK));
1925 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1926 }
1927 
1928 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1929 			   uint32_t wave, uint32_t thread,
1930 			   uint32_t regno, uint32_t num, uint32_t *out)
1931 {
1932 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1933 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1934 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1935 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1936 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1937 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1938 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1939 	while (num--)
1940 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1941 }
1942 
1943 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1944 {
1945 	/* type 1 wave data */
1946 	dst[(*no_fields)++] = 1;
1947 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1948 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1949 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1950 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1951 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1952 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1953 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1954 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1955 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1956 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1957 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1958 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1959 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1960 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1961 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1962 }
1963 
1964 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1965 				     uint32_t wave, uint32_t start,
1966 				     uint32_t size, uint32_t *dst)
1967 {
1968 	wave_read_regs(
1969 		adev, simd, wave, 0,
1970 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1971 }
1972 
1973 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1974 				     uint32_t wave, uint32_t thread,
1975 				     uint32_t start, uint32_t size,
1976 				     uint32_t *dst)
1977 {
1978 	wave_read_regs(
1979 		adev, simd, wave, thread,
1980 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1981 }
1982 
1983 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1984 				  u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1985 {
1986 	soc15_grbm_select(adev, me, pipe, q, vm, 0);
1987 }
1988 
1989 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1990         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1991         .select_se_sh = &gfx_v9_0_select_se_sh,
1992         .read_wave_data = &gfx_v9_0_read_wave_data,
1993         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1994         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1995         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1996 };
1997 
1998 const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
1999 		.ras_error_inject = &gfx_v9_0_ras_error_inject,
2000 		.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2001 		.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2002 };
2003 
2004 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2005 	.ras_block = {
2006 		.hw_ops = &gfx_v9_0_ras_ops,
2007 	},
2008 };
2009 
2010 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2011 {
2012 	u32 gb_addr_config;
2013 	int err;
2014 
2015 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2016 	case IP_VERSION(9, 0, 1):
2017 		adev->gfx.config.max_hw_contexts = 8;
2018 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2019 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2020 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2021 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2022 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2023 		break;
2024 	case IP_VERSION(9, 2, 1):
2025 		adev->gfx.config.max_hw_contexts = 8;
2026 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2027 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2028 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2029 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2030 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2031 		DRM_INFO("fix gfx.config for vega12\n");
2032 		break;
2033 	case IP_VERSION(9, 4, 0):
2034 		adev->gfx.ras = &gfx_v9_0_ras;
2035 		adev->gfx.config.max_hw_contexts = 8;
2036 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2037 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2038 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2039 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2040 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2041 		gb_addr_config &= ~0xf3e777ff;
2042 		gb_addr_config |= 0x22014042;
2043 		/* check vbios table if gpu info is not available */
2044 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2045 		if (err)
2046 			return err;
2047 		break;
2048 	case IP_VERSION(9, 2, 2):
2049 	case IP_VERSION(9, 1, 0):
2050 		adev->gfx.config.max_hw_contexts = 8;
2051 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2052 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2053 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2054 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2055 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2056 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2057 		else
2058 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2059 		break;
2060 	case IP_VERSION(9, 4, 1):
2061 		adev->gfx.ras = &gfx_v9_4_ras;
2062 		adev->gfx.config.max_hw_contexts = 8;
2063 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2064 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2065 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2066 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2067 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2068 		gb_addr_config &= ~0xf3e777ff;
2069 		gb_addr_config |= 0x22014042;
2070 		break;
2071 	case IP_VERSION(9, 3, 0):
2072 		adev->gfx.config.max_hw_contexts = 8;
2073 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2074 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2075 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2076 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2077 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2078 		gb_addr_config &= ~0xf3e777ff;
2079 		gb_addr_config |= 0x22010042;
2080 		break;
2081 	case IP_VERSION(9, 4, 2):
2082 		adev->gfx.ras = &gfx_v9_4_2_ras;
2083 		adev->gfx.config.max_hw_contexts = 8;
2084 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2085 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2086 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2087 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2088 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2089 		gb_addr_config &= ~0xf3e777ff;
2090 		gb_addr_config |= 0x22014042;
2091 		/* check vbios table if gpu info is not available */
2092 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2093 		if (err)
2094 			return err;
2095 		break;
2096 	default:
2097 		BUG();
2098 		break;
2099 	}
2100 
2101 	adev->gfx.config.gb_addr_config = gb_addr_config;
2102 
2103 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2104 			REG_GET_FIELD(
2105 					adev->gfx.config.gb_addr_config,
2106 					GB_ADDR_CONFIG,
2107 					NUM_PIPES);
2108 
2109 	adev->gfx.config.max_tile_pipes =
2110 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2111 
2112 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2113 			REG_GET_FIELD(
2114 					adev->gfx.config.gb_addr_config,
2115 					GB_ADDR_CONFIG,
2116 					NUM_BANKS);
2117 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2118 			REG_GET_FIELD(
2119 					adev->gfx.config.gb_addr_config,
2120 					GB_ADDR_CONFIG,
2121 					MAX_COMPRESSED_FRAGS);
2122 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2123 			REG_GET_FIELD(
2124 					adev->gfx.config.gb_addr_config,
2125 					GB_ADDR_CONFIG,
2126 					NUM_RB_PER_SE);
2127 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2128 			REG_GET_FIELD(
2129 					adev->gfx.config.gb_addr_config,
2130 					GB_ADDR_CONFIG,
2131 					NUM_SHADER_ENGINES);
2132 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2133 			REG_GET_FIELD(
2134 					adev->gfx.config.gb_addr_config,
2135 					GB_ADDR_CONFIG,
2136 					PIPE_INTERLEAVE_SIZE));
2137 
2138 	return 0;
2139 }
2140 
2141 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2142 				      int mec, int pipe, int queue)
2143 {
2144 	unsigned irq_type;
2145 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2146 	unsigned int hw_prio;
2147 
2148 	ring = &adev->gfx.compute_ring[ring_id];
2149 
2150 	/* mec0 is me1 */
2151 	ring->me = mec + 1;
2152 	ring->pipe = pipe;
2153 	ring->queue = queue;
2154 
2155 	ring->ring_obj = NULL;
2156 	ring->use_doorbell = true;
2157 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2158 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2159 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2160 	ring->vm_hub = AMDGPU_GFXHUB(0);
2161 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2162 
2163 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2164 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2165 		+ ring->pipe;
2166 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2167 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2168 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2169 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2170 				hw_prio, NULL);
2171 }
2172 
2173 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2174 {
2175 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2176 	uint32_t *ptr;
2177 	uint32_t inst;
2178 
2179 	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2180 	if (!ptr) {
2181 		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2182 		adev->gfx.ip_dump_core = NULL;
2183 	} else {
2184 		adev->gfx.ip_dump_core = ptr;
2185 	}
2186 
2187 	/* Allocate memory for compute queue registers for all the instances */
2188 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2189 	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2190 		adev->gfx.mec.num_queue_per_pipe;
2191 
2192 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2193 	if (!ptr) {
2194 		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2195 		adev->gfx.ip_dump_compute_queues = NULL;
2196 	} else {
2197 		adev->gfx.ip_dump_compute_queues = ptr;
2198 	}
2199 }
2200 
2201 static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
2202 {
2203 	int i, j, k, r, ring_id;
2204 	int xcc_id = 0;
2205 	struct amdgpu_ring *ring;
2206 	struct amdgpu_device *adev = ip_block->adev;
2207 	unsigned int hw_prio;
2208 
2209 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2210 	case IP_VERSION(9, 0, 1):
2211 	case IP_VERSION(9, 2, 1):
2212 	case IP_VERSION(9, 4, 0):
2213 	case IP_VERSION(9, 2, 2):
2214 	case IP_VERSION(9, 1, 0):
2215 	case IP_VERSION(9, 4, 1):
2216 	case IP_VERSION(9, 3, 0):
2217 	case IP_VERSION(9, 4, 2):
2218 		adev->gfx.mec.num_mec = 2;
2219 		break;
2220 	default:
2221 		adev->gfx.mec.num_mec = 1;
2222 		break;
2223 	}
2224 
2225 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2226 	case IP_VERSION(9, 4, 2):
2227 		adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
2228 		adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
2229 		if (adev->gfx.mec_fw_version >= 88) {
2230 			adev->gfx.enable_cleaner_shader = true;
2231 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
2232 			if (r) {
2233 				adev->gfx.enable_cleaner_shader = false;
2234 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
2235 			}
2236 		}
2237 		break;
2238 	default:
2239 		adev->gfx.enable_cleaner_shader = false;
2240 		break;
2241 	}
2242 
2243 	adev->gfx.mec.num_pipe_per_mec = 4;
2244 	adev->gfx.mec.num_queue_per_pipe = 8;
2245 
2246 	/* EOP Event */
2247 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2248 	if (r)
2249 		return r;
2250 
2251 	/* Bad opcode Event */
2252 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
2253 			      GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
2254 			      &adev->gfx.bad_op_irq);
2255 	if (r)
2256 		return r;
2257 
2258 	/* Privileged reg */
2259 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2260 			      &adev->gfx.priv_reg_irq);
2261 	if (r)
2262 		return r;
2263 
2264 	/* Privileged inst */
2265 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2266 			      &adev->gfx.priv_inst_irq);
2267 	if (r)
2268 		return r;
2269 
2270 	/* ECC error */
2271 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2272 			      &adev->gfx.cp_ecc_error_irq);
2273 	if (r)
2274 		return r;
2275 
2276 	/* FUE error */
2277 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2278 			      &adev->gfx.cp_ecc_error_irq);
2279 	if (r)
2280 		return r;
2281 
2282 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2283 
2284 	if (adev->gfx.rlc.funcs) {
2285 		if (adev->gfx.rlc.funcs->init) {
2286 			r = adev->gfx.rlc.funcs->init(adev);
2287 			if (r) {
2288 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2289 				return r;
2290 			}
2291 		}
2292 	}
2293 
2294 	r = gfx_v9_0_mec_init(adev);
2295 	if (r) {
2296 		DRM_ERROR("Failed to init MEC BOs!\n");
2297 		return r;
2298 	}
2299 
2300 	/* set up the gfx ring */
2301 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2302 		ring = &adev->gfx.gfx_ring[i];
2303 		ring->ring_obj = NULL;
2304 		if (!i)
2305 			sprintf(ring->name, "gfx");
2306 		else
2307 			sprintf(ring->name, "gfx_%d", i);
2308 		ring->use_doorbell = true;
2309 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2310 
2311 		/* disable scheduler on the real ring */
2312 		ring->no_scheduler = adev->gfx.mcbp;
2313 		ring->vm_hub = AMDGPU_GFXHUB(0);
2314 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2315 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2316 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2317 		if (r)
2318 			return r;
2319 	}
2320 
2321 	/* set up the software rings */
2322 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2323 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2324 			ring = &adev->gfx.sw_gfx_ring[i];
2325 			ring->ring_obj = NULL;
2326 			sprintf(ring->name, amdgpu_sw_ring_name(i));
2327 			ring->use_doorbell = true;
2328 			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2329 			ring->is_sw_ring = true;
2330 			hw_prio = amdgpu_sw_ring_priority(i);
2331 			ring->vm_hub = AMDGPU_GFXHUB(0);
2332 			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2333 					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2334 					     NULL);
2335 			if (r)
2336 				return r;
2337 			ring->wptr = 0;
2338 		}
2339 
2340 		/* init the muxer and add software rings */
2341 		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2342 					 GFX9_NUM_SW_GFX_RINGS);
2343 		if (r) {
2344 			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2345 			return r;
2346 		}
2347 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2348 			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2349 							&adev->gfx.sw_gfx_ring[i]);
2350 			if (r) {
2351 				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2352 				return r;
2353 			}
2354 		}
2355 	}
2356 
2357 	/* set up the compute queues - allocate horizontally across pipes */
2358 	ring_id = 0;
2359 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2360 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2361 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2362 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2363 								     k, j))
2364 					continue;
2365 
2366 				r = gfx_v9_0_compute_ring_init(adev,
2367 							       ring_id,
2368 							       i, k, j);
2369 				if (r)
2370 					return r;
2371 
2372 				ring_id++;
2373 			}
2374 		}
2375 	}
2376 
2377 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2378 	if (r) {
2379 		DRM_ERROR("Failed to init KIQ BOs!\n");
2380 		return r;
2381 	}
2382 
2383 	r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2384 	if (r)
2385 		return r;
2386 
2387 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
2388 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2389 	if (r)
2390 		return r;
2391 
2392 	adev->gfx.ce_ram_size = 0x8000;
2393 
2394 	r = gfx_v9_0_gpu_early_init(adev);
2395 	if (r)
2396 		return r;
2397 
2398 	if (amdgpu_gfx_ras_sw_init(adev)) {
2399 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2400 		return -EINVAL;
2401 	}
2402 
2403 	gfx_v9_0_alloc_ip_dump(adev);
2404 
2405 	r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
2406 	if (r)
2407 		return r;
2408 
2409 	return 0;
2410 }
2411 
2412 
2413 static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
2414 {
2415 	int i;
2416 	struct amdgpu_device *adev = ip_block->adev;
2417 
2418 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2419 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2420 			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2421 		amdgpu_ring_mux_fini(&adev->gfx.muxer);
2422 	}
2423 
2424 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2425 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2426 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2427 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2428 
2429 	amdgpu_gfx_mqd_sw_fini(adev, 0);
2430 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2431 	amdgpu_gfx_kiq_fini(adev, 0);
2432 
2433 	amdgpu_gfx_cleaner_shader_sw_fini(adev);
2434 
2435 	gfx_v9_0_mec_fini(adev);
2436 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2437 				&adev->gfx.rlc.clear_state_gpu_addr,
2438 				(void **)&adev->gfx.rlc.cs_ptr);
2439 	if (adev->flags & AMD_IS_APU) {
2440 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2441 				&adev->gfx.rlc.cp_table_gpu_addr,
2442 				(void **)&adev->gfx.rlc.cp_table_ptr);
2443 	}
2444 	gfx_v9_0_free_microcode(adev);
2445 
2446 	amdgpu_gfx_sysfs_isolation_shader_fini(adev);
2447 
2448 	kfree(adev->gfx.ip_dump_core);
2449 	kfree(adev->gfx.ip_dump_compute_queues);
2450 
2451 	return 0;
2452 }
2453 
2454 
2455 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2456 {
2457 	/* TODO */
2458 }
2459 
2460 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2461 			   u32 instance, int xcc_id)
2462 {
2463 	u32 data;
2464 
2465 	if (instance == 0xffffffff)
2466 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2467 	else
2468 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2469 
2470 	if (se_num == 0xffffffff)
2471 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2472 	else
2473 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2474 
2475 	if (sh_num == 0xffffffff)
2476 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2477 	else
2478 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2479 
2480 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2481 }
2482 
2483 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2484 {
2485 	u32 data, mask;
2486 
2487 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2488 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2489 
2490 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2491 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2492 
2493 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2494 					 adev->gfx.config.max_sh_per_se);
2495 
2496 	return (~data) & mask;
2497 }
2498 
2499 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2500 {
2501 	int i, j;
2502 	u32 data;
2503 	u32 active_rbs = 0;
2504 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2505 					adev->gfx.config.max_sh_per_se;
2506 
2507 	mutex_lock(&adev->grbm_idx_mutex);
2508 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2509 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2510 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2511 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2512 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2513 					       rb_bitmap_width_per_sh);
2514 		}
2515 	}
2516 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2517 	mutex_unlock(&adev->grbm_idx_mutex);
2518 
2519 	adev->gfx.config.backend_enable_mask = active_rbs;
2520 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2521 }
2522 
2523 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2524 				uint32_t first_vmid,
2525 				uint32_t last_vmid)
2526 {
2527 	uint32_t data;
2528 	uint32_t trap_config_vmid_mask = 0;
2529 	int i;
2530 
2531 	/* Calculate trap config vmid mask */
2532 	for (i = first_vmid; i < last_vmid; i++)
2533 		trap_config_vmid_mask |= (1 << i);
2534 
2535 	data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2536 			VMID_SEL, trap_config_vmid_mask);
2537 	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2538 			TRAP_EN, 1);
2539 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2540 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2541 
2542 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2543 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2544 }
2545 
2546 #define DEFAULT_SH_MEM_BASES	(0x6000)
2547 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2548 {
2549 	int i;
2550 	uint32_t sh_mem_config;
2551 	uint32_t sh_mem_bases;
2552 
2553 	/*
2554 	 * Configure apertures:
2555 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2556 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2557 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2558 	 */
2559 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2560 
2561 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2562 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2563 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2564 
2565 	mutex_lock(&adev->srbm_mutex);
2566 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2567 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2568 		/* CP and shaders */
2569 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2570 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2571 	}
2572 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2573 	mutex_unlock(&adev->srbm_mutex);
2574 
2575 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2576 	   access. These should be enabled by FW for target VMIDs. */
2577 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2578 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2579 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2580 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2581 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2582 	}
2583 }
2584 
2585 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2586 {
2587 	int vmid;
2588 
2589 	/*
2590 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2591 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2592 	 * the driver can enable them for graphics. VMID0 should maintain
2593 	 * access so that HWS firmware can save/restore entries.
2594 	 */
2595 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2596 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2597 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2598 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2599 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2600 	}
2601 }
2602 
2603 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2604 {
2605 	uint32_t tmp;
2606 
2607 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2608 	case IP_VERSION(9, 4, 1):
2609 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2610 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2611 				!READ_ONCE(adev->barrier_has_auto_waitcnt));
2612 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2613 		break;
2614 	default:
2615 		break;
2616 	}
2617 }
2618 
2619 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2620 {
2621 	u32 tmp;
2622 	int i;
2623 
2624 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2625 
2626 	gfx_v9_0_tiling_mode_table_init(adev);
2627 
2628 	if (adev->gfx.num_gfx_rings)
2629 		gfx_v9_0_setup_rb(adev);
2630 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2631 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2632 
2633 	/* XXX SH_MEM regs */
2634 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2635 	mutex_lock(&adev->srbm_mutex);
2636 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2637 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2638 		/* CP and shaders */
2639 		if (i == 0) {
2640 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2641 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2642 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2643 					    !!adev->gmc.noretry);
2644 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2645 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2646 		} else {
2647 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2648 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2649 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2650 					    !!adev->gmc.noretry);
2651 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2652 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2653 				(adev->gmc.private_aperture_start >> 48));
2654 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2655 				(adev->gmc.shared_aperture_start >> 48));
2656 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2657 		}
2658 	}
2659 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2660 
2661 	mutex_unlock(&adev->srbm_mutex);
2662 
2663 	gfx_v9_0_init_compute_vmid(adev);
2664 	gfx_v9_0_init_gds_vmid(adev);
2665 	gfx_v9_0_init_sq_config(adev);
2666 }
2667 
2668 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2669 {
2670 	u32 i, j, k;
2671 	u32 mask;
2672 
2673 	mutex_lock(&adev->grbm_idx_mutex);
2674 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2675 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2676 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2677 			for (k = 0; k < adev->usec_timeout; k++) {
2678 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2679 					break;
2680 				udelay(1);
2681 			}
2682 			if (k == adev->usec_timeout) {
2683 				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2684 						      0xffffffff, 0xffffffff, 0);
2685 				mutex_unlock(&adev->grbm_idx_mutex);
2686 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2687 					 i, j);
2688 				return;
2689 			}
2690 		}
2691 	}
2692 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2693 	mutex_unlock(&adev->grbm_idx_mutex);
2694 
2695 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2696 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2697 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2698 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2699 	for (k = 0; k < adev->usec_timeout; k++) {
2700 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2701 			break;
2702 		udelay(1);
2703 	}
2704 }
2705 
2706 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2707 					       bool enable)
2708 {
2709 	u32 tmp;
2710 
2711 	/* These interrupts should be enabled to drive DS clock */
2712 
2713 	tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2714 
2715 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2716 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2717 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2718 	if (adev->gfx.num_gfx_rings)
2719 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2720 
2721 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2722 }
2723 
2724 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2725 {
2726 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2727 	/* csib */
2728 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2729 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2730 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2731 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2732 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2733 			adev->gfx.rlc.clear_state_size);
2734 }
2735 
2736 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2737 				int indirect_offset,
2738 				int list_size,
2739 				int *unique_indirect_regs,
2740 				int unique_indirect_reg_count,
2741 				int *indirect_start_offsets,
2742 				int *indirect_start_offsets_count,
2743 				int max_start_offsets_count)
2744 {
2745 	int idx;
2746 
2747 	for (; indirect_offset < list_size; indirect_offset++) {
2748 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2749 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2750 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2751 
2752 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2753 			indirect_offset += 2;
2754 
2755 			/* look for the matching indice */
2756 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2757 				if (unique_indirect_regs[idx] ==
2758 					register_list_format[indirect_offset] ||
2759 					!unique_indirect_regs[idx])
2760 					break;
2761 			}
2762 
2763 			BUG_ON(idx >= unique_indirect_reg_count);
2764 
2765 			if (!unique_indirect_regs[idx])
2766 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2767 
2768 			indirect_offset++;
2769 		}
2770 	}
2771 }
2772 
2773 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2774 {
2775 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2776 	int unique_indirect_reg_count = 0;
2777 
2778 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2779 	int indirect_start_offsets_count = 0;
2780 
2781 	int list_size = 0;
2782 	int i = 0, j = 0;
2783 	u32 tmp = 0;
2784 
2785 	u32 *register_list_format =
2786 		kmemdup(adev->gfx.rlc.register_list_format,
2787 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2788 	if (!register_list_format)
2789 		return -ENOMEM;
2790 
2791 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2792 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2793 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2794 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2795 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2796 				    unique_indirect_regs,
2797 				    unique_indirect_reg_count,
2798 				    indirect_start_offsets,
2799 				    &indirect_start_offsets_count,
2800 				    ARRAY_SIZE(indirect_start_offsets));
2801 
2802 	/* enable auto inc in case it is disabled */
2803 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2804 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2805 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2806 
2807 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2808 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2809 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2810 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2811 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2812 			adev->gfx.rlc.register_restore[i]);
2813 
2814 	/* load indirect register */
2815 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2816 		adev->gfx.rlc.reg_list_format_start);
2817 
2818 	/* direct register portion */
2819 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2820 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2821 			register_list_format[i]);
2822 
2823 	/* indirect register portion */
2824 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2825 		if (register_list_format[i] == 0xFFFFFFFF) {
2826 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2827 			continue;
2828 		}
2829 
2830 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2831 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2832 
2833 		for (j = 0; j < unique_indirect_reg_count; j++) {
2834 			if (register_list_format[i] == unique_indirect_regs[j]) {
2835 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2836 				break;
2837 			}
2838 		}
2839 
2840 		BUG_ON(j >= unique_indirect_reg_count);
2841 
2842 		i++;
2843 	}
2844 
2845 	/* set save/restore list size */
2846 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2847 	list_size = list_size >> 1;
2848 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2849 		adev->gfx.rlc.reg_restore_list_size);
2850 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2851 
2852 	/* write the starting offsets to RLC scratch ram */
2853 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2854 		adev->gfx.rlc.starting_offsets_start);
2855 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2856 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2857 		       indirect_start_offsets[i]);
2858 
2859 	/* load unique indirect regs*/
2860 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2861 		if (unique_indirect_regs[i] != 0) {
2862 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2863 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2864 			       unique_indirect_regs[i] & 0x3FFFF);
2865 
2866 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2867 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2868 			       unique_indirect_regs[i] >> 20);
2869 		}
2870 	}
2871 
2872 	kfree(register_list_format);
2873 	return 0;
2874 }
2875 
2876 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2877 {
2878 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2879 }
2880 
2881 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2882 					     bool enable)
2883 {
2884 	uint32_t data = 0;
2885 	uint32_t default_data = 0;
2886 
2887 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2888 	if (enable) {
2889 		/* enable GFXIP control over CGPG */
2890 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2891 		if(default_data != data)
2892 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2893 
2894 		/* update status */
2895 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2896 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2897 		if(default_data != data)
2898 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2899 	} else {
2900 		/* restore GFXIP control over GCPG */
2901 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2902 		if(default_data != data)
2903 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2904 	}
2905 }
2906 
2907 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2908 {
2909 	uint32_t data = 0;
2910 
2911 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2912 			      AMD_PG_SUPPORT_GFX_SMG |
2913 			      AMD_PG_SUPPORT_GFX_DMG)) {
2914 		/* init IDLE_POLL_COUNT = 60 */
2915 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2916 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2917 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2918 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2919 
2920 		/* init RLC PG Delay */
2921 		data = 0;
2922 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2923 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2924 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2925 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2926 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2927 
2928 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2929 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2930 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2931 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2932 
2933 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2934 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2935 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2936 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2937 
2938 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2939 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2940 
2941 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2942 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2943 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2944 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2945 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2946 	}
2947 }
2948 
2949 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2950 						bool enable)
2951 {
2952 	uint32_t data = 0;
2953 	uint32_t default_data = 0;
2954 
2955 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2956 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2957 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2958 			     enable ? 1 : 0);
2959 	if (default_data != data)
2960 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2961 }
2962 
2963 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2964 						bool enable)
2965 {
2966 	uint32_t data = 0;
2967 	uint32_t default_data = 0;
2968 
2969 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2970 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2971 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2972 			     enable ? 1 : 0);
2973 	if(default_data != data)
2974 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2975 }
2976 
2977 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2978 					bool enable)
2979 {
2980 	uint32_t data = 0;
2981 	uint32_t default_data = 0;
2982 
2983 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2984 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2985 			     CP_PG_DISABLE,
2986 			     enable ? 0 : 1);
2987 	if(default_data != data)
2988 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2989 }
2990 
2991 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2992 						bool enable)
2993 {
2994 	uint32_t data, default_data;
2995 
2996 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2997 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2998 			     GFX_POWER_GATING_ENABLE,
2999 			     enable ? 1 : 0);
3000 	if(default_data != data)
3001 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3002 }
3003 
3004 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3005 						bool enable)
3006 {
3007 	uint32_t data, default_data;
3008 
3009 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3010 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3011 			     GFX_PIPELINE_PG_ENABLE,
3012 			     enable ? 1 : 0);
3013 	if(default_data != data)
3014 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3015 
3016 	if (!enable)
3017 		/* read any GFX register to wake up GFX */
3018 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3019 }
3020 
3021 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3022 						       bool enable)
3023 {
3024 	uint32_t data, default_data;
3025 
3026 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3027 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3028 			     STATIC_PER_CU_PG_ENABLE,
3029 			     enable ? 1 : 0);
3030 	if(default_data != data)
3031 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3032 }
3033 
3034 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3035 						bool enable)
3036 {
3037 	uint32_t data, default_data;
3038 
3039 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3040 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3041 			     DYN_PER_CU_PG_ENABLE,
3042 			     enable ? 1 : 0);
3043 	if(default_data != data)
3044 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3045 }
3046 
3047 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3048 {
3049 	gfx_v9_0_init_csb(adev);
3050 
3051 	/*
3052 	 * Rlc save restore list is workable since v2_1.
3053 	 * And it's needed by gfxoff feature.
3054 	 */
3055 	if (adev->gfx.rlc.is_rlc_v2_1) {
3056 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
3057 			    IP_VERSION(9, 2, 1) ||
3058 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
3059 			gfx_v9_1_init_rlc_save_restore_list(adev);
3060 		gfx_v9_0_enable_save_restore_machine(adev);
3061 	}
3062 
3063 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3064 			      AMD_PG_SUPPORT_GFX_SMG |
3065 			      AMD_PG_SUPPORT_GFX_DMG |
3066 			      AMD_PG_SUPPORT_CP |
3067 			      AMD_PG_SUPPORT_GDS |
3068 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3069 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3070 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
3071 		gfx_v9_0_init_gfx_power_gating(adev);
3072 	}
3073 }
3074 
3075 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3076 {
3077 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3078 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3079 	gfx_v9_0_wait_for_rlc_serdes(adev);
3080 }
3081 
3082 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3083 {
3084 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3085 	udelay(50);
3086 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3087 	udelay(50);
3088 }
3089 
3090 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3091 {
3092 #ifdef AMDGPU_RLC_DEBUG_RETRY
3093 	u32 rlc_ucode_ver;
3094 #endif
3095 
3096 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3097 	udelay(50);
3098 
3099 	/* carrizo do enable cp interrupt after cp inited */
3100 	if (!(adev->flags & AMD_IS_APU)) {
3101 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3102 		udelay(50);
3103 	}
3104 
3105 #ifdef AMDGPU_RLC_DEBUG_RETRY
3106 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3107 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3108 	if(rlc_ucode_ver == 0x108) {
3109 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
3110 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3111 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3112 		 * default is 0x9C4 to create a 100us interval */
3113 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3114 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3115 		 * to disable the page fault retry interrupts, default is
3116 		 * 0x100 (256) */
3117 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3118 	}
3119 #endif
3120 }
3121 
3122 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3123 {
3124 	const struct rlc_firmware_header_v2_0 *hdr;
3125 	const __le32 *fw_data;
3126 	unsigned i, fw_size;
3127 
3128 	if (!adev->gfx.rlc_fw)
3129 		return -EINVAL;
3130 
3131 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3132 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3133 
3134 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3135 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3136 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3137 
3138 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3139 			RLCG_UCODE_LOADING_START_ADDRESS);
3140 	for (i = 0; i < fw_size; i++)
3141 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3142 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3143 
3144 	return 0;
3145 }
3146 
3147 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3148 {
3149 	int r;
3150 
3151 	if (amdgpu_sriov_vf(adev)) {
3152 		gfx_v9_0_init_csb(adev);
3153 		return 0;
3154 	}
3155 
3156 	adev->gfx.rlc.funcs->stop(adev);
3157 
3158 	/* disable CG */
3159 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3160 
3161 	gfx_v9_0_init_pg(adev);
3162 
3163 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3164 		/* legacy rlc firmware loading */
3165 		r = gfx_v9_0_rlc_load_microcode(adev);
3166 		if (r)
3167 			return r;
3168 	}
3169 
3170 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3171 	case IP_VERSION(9, 2, 2):
3172 	case IP_VERSION(9, 1, 0):
3173 		gfx_v9_0_init_lbpw(adev);
3174 		if (amdgpu_lbpw == 0)
3175 			gfx_v9_0_enable_lbpw(adev, false);
3176 		else
3177 			gfx_v9_0_enable_lbpw(adev, true);
3178 		break;
3179 	case IP_VERSION(9, 4, 0):
3180 		gfx_v9_4_init_lbpw(adev);
3181 		if (amdgpu_lbpw > 0)
3182 			gfx_v9_0_enable_lbpw(adev, true);
3183 		else
3184 			gfx_v9_0_enable_lbpw(adev, false);
3185 		break;
3186 	default:
3187 		break;
3188 	}
3189 
3190 	gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
3191 
3192 	adev->gfx.rlc.funcs->start(adev);
3193 
3194 	return 0;
3195 }
3196 
3197 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3198 {
3199 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3200 
3201 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1);
3202 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1);
3203 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1);
3204 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1);
3205 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1);
3206 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1);
3207 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1);
3208 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1);
3209 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1);
3210 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3211 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3212 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3213 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3214 	udelay(50);
3215 }
3216 
3217 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3218 {
3219 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3220 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3221 	const struct gfx_firmware_header_v1_0 *me_hdr;
3222 	const __le32 *fw_data;
3223 	unsigned i, fw_size;
3224 
3225 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3226 		return -EINVAL;
3227 
3228 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3229 		adev->gfx.pfp_fw->data;
3230 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3231 		adev->gfx.ce_fw->data;
3232 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3233 		adev->gfx.me_fw->data;
3234 
3235 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3236 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3237 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3238 
3239 	gfx_v9_0_cp_gfx_enable(adev, false);
3240 
3241 	/* PFP */
3242 	fw_data = (const __le32 *)
3243 		(adev->gfx.pfp_fw->data +
3244 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3245 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3246 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3247 	for (i = 0; i < fw_size; i++)
3248 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3249 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3250 
3251 	/* CE */
3252 	fw_data = (const __le32 *)
3253 		(adev->gfx.ce_fw->data +
3254 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3255 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3256 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3257 	for (i = 0; i < fw_size; i++)
3258 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3259 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3260 
3261 	/* ME */
3262 	fw_data = (const __le32 *)
3263 		(adev->gfx.me_fw->data +
3264 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3265 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3266 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3267 	for (i = 0; i < fw_size; i++)
3268 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3269 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3270 
3271 	return 0;
3272 }
3273 
3274 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3275 {
3276 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3277 	const struct cs_section_def *sect = NULL;
3278 	const struct cs_extent_def *ext = NULL;
3279 	int r, i, tmp;
3280 
3281 	/* init the CP */
3282 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3283 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3284 
3285 	gfx_v9_0_cp_gfx_enable(adev, true);
3286 
3287 	/* Now only limit the quirk on the APU gfx9 series and already
3288 	 * confirmed that the APU gfx10/gfx11 needn't such update.
3289 	 */
3290 	if (adev->flags & AMD_IS_APU &&
3291 			adev->in_s3 && !adev->suspend_complete) {
3292 		DRM_INFO(" Will skip the CSB packet resubmit\n");
3293 		return 0;
3294 	}
3295 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3296 	if (r) {
3297 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3298 		return r;
3299 	}
3300 
3301 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3302 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3303 
3304 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3305 	amdgpu_ring_write(ring, 0x80000000);
3306 	amdgpu_ring_write(ring, 0x80000000);
3307 
3308 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3309 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3310 			if (sect->id == SECT_CONTEXT) {
3311 				amdgpu_ring_write(ring,
3312 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3313 					       ext->reg_count));
3314 				amdgpu_ring_write(ring,
3315 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3316 				for (i = 0; i < ext->reg_count; i++)
3317 					amdgpu_ring_write(ring, ext->extent[i]);
3318 			}
3319 		}
3320 	}
3321 
3322 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3323 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3324 
3325 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3326 	amdgpu_ring_write(ring, 0);
3327 
3328 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3329 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3330 	amdgpu_ring_write(ring, 0x8000);
3331 	amdgpu_ring_write(ring, 0x8000);
3332 
3333 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3334 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3335 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3336 	amdgpu_ring_write(ring, tmp);
3337 	amdgpu_ring_write(ring, 0);
3338 
3339 	amdgpu_ring_commit(ring);
3340 
3341 	return 0;
3342 }
3343 
3344 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3345 {
3346 	struct amdgpu_ring *ring;
3347 	u32 tmp;
3348 	u32 rb_bufsz;
3349 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3350 
3351 	/* Set the write pointer delay */
3352 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3353 
3354 	/* set the RB to use vmid 0 */
3355 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3356 
3357 	/* Set ring buffer size */
3358 	ring = &adev->gfx.gfx_ring[0];
3359 	rb_bufsz = order_base_2(ring->ring_size / 8);
3360 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3361 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3362 #ifdef __BIG_ENDIAN
3363 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3364 #endif
3365 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3366 
3367 	/* Initialize the ring buffer's write pointers */
3368 	ring->wptr = 0;
3369 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3370 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3371 
3372 	/* set the wb address whether it's enabled or not */
3373 	rptr_addr = ring->rptr_gpu_addr;
3374 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3375 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3376 
3377 	wptr_gpu_addr = ring->wptr_gpu_addr;
3378 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3379 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3380 
3381 	mdelay(1);
3382 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3383 
3384 	rb_addr = ring->gpu_addr >> 8;
3385 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3386 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3387 
3388 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3389 	if (ring->use_doorbell) {
3390 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3391 				    DOORBELL_OFFSET, ring->doorbell_index);
3392 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3393 				    DOORBELL_EN, 1);
3394 	} else {
3395 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3396 	}
3397 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3398 
3399 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3400 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3401 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3402 
3403 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3404 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3405 
3406 
3407 	/* start the ring */
3408 	gfx_v9_0_cp_gfx_start(adev);
3409 
3410 	return 0;
3411 }
3412 
3413 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3414 {
3415 	if (enable) {
3416 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3417 	} else {
3418 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3419 				 (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
3420 				  CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
3421 				  CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
3422 				  CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
3423 				  CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
3424 				  CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
3425 				  CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
3426 				  CP_MEC_CNTL__MEC_ME1_HALT_MASK |
3427 				  CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3428 		adev->gfx.kiq[0].ring.sched.ready = false;
3429 	}
3430 	udelay(50);
3431 }
3432 
3433 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3434 {
3435 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3436 	const __le32 *fw_data;
3437 	unsigned i;
3438 	u32 tmp;
3439 
3440 	if (!adev->gfx.mec_fw)
3441 		return -EINVAL;
3442 
3443 	gfx_v9_0_cp_compute_enable(adev, false);
3444 
3445 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3446 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3447 
3448 	fw_data = (const __le32 *)
3449 		(adev->gfx.mec_fw->data +
3450 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3451 	tmp = 0;
3452 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3453 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3454 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3455 
3456 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3457 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3458 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3459 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3460 
3461 	/* MEC1 */
3462 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3463 			 mec_hdr->jt_offset);
3464 	for (i = 0; i < mec_hdr->jt_size; i++)
3465 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3466 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3467 
3468 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3469 			adev->gfx.mec_fw_version);
3470 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3471 
3472 	return 0;
3473 }
3474 
3475 /* KIQ functions */
3476 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3477 {
3478 	uint32_t tmp;
3479 	struct amdgpu_device *adev = ring->adev;
3480 
3481 	/* tell RLC which is KIQ queue */
3482 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3483 	tmp &= 0xffffff00;
3484 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3485 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3486 	tmp |= 0x80;
3487 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3488 }
3489 
3490 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3491 {
3492 	struct amdgpu_device *adev = ring->adev;
3493 
3494 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3495 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3496 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3497 			mqd->cp_hqd_queue_priority =
3498 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3499 		}
3500 	}
3501 }
3502 
3503 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3504 {
3505 	struct amdgpu_device *adev = ring->adev;
3506 	struct v9_mqd *mqd = ring->mqd_ptr;
3507 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3508 	uint32_t tmp;
3509 
3510 	mqd->header = 0xC0310800;
3511 	mqd->compute_pipelinestat_enable = 0x00000001;
3512 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3513 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3514 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3515 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3516 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3517 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3518 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3519 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3520 	mqd->compute_misc_reserved = 0x00000003;
3521 
3522 	mqd->dynamic_cu_mask_addr_lo =
3523 		lower_32_bits(ring->mqd_gpu_addr
3524 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3525 	mqd->dynamic_cu_mask_addr_hi =
3526 		upper_32_bits(ring->mqd_gpu_addr
3527 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3528 
3529 	eop_base_addr = ring->eop_gpu_addr >> 8;
3530 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3531 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3532 
3533 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3534 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3535 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3536 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3537 
3538 	mqd->cp_hqd_eop_control = tmp;
3539 
3540 	/* enable doorbell? */
3541 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3542 
3543 	if (ring->use_doorbell) {
3544 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3545 				    DOORBELL_OFFSET, ring->doorbell_index);
3546 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3547 				    DOORBELL_EN, 1);
3548 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3549 				    DOORBELL_SOURCE, 0);
3550 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3551 				    DOORBELL_HIT, 0);
3552 	} else {
3553 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3554 					 DOORBELL_EN, 0);
3555 	}
3556 
3557 	mqd->cp_hqd_pq_doorbell_control = tmp;
3558 
3559 	/* disable the queue if it's active */
3560 	ring->wptr = 0;
3561 	mqd->cp_hqd_dequeue_request = 0;
3562 	mqd->cp_hqd_pq_rptr = 0;
3563 	mqd->cp_hqd_pq_wptr_lo = 0;
3564 	mqd->cp_hqd_pq_wptr_hi = 0;
3565 
3566 	/* set the pointer to the MQD */
3567 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3568 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3569 
3570 	/* set MQD vmid to 0 */
3571 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3572 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3573 	mqd->cp_mqd_control = tmp;
3574 
3575 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3576 	hqd_gpu_addr = ring->gpu_addr >> 8;
3577 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3578 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3579 
3580 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3581 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3582 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3583 			    (order_base_2(ring->ring_size / 4) - 1));
3584 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3585 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3586 #ifdef __BIG_ENDIAN
3587 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3588 #endif
3589 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3590 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3591 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3592 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3593 	mqd->cp_hqd_pq_control = tmp;
3594 
3595 	/* set the wb address whether it's enabled or not */
3596 	wb_gpu_addr = ring->rptr_gpu_addr;
3597 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3598 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3599 		upper_32_bits(wb_gpu_addr) & 0xffff;
3600 
3601 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3602 	wb_gpu_addr = ring->wptr_gpu_addr;
3603 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3604 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3605 
3606 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3607 	ring->wptr = 0;
3608 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3609 
3610 	/* set the vmid for the queue */
3611 	mqd->cp_hqd_vmid = 0;
3612 
3613 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3614 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3615 	mqd->cp_hqd_persistent_state = tmp;
3616 
3617 	/* set MIN_IB_AVAIL_SIZE */
3618 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3619 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3620 	mqd->cp_hqd_ib_control = tmp;
3621 
3622 	/* set static priority for a queue/ring */
3623 	gfx_v9_0_mqd_set_priority(ring, mqd);
3624 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3625 
3626 	/* map_queues packet doesn't need activate the queue,
3627 	 * so only kiq need set this field.
3628 	 */
3629 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3630 		mqd->cp_hqd_active = 1;
3631 
3632 	return 0;
3633 }
3634 
3635 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3636 {
3637 	struct amdgpu_device *adev = ring->adev;
3638 	struct v9_mqd *mqd = ring->mqd_ptr;
3639 	int j;
3640 
3641 	/* disable wptr polling */
3642 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3643 
3644 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3645 	       mqd->cp_hqd_eop_base_addr_lo);
3646 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3647 	       mqd->cp_hqd_eop_base_addr_hi);
3648 
3649 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3650 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3651 	       mqd->cp_hqd_eop_control);
3652 
3653 	/* enable doorbell? */
3654 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3655 	       mqd->cp_hqd_pq_doorbell_control);
3656 
3657 	/* disable the queue if it's active */
3658 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3659 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3660 		for (j = 0; j < adev->usec_timeout; j++) {
3661 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3662 				break;
3663 			udelay(1);
3664 		}
3665 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3666 		       mqd->cp_hqd_dequeue_request);
3667 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3668 		       mqd->cp_hqd_pq_rptr);
3669 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3670 		       mqd->cp_hqd_pq_wptr_lo);
3671 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3672 		       mqd->cp_hqd_pq_wptr_hi);
3673 	}
3674 
3675 	/* set the pointer to the MQD */
3676 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3677 	       mqd->cp_mqd_base_addr_lo);
3678 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3679 	       mqd->cp_mqd_base_addr_hi);
3680 
3681 	/* set MQD vmid to 0 */
3682 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3683 	       mqd->cp_mqd_control);
3684 
3685 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3686 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3687 	       mqd->cp_hqd_pq_base_lo);
3688 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3689 	       mqd->cp_hqd_pq_base_hi);
3690 
3691 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3692 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3693 	       mqd->cp_hqd_pq_control);
3694 
3695 	/* set the wb address whether it's enabled or not */
3696 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3697 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3698 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3699 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3700 
3701 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3702 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3703 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3704 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3705 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3706 
3707 	/* enable the doorbell if requested */
3708 	if (ring->use_doorbell) {
3709 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3710 					(adev->doorbell_index.kiq * 2) << 2);
3711 		/* If GC has entered CGPG, ringing doorbell > first page
3712 		 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3713 		 * workaround this issue. And this change has to align with firmware
3714 		 * update.
3715 		 */
3716 		if (check_if_enlarge_doorbell_range(adev))
3717 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3718 					(adev->doorbell.size - 4));
3719 		else
3720 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3721 					(adev->doorbell_index.userqueue_end * 2) << 2);
3722 	}
3723 
3724 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3725 	       mqd->cp_hqd_pq_doorbell_control);
3726 
3727 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3728 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3729 	       mqd->cp_hqd_pq_wptr_lo);
3730 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3731 	       mqd->cp_hqd_pq_wptr_hi);
3732 
3733 	/* set the vmid for the queue */
3734 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3735 
3736 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3737 	       mqd->cp_hqd_persistent_state);
3738 
3739 	/* activate the queue */
3740 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3741 	       mqd->cp_hqd_active);
3742 
3743 	if (ring->use_doorbell)
3744 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3745 
3746 	return 0;
3747 }
3748 
3749 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3750 {
3751 	struct amdgpu_device *adev = ring->adev;
3752 	int j;
3753 
3754 	/* disable the queue if it's active */
3755 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3756 
3757 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3758 
3759 		for (j = 0; j < adev->usec_timeout; j++) {
3760 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3761 				break;
3762 			udelay(1);
3763 		}
3764 
3765 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3766 			DRM_DEBUG("KIQ dequeue request failed.\n");
3767 
3768 			/* Manual disable if dequeue request times out */
3769 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3770 		}
3771 
3772 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3773 		      0);
3774 	}
3775 
3776 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3777 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3778 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3779 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3780 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3781 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3782 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3783 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3784 
3785 	return 0;
3786 }
3787 
3788 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3789 {
3790 	struct amdgpu_device *adev = ring->adev;
3791 	struct v9_mqd *mqd = ring->mqd_ptr;
3792 	struct v9_mqd *tmp_mqd;
3793 
3794 	gfx_v9_0_kiq_setting(ring);
3795 
3796 	/* GPU could be in bad state during probe, driver trigger the reset
3797 	 * after load the SMU, in this case , the mqd is not be initialized.
3798 	 * driver need to re-init the mqd.
3799 	 * check mqd->cp_hqd_pq_control since this value should not be 0
3800 	 */
3801 	tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3802 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3803 		/* for GPU_RESET case , reset MQD to a clean status */
3804 		if (adev->gfx.kiq[0].mqd_backup)
3805 			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3806 
3807 		/* reset ring buffer */
3808 		ring->wptr = 0;
3809 		amdgpu_ring_clear_ring(ring);
3810 
3811 		mutex_lock(&adev->srbm_mutex);
3812 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3813 		gfx_v9_0_kiq_init_register(ring);
3814 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3815 		mutex_unlock(&adev->srbm_mutex);
3816 	} else {
3817 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3818 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3819 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3820 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3821 			amdgpu_ring_clear_ring(ring);
3822 		mutex_lock(&adev->srbm_mutex);
3823 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3824 		gfx_v9_0_mqd_init(ring);
3825 		gfx_v9_0_kiq_init_register(ring);
3826 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3827 		mutex_unlock(&adev->srbm_mutex);
3828 
3829 		if (adev->gfx.kiq[0].mqd_backup)
3830 			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3831 	}
3832 
3833 	return 0;
3834 }
3835 
3836 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
3837 {
3838 	struct amdgpu_device *adev = ring->adev;
3839 	struct v9_mqd *mqd = ring->mqd_ptr;
3840 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3841 	struct v9_mqd *tmp_mqd;
3842 
3843 	/* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
3844 	 * is not be initialized before
3845 	 */
3846 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3847 
3848 	if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
3849 	    (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
3850 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3851 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3852 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3853 		mutex_lock(&adev->srbm_mutex);
3854 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3855 		gfx_v9_0_mqd_init(ring);
3856 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3857 		mutex_unlock(&adev->srbm_mutex);
3858 
3859 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3860 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3861 	} else {
3862 		/* restore MQD to a clean status */
3863 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3864 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3865 		/* reset ring buffer */
3866 		ring->wptr = 0;
3867 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3868 		amdgpu_ring_clear_ring(ring);
3869 	}
3870 
3871 	return 0;
3872 }
3873 
3874 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3875 {
3876 	struct amdgpu_ring *ring;
3877 	int r;
3878 
3879 	ring = &adev->gfx.kiq[0].ring;
3880 
3881 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3882 	if (unlikely(r != 0))
3883 		return r;
3884 
3885 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3886 	if (unlikely(r != 0)) {
3887 		amdgpu_bo_unreserve(ring->mqd_obj);
3888 		return r;
3889 	}
3890 
3891 	gfx_v9_0_kiq_init_queue(ring);
3892 	amdgpu_bo_kunmap(ring->mqd_obj);
3893 	ring->mqd_ptr = NULL;
3894 	amdgpu_bo_unreserve(ring->mqd_obj);
3895 	return 0;
3896 }
3897 
3898 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3899 {
3900 	struct amdgpu_ring *ring = NULL;
3901 	int r = 0, i;
3902 
3903 	gfx_v9_0_cp_compute_enable(adev, true);
3904 
3905 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3906 		ring = &adev->gfx.compute_ring[i];
3907 
3908 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3909 		if (unlikely(r != 0))
3910 			goto done;
3911 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3912 		if (!r) {
3913 			r = gfx_v9_0_kcq_init_queue(ring, false);
3914 			amdgpu_bo_kunmap(ring->mqd_obj);
3915 			ring->mqd_ptr = NULL;
3916 		}
3917 		amdgpu_bo_unreserve(ring->mqd_obj);
3918 		if (r)
3919 			goto done;
3920 	}
3921 
3922 	r = amdgpu_gfx_enable_kcq(adev, 0);
3923 done:
3924 	return r;
3925 }
3926 
3927 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3928 {
3929 	int r, i;
3930 	struct amdgpu_ring *ring;
3931 
3932 	if (!(adev->flags & AMD_IS_APU))
3933 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3934 
3935 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3936 		if (adev->gfx.num_gfx_rings) {
3937 			/* legacy firmware loading */
3938 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3939 			if (r)
3940 				return r;
3941 		}
3942 
3943 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3944 		if (r)
3945 			return r;
3946 	}
3947 
3948 	if (adev->gfx.num_gfx_rings)
3949 		gfx_v9_0_cp_gfx_enable(adev, false);
3950 	gfx_v9_0_cp_compute_enable(adev, false);
3951 
3952 	r = gfx_v9_0_kiq_resume(adev);
3953 	if (r)
3954 		return r;
3955 
3956 	if (adev->gfx.num_gfx_rings) {
3957 		r = gfx_v9_0_cp_gfx_resume(adev);
3958 		if (r)
3959 			return r;
3960 	}
3961 
3962 	r = gfx_v9_0_kcq_resume(adev);
3963 	if (r)
3964 		return r;
3965 
3966 	if (adev->gfx.num_gfx_rings) {
3967 		ring = &adev->gfx.gfx_ring[0];
3968 		r = amdgpu_ring_test_helper(ring);
3969 		if (r)
3970 			return r;
3971 	}
3972 
3973 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3974 		ring = &adev->gfx.compute_ring[i];
3975 		amdgpu_ring_test_helper(ring);
3976 	}
3977 
3978 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3979 
3980 	return 0;
3981 }
3982 
3983 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3984 {
3985 	u32 tmp;
3986 
3987 	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
3988 	    amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
3989 		return;
3990 
3991 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3992 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3993 				adev->df.hash_status.hash_64k);
3994 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3995 				adev->df.hash_status.hash_2m);
3996 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3997 				adev->df.hash_status.hash_1g);
3998 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3999 }
4000 
4001 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
4002 {
4003 	if (adev->gfx.num_gfx_rings)
4004 		gfx_v9_0_cp_gfx_enable(adev, enable);
4005 	gfx_v9_0_cp_compute_enable(adev, enable);
4006 }
4007 
4008 static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
4009 {
4010 	int r;
4011 	struct amdgpu_device *adev = ip_block->adev;
4012 
4013 	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
4014 				       adev->gfx.cleaner_shader_ptr);
4015 
4016 	if (!amdgpu_sriov_vf(adev))
4017 		gfx_v9_0_init_golden_registers(adev);
4018 
4019 	gfx_v9_0_constants_init(adev);
4020 
4021 	gfx_v9_0_init_tcp_config(adev);
4022 
4023 	r = adev->gfx.rlc.funcs->resume(adev);
4024 	if (r)
4025 		return r;
4026 
4027 	r = gfx_v9_0_cp_resume(adev);
4028 	if (r)
4029 		return r;
4030 
4031 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4032 		gfx_v9_4_2_set_power_brake_sequence(adev);
4033 
4034 	return r;
4035 }
4036 
4037 static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
4038 {
4039 	struct amdgpu_device *adev = ip_block->adev;
4040 
4041 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4042 		amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4043 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4044 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4045 	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
4046 
4047 	/* DF freeze and kcq disable will fail */
4048 	if (!amdgpu_ras_intr_triggered())
4049 		/* disable KCQ to avoid CPC touch memory not valid anymore */
4050 		amdgpu_gfx_disable_kcq(adev, 0);
4051 
4052 	if (amdgpu_sriov_vf(adev)) {
4053 		gfx_v9_0_cp_gfx_enable(adev, false);
4054 		/* must disable polling for SRIOV when hw finished, otherwise
4055 		 * CPC engine may still keep fetching WB address which is already
4056 		 * invalid after sw finished and trigger DMAR reading error in
4057 		 * hypervisor side.
4058 		 */
4059 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4060 		return 0;
4061 	}
4062 
4063 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
4064 	 * otherwise KIQ is hanging when binding back
4065 	 */
4066 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4067 		mutex_lock(&adev->srbm_mutex);
4068 		soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
4069 				adev->gfx.kiq[0].ring.pipe,
4070 				adev->gfx.kiq[0].ring.queue, 0, 0);
4071 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
4072 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
4073 		mutex_unlock(&adev->srbm_mutex);
4074 	}
4075 
4076 	gfx_v9_0_cp_enable(adev, false);
4077 
4078 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4079 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4080 	    (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
4081 		dev_dbg(adev->dev, "Skipping RLC halt\n");
4082 		return 0;
4083 	}
4084 
4085 	adev->gfx.rlc.funcs->stop(adev);
4086 	return 0;
4087 }
4088 
4089 static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block)
4090 {
4091 	return gfx_v9_0_hw_fini(ip_block);
4092 }
4093 
4094 static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block)
4095 {
4096 	return gfx_v9_0_hw_init(ip_block);
4097 }
4098 
4099 static bool gfx_v9_0_is_idle(void *handle)
4100 {
4101 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4102 
4103 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4104 				GRBM_STATUS, GUI_ACTIVE))
4105 		return false;
4106 	else
4107 		return true;
4108 }
4109 
4110 static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4111 {
4112 	unsigned i;
4113 	struct amdgpu_device *adev = ip_block->adev;
4114 
4115 	for (i = 0; i < adev->usec_timeout; i++) {
4116 		if (gfx_v9_0_is_idle(adev))
4117 			return 0;
4118 		udelay(1);
4119 	}
4120 	return -ETIMEDOUT;
4121 }
4122 
4123 static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
4124 {
4125 	u32 grbm_soft_reset = 0;
4126 	u32 tmp;
4127 	struct amdgpu_device *adev = ip_block->adev;
4128 
4129 	/* GRBM_STATUS */
4130 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4131 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4132 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4133 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4134 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4135 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4136 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4137 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4138 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4139 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4140 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4141 	}
4142 
4143 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4144 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4145 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4146 	}
4147 
4148 	/* GRBM_STATUS2 */
4149 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4150 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4151 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4152 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4153 
4154 
4155 	if (grbm_soft_reset) {
4156 		/* stop the rlc */
4157 		adev->gfx.rlc.funcs->stop(adev);
4158 
4159 		if (adev->gfx.num_gfx_rings)
4160 			/* Disable GFX parsing/prefetching */
4161 			gfx_v9_0_cp_gfx_enable(adev, false);
4162 
4163 		/* Disable MEC parsing/prefetching */
4164 		gfx_v9_0_cp_compute_enable(adev, false);
4165 
4166 		if (grbm_soft_reset) {
4167 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4168 			tmp |= grbm_soft_reset;
4169 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4170 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4171 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4172 
4173 			udelay(50);
4174 
4175 			tmp &= ~grbm_soft_reset;
4176 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4177 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4178 		}
4179 
4180 		/* Wait a little for things to settle down */
4181 		udelay(50);
4182 	}
4183 	return 0;
4184 }
4185 
4186 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4187 {
4188 	signed long r, cnt = 0;
4189 	unsigned long flags;
4190 	uint32_t seq, reg_val_offs = 0;
4191 	uint64_t value = 0;
4192 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4193 	struct amdgpu_ring *ring = &kiq->ring;
4194 
4195 	BUG_ON(!ring->funcs->emit_rreg);
4196 
4197 	spin_lock_irqsave(&kiq->ring_lock, flags);
4198 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4199 		pr_err("critical bug! too many kiq readers\n");
4200 		goto failed_unlock;
4201 	}
4202 	amdgpu_ring_alloc(ring, 32);
4203 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4204 	amdgpu_ring_write(ring, 9 |	/* src: register*/
4205 				(5 << 8) |	/* dst: memory */
4206 				(1 << 16) |	/* count sel */
4207 				(1 << 20));	/* write confirm */
4208 	amdgpu_ring_write(ring, 0);
4209 	amdgpu_ring_write(ring, 0);
4210 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4211 				reg_val_offs * 4));
4212 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4213 				reg_val_offs * 4));
4214 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4215 	if (r)
4216 		goto failed_undo;
4217 
4218 	amdgpu_ring_commit(ring);
4219 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4220 
4221 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4222 
4223 	/* don't wait anymore for gpu reset case because this way may
4224 	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
4225 	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4226 	 * never return if we keep waiting in virt_kiq_rreg, which cause
4227 	 * gpu_recover() hang there.
4228 	 *
4229 	 * also don't wait anymore for IRQ context
4230 	 * */
4231 	if (r < 1 && (amdgpu_in_reset(adev)))
4232 		goto failed_kiq_read;
4233 
4234 	might_sleep();
4235 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4236 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4237 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4238 	}
4239 
4240 	if (cnt > MAX_KIQ_REG_TRY)
4241 		goto failed_kiq_read;
4242 
4243 	mb();
4244 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4245 		(uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
4246 	amdgpu_device_wb_free(adev, reg_val_offs);
4247 	return value;
4248 
4249 failed_undo:
4250 	amdgpu_ring_undo(ring);
4251 failed_unlock:
4252 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4253 failed_kiq_read:
4254 	if (reg_val_offs)
4255 		amdgpu_device_wb_free(adev, reg_val_offs);
4256 	pr_err("failed to read gpu clock\n");
4257 	return ~0;
4258 }
4259 
4260 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4261 {
4262 	uint64_t clock, clock_lo, clock_hi, hi_check;
4263 
4264 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4265 	case IP_VERSION(9, 3, 0):
4266 		preempt_disable();
4267 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4268 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4269 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4270 		/* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
4271 		 * roughly every 42 seconds.
4272 		 */
4273 		if (hi_check != clock_hi) {
4274 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4275 			clock_hi = hi_check;
4276 		}
4277 		preempt_enable();
4278 		clock = clock_lo | (clock_hi << 32ULL);
4279 		break;
4280 	default:
4281 		amdgpu_gfx_off_ctrl(adev, false);
4282 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4283 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4284 			    IP_VERSION(9, 0, 1) &&
4285 		    amdgpu_sriov_runtime(adev)) {
4286 			clock = gfx_v9_0_kiq_read_clock(adev);
4287 		} else {
4288 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4289 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4290 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4291 		}
4292 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4293 		amdgpu_gfx_off_ctrl(adev, true);
4294 		break;
4295 	}
4296 	return clock;
4297 }
4298 
4299 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4300 					  uint32_t vmid,
4301 					  uint32_t gds_base, uint32_t gds_size,
4302 					  uint32_t gws_base, uint32_t gws_size,
4303 					  uint32_t oa_base, uint32_t oa_size)
4304 {
4305 	struct amdgpu_device *adev = ring->adev;
4306 
4307 	/* GDS Base */
4308 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4309 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4310 				   gds_base);
4311 
4312 	/* GDS Size */
4313 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4314 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4315 				   gds_size);
4316 
4317 	/* GWS */
4318 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4319 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4320 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4321 
4322 	/* OA */
4323 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4324 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4325 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4326 }
4327 
4328 static const u32 vgpr_init_compute_shader[] =
4329 {
4330 	0xb07c0000, 0xbe8000ff,
4331 	0x000000f8, 0xbf110800,
4332 	0x7e000280, 0x7e020280,
4333 	0x7e040280, 0x7e060280,
4334 	0x7e080280, 0x7e0a0280,
4335 	0x7e0c0280, 0x7e0e0280,
4336 	0x80808800, 0xbe803200,
4337 	0xbf84fff5, 0xbf9c0000,
4338 	0xd28c0001, 0x0001007f,
4339 	0xd28d0001, 0x0002027e,
4340 	0x10020288, 0xb8810904,
4341 	0xb7814000, 0xd1196a01,
4342 	0x00000301, 0xbe800087,
4343 	0xbefc00c1, 0xd89c4000,
4344 	0x00020201, 0xd89cc080,
4345 	0x00040401, 0x320202ff,
4346 	0x00000800, 0x80808100,
4347 	0xbf84fff8, 0x7e020280,
4348 	0xbf810000, 0x00000000,
4349 };
4350 
4351 static const u32 sgpr_init_compute_shader[] =
4352 {
4353 	0xb07c0000, 0xbe8000ff,
4354 	0x0000005f, 0xbee50080,
4355 	0xbe812c65, 0xbe822c65,
4356 	0xbe832c65, 0xbe842c65,
4357 	0xbe852c65, 0xb77c0005,
4358 	0x80808500, 0xbf84fff8,
4359 	0xbe800080, 0xbf810000,
4360 };
4361 
4362 static const u32 vgpr_init_compute_shader_arcturus[] = {
4363 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4364 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4365 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4366 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4367 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4368 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4369 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4370 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4371 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4372 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4373 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4374 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4375 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4376 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4377 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4378 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4379 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4380 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4381 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4382 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4383 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4384 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4385 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4386 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4387 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4388 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4389 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4390 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4391 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4392 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4393 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4394 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4395 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4396 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4397 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4398 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4399 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4400 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4401 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4402 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4403 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4404 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4405 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4406 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4407 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4408 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4409 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4410 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4411 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4412 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4413 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4414 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4415 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4416 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4417 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4418 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4419 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4420 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4421 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4422 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4423 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4424 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4425 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4426 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4427 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4428 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4429 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4430 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4431 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4432 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4433 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4434 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4435 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4436 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4437 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4438 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4439 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4440 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4441 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4442 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4443 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4444 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4445 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4446 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4447 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4448 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4449 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4450 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4451 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4452 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4453 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4454 	0xbf84fff8, 0xbf810000,
4455 };
4456 
4457 /* When below register arrays changed, please update gpr_reg_size,
4458   and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds,
4459   to cover all gfx9 ASICs */
4460 static const struct soc15_reg_entry vgpr_init_regs[] = {
4461    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4462    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4463    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4464    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4465    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4466    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4467    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4468    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4469    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4470    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4471    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4472    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4474    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4475 };
4476 
4477 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4481    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4482    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4483    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4484    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4485    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4486    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4487    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4488    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4489    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4490    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4491    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4492 };
4493 
4494 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4495    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4496    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4497    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4498    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4499    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4500    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4501    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4502    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4503    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4504    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4505    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4506    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4507    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4508    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4509 };
4510 
4511 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4512    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4513    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4514    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4515    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4516    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4517    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4518    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4519    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4520    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4521    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4522    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4523    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4524    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4525    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4526 };
4527 
4528 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4529    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4530    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4531    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4532    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4533    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4534    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4535    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4536    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4537    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4538    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4539    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4540    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4541    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4542    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4543    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4544    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4545    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4546    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4547    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4548    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4549    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4550    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4551    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4552    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4553    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4554    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4555    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4556    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4557    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4558    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4559    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4560    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4561    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4562 };
4563 
4564 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4565 {
4566 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4567 	int i, r;
4568 
4569 	/* only support when RAS is enabled */
4570 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4571 		return 0;
4572 
4573 	r = amdgpu_ring_alloc(ring, 7);
4574 	if (r) {
4575 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4576 			ring->name, r);
4577 		return r;
4578 	}
4579 
4580 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4581 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4582 
4583 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4584 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4585 				PACKET3_DMA_DATA_DST_SEL(1) |
4586 				PACKET3_DMA_DATA_SRC_SEL(2) |
4587 				PACKET3_DMA_DATA_ENGINE(0)));
4588 	amdgpu_ring_write(ring, 0);
4589 	amdgpu_ring_write(ring, 0);
4590 	amdgpu_ring_write(ring, 0);
4591 	amdgpu_ring_write(ring, 0);
4592 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4593 				adev->gds.gds_size);
4594 
4595 	amdgpu_ring_commit(ring);
4596 
4597 	for (i = 0; i < adev->usec_timeout; i++) {
4598 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4599 			break;
4600 		udelay(1);
4601 	}
4602 
4603 	if (i >= adev->usec_timeout)
4604 		r = -ETIMEDOUT;
4605 
4606 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4607 
4608 	return r;
4609 }
4610 
4611 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4612 {
4613 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4614 	struct amdgpu_ib ib;
4615 	struct dma_fence *f = NULL;
4616 	int r, i;
4617 	unsigned total_size, vgpr_offset, sgpr_offset;
4618 	u64 gpu_addr;
4619 
4620 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4621 						adev->gfx.config.max_cu_per_sh *
4622 						adev->gfx.config.max_sh_per_se;
4623 	int sgpr_work_group_size = 5;
4624 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4625 	int vgpr_init_shader_size;
4626 	const u32 *vgpr_init_shader_ptr;
4627 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4628 
4629 	/* only support when RAS is enabled */
4630 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4631 		return 0;
4632 
4633 	/* bail if the compute ring is not ready */
4634 	if (!ring->sched.ready)
4635 		return 0;
4636 
4637 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4638 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4639 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4640 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4641 	} else {
4642 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4643 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4644 		vgpr_init_regs_ptr = vgpr_init_regs;
4645 	}
4646 
4647 	total_size =
4648 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4649 	total_size +=
4650 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4651 	total_size +=
4652 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4653 	total_size = ALIGN(total_size, 256);
4654 	vgpr_offset = total_size;
4655 	total_size += ALIGN(vgpr_init_shader_size, 256);
4656 	sgpr_offset = total_size;
4657 	total_size += sizeof(sgpr_init_compute_shader);
4658 
4659 	/* allocate an indirect buffer to put the commands in */
4660 	memset(&ib, 0, sizeof(ib));
4661 	r = amdgpu_ib_get(adev, NULL, total_size,
4662 					AMDGPU_IB_POOL_DIRECT, &ib);
4663 	if (r) {
4664 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4665 		return r;
4666 	}
4667 
4668 	/* load the compute shaders */
4669 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4670 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4671 
4672 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4673 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4674 
4675 	/* init the ib length to 0 */
4676 	ib.length_dw = 0;
4677 
4678 	/* VGPR */
4679 	/* write the register state for the compute dispatch */
4680 	for (i = 0; i < gpr_reg_size; i++) {
4681 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4682 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4683 								- PACKET3_SET_SH_REG_START;
4684 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4685 	}
4686 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4687 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4688 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4689 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4690 							- PACKET3_SET_SH_REG_START;
4691 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4692 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4693 
4694 	/* write dispatch packet */
4695 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4696 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4697 	ib.ptr[ib.length_dw++] = 1; /* y */
4698 	ib.ptr[ib.length_dw++] = 1; /* z */
4699 	ib.ptr[ib.length_dw++] =
4700 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4701 
4702 	/* write CS partial flush packet */
4703 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4704 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4705 
4706 	/* SGPR1 */
4707 	/* write the register state for the compute dispatch */
4708 	for (i = 0; i < gpr_reg_size; i++) {
4709 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4710 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4711 								- PACKET3_SET_SH_REG_START;
4712 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4713 	}
4714 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4715 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4716 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4717 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4718 							- PACKET3_SET_SH_REG_START;
4719 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4720 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4721 
4722 	/* write dispatch packet */
4723 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4724 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4725 	ib.ptr[ib.length_dw++] = 1; /* y */
4726 	ib.ptr[ib.length_dw++] = 1; /* z */
4727 	ib.ptr[ib.length_dw++] =
4728 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4729 
4730 	/* write CS partial flush packet */
4731 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4732 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4733 
4734 	/* SGPR2 */
4735 	/* write the register state for the compute dispatch */
4736 	for (i = 0; i < gpr_reg_size; i++) {
4737 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4738 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4739 								- PACKET3_SET_SH_REG_START;
4740 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4741 	}
4742 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4743 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4744 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4745 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4746 							- PACKET3_SET_SH_REG_START;
4747 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4748 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4749 
4750 	/* write dispatch packet */
4751 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4752 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4753 	ib.ptr[ib.length_dw++] = 1; /* y */
4754 	ib.ptr[ib.length_dw++] = 1; /* z */
4755 	ib.ptr[ib.length_dw++] =
4756 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4757 
4758 	/* write CS partial flush packet */
4759 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4760 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4761 
4762 	/* shedule the ib on the ring */
4763 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4764 	if (r) {
4765 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4766 		goto fail;
4767 	}
4768 
4769 	/* wait for the GPU to finish processing the IB */
4770 	r = dma_fence_wait(f, false);
4771 	if (r) {
4772 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4773 		goto fail;
4774 	}
4775 
4776 fail:
4777 	amdgpu_ib_free(adev, &ib, NULL);
4778 	dma_fence_put(f);
4779 
4780 	return r;
4781 }
4782 
4783 static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
4784 {
4785 	struct amdgpu_device *adev = ip_block->adev;
4786 
4787 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4788 
4789 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4790 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4791 		adev->gfx.num_gfx_rings = 0;
4792 	else
4793 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4794 	adev->gfx.xcc_mask = 1;
4795 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4796 					  AMDGPU_MAX_COMPUTE_RINGS);
4797 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4798 	gfx_v9_0_set_ring_funcs(adev);
4799 	gfx_v9_0_set_irq_funcs(adev);
4800 	gfx_v9_0_set_gds_init(adev);
4801 	gfx_v9_0_set_rlc_funcs(adev);
4802 
4803 	/* init rlcg reg access ctrl */
4804 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4805 
4806 	return gfx_v9_0_init_microcode(adev);
4807 }
4808 
4809 static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block)
4810 {
4811 	struct amdgpu_device *adev = ip_block->adev;
4812 	int r;
4813 
4814 	/*
4815 	 * Temp workaround to fix the issue that CP firmware fails to
4816 	 * update read pointer when CPDMA is writing clearing operation
4817 	 * to GDS in suspend/resume sequence on several cards. So just
4818 	 * limit this operation in cold boot sequence.
4819 	 */
4820 	if ((!adev->in_suspend) &&
4821 	    (adev->gds.gds_size)) {
4822 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4823 		if (r)
4824 			return r;
4825 	}
4826 
4827 	/* requires IBs so do in late init after IB pool is initialized */
4828 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4829 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4830 	else
4831 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4832 
4833 	if (r)
4834 		return r;
4835 
4836 	if (adev->gfx.ras &&
4837 	    adev->gfx.ras->enable_watchdog_timer)
4838 		adev->gfx.ras->enable_watchdog_timer(adev);
4839 
4840 	return 0;
4841 }
4842 
4843 static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block)
4844 {
4845 	struct amdgpu_device *adev = ip_block->adev;
4846 	int r;
4847 
4848 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4849 	if (r)
4850 		return r;
4851 
4852 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4853 	if (r)
4854 		return r;
4855 
4856 	r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
4857 	if (r)
4858 		return r;
4859 
4860 	r = gfx_v9_0_ecc_late_init(ip_block);
4861 	if (r)
4862 		return r;
4863 
4864 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4865 		gfx_v9_4_2_debug_trap_config_init(adev,
4866 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4867 	else
4868 		gfx_v9_0_debug_trap_config_init(adev,
4869 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4870 
4871 	return 0;
4872 }
4873 
4874 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4875 {
4876 	uint32_t rlc_setting;
4877 
4878 	/* if RLC is not enabled, do nothing */
4879 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4880 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4881 		return false;
4882 
4883 	return true;
4884 }
4885 
4886 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4887 {
4888 	uint32_t data;
4889 	unsigned i;
4890 
4891 	data = RLC_SAFE_MODE__CMD_MASK;
4892 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4893 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4894 
4895 	/* wait for RLC_SAFE_MODE */
4896 	for (i = 0; i < adev->usec_timeout; i++) {
4897 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4898 			break;
4899 		udelay(1);
4900 	}
4901 }
4902 
4903 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4904 {
4905 	uint32_t data;
4906 
4907 	data = RLC_SAFE_MODE__CMD_MASK;
4908 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4909 }
4910 
4911 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4912 						bool enable)
4913 {
4914 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4915 
4916 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4917 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4918 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4919 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4920 	} else {
4921 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4922 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4923 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4924 	}
4925 
4926 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4927 }
4928 
4929 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4930 						bool enable)
4931 {
4932 	/* TODO: double check if we need to perform under safe mode */
4933 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4934 
4935 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4936 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4937 	else
4938 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4939 
4940 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4941 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4942 	else
4943 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4944 
4945 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4946 }
4947 
4948 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4949 						      bool enable)
4950 {
4951 	uint32_t data, def;
4952 
4953 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4954 
4955 	/* It is disabled by HW by default */
4956 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4957 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4958 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4959 
4960 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4961 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4962 
4963 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4964 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4965 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4966 
4967 		/* only for Vega10 & Raven1 */
4968 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4969 
4970 		if (def != data)
4971 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4972 
4973 		/* MGLS is a global flag to control all MGLS in GFX */
4974 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4975 			/* 2 - RLC memory Light sleep */
4976 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4977 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4978 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4979 				if (def != data)
4980 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4981 			}
4982 			/* 3 - CP memory Light sleep */
4983 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4984 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4985 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4986 				if (def != data)
4987 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4988 			}
4989 		}
4990 	} else {
4991 		/* 1 - MGCG_OVERRIDE */
4992 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4993 
4994 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4995 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4996 
4997 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4998 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4999 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
5000 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
5001 
5002 		if (def != data)
5003 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5004 
5005 		/* 2 - disable MGLS in RLC */
5006 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
5007 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5008 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5009 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
5010 		}
5011 
5012 		/* 3 - disable MGLS in CP */
5013 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
5014 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5015 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5016 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
5017 		}
5018 	}
5019 
5020 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5021 }
5022 
5023 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
5024 					   bool enable)
5025 {
5026 	uint32_t data, def;
5027 
5028 	if (!adev->gfx.num_gfx_rings)
5029 		return;
5030 
5031 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5032 
5033 	/* Enable 3D CGCG/CGLS */
5034 	if (enable) {
5035 		/* write cmd to clear cgcg/cgls ov */
5036 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5037 		/* unset CGCG override */
5038 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5039 		/* update CGCG and CGLS override bits */
5040 		if (def != data)
5041 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5042 
5043 		/* enable 3Dcgcg FSM(0x0000363f) */
5044 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5045 
5046 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5047 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5048 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5049 		else
5050 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5051 
5052 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5053 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5054 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5055 		if (def != data)
5056 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5057 
5058 		/* set IDLE_POLL_COUNT(0x00900100) */
5059 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5060 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5061 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5062 		if (def != data)
5063 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5064 	} else {
5065 		/* Disable CGCG/CGLS */
5066 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5067 		/* disable cgcg, cgls should be disabled */
5068 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5069 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5070 		/* disable cgcg and cgls in FSM */
5071 		if (def != data)
5072 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5073 	}
5074 
5075 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5076 }
5077 
5078 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5079 						      bool enable)
5080 {
5081 	uint32_t def, data;
5082 
5083 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5084 
5085 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5086 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5087 		/* unset CGCG override */
5088 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5089 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5090 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5091 		else
5092 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5093 		/* update CGCG and CGLS override bits */
5094 		if (def != data)
5095 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5096 
5097 		/* enable cgcg FSM(0x0000363F) */
5098 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5099 
5100 		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
5101 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5102 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5103 		else
5104 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5105 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5106 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5107 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5108 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5109 		if (def != data)
5110 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5111 
5112 		/* set IDLE_POLL_COUNT(0x00900100) */
5113 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5114 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5115 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5116 		if (def != data)
5117 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5118 	} else {
5119 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5120 		/* reset CGCG/CGLS bits */
5121 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5122 		/* disable cgcg and cgls in FSM */
5123 		if (def != data)
5124 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5125 	}
5126 
5127 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5128 }
5129 
5130 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5131 					    bool enable)
5132 {
5133 	if (enable) {
5134 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5135 		 * ===  MGCG + MGLS ===
5136 		 */
5137 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5138 		/* ===  CGCG /CGLS for GFX 3D Only === */
5139 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5140 		/* ===  CGCG + CGLS === */
5141 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5142 	} else {
5143 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5144 		 * ===  CGCG + CGLS ===
5145 		 */
5146 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5147 		/* ===  CGCG /CGLS for GFX 3D Only === */
5148 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5149 		/* ===  MGCG + MGLS === */
5150 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5151 	}
5152 	return 0;
5153 }
5154 
5155 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5156 					      unsigned int vmid)
5157 {
5158 	u32 reg, data;
5159 
5160 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5161 	if (amdgpu_sriov_is_pp_one_vf(adev))
5162 		data = RREG32_NO_KIQ(reg);
5163 	else
5164 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5165 
5166 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5167 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5168 
5169 	if (amdgpu_sriov_is_pp_one_vf(adev))
5170 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5171 	else
5172 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5173 }
5174 
5175 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5176 {
5177 	amdgpu_gfx_off_ctrl(adev, false);
5178 
5179 	gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5180 
5181 	amdgpu_gfx_off_ctrl(adev, true);
5182 }
5183 
5184 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5185 					uint32_t offset,
5186 					struct soc15_reg_rlcg *entries, int arr_size)
5187 {
5188 	int i;
5189 	uint32_t reg;
5190 
5191 	if (!entries)
5192 		return false;
5193 
5194 	for (i = 0; i < arr_size; i++) {
5195 		const struct soc15_reg_rlcg *entry;
5196 
5197 		entry = &entries[i];
5198 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5199 		if (offset == reg)
5200 			return true;
5201 	}
5202 
5203 	return false;
5204 }
5205 
5206 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5207 {
5208 	return gfx_v9_0_check_rlcg_range(adev, offset,
5209 					(void *)rlcg_access_gc_9_0,
5210 					ARRAY_SIZE(rlcg_access_gc_9_0));
5211 }
5212 
5213 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5214 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5215 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5216 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5217 	.init = gfx_v9_0_rlc_init,
5218 	.get_csb_size = gfx_v9_0_get_csb_size,
5219 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5220 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5221 	.resume = gfx_v9_0_rlc_resume,
5222 	.stop = gfx_v9_0_rlc_stop,
5223 	.reset = gfx_v9_0_rlc_reset,
5224 	.start = gfx_v9_0_rlc_start,
5225 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5226 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5227 };
5228 
5229 static int gfx_v9_0_set_powergating_state(void *handle,
5230 					  enum amd_powergating_state state)
5231 {
5232 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5233 	bool enable = (state == AMD_PG_STATE_GATE);
5234 
5235 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5236 	case IP_VERSION(9, 2, 2):
5237 	case IP_VERSION(9, 1, 0):
5238 	case IP_VERSION(9, 3, 0):
5239 		if (!enable)
5240 			amdgpu_gfx_off_ctrl(adev, false);
5241 
5242 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5243 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5244 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5245 		} else {
5246 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5247 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5248 		}
5249 
5250 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5251 			gfx_v9_0_enable_cp_power_gating(adev, true);
5252 		else
5253 			gfx_v9_0_enable_cp_power_gating(adev, false);
5254 
5255 		/* update gfx cgpg state */
5256 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5257 
5258 		/* update mgcg state */
5259 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5260 
5261 		if (enable)
5262 			amdgpu_gfx_off_ctrl(adev, true);
5263 		break;
5264 	case IP_VERSION(9, 2, 1):
5265 		amdgpu_gfx_off_ctrl(adev, enable);
5266 		break;
5267 	default:
5268 		break;
5269 	}
5270 
5271 	return 0;
5272 }
5273 
5274 static int gfx_v9_0_set_clockgating_state(void *handle,
5275 					  enum amd_clockgating_state state)
5276 {
5277 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5278 
5279 	if (amdgpu_sriov_vf(adev))
5280 		return 0;
5281 
5282 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5283 	case IP_VERSION(9, 0, 1):
5284 	case IP_VERSION(9, 2, 1):
5285 	case IP_VERSION(9, 4, 0):
5286 	case IP_VERSION(9, 2, 2):
5287 	case IP_VERSION(9, 1, 0):
5288 	case IP_VERSION(9, 4, 1):
5289 	case IP_VERSION(9, 3, 0):
5290 	case IP_VERSION(9, 4, 2):
5291 		gfx_v9_0_update_gfx_clock_gating(adev,
5292 						 state == AMD_CG_STATE_GATE);
5293 		break;
5294 	default:
5295 		break;
5296 	}
5297 	return 0;
5298 }
5299 
5300 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5301 {
5302 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5303 	int data;
5304 
5305 	if (amdgpu_sriov_vf(adev))
5306 		*flags = 0;
5307 
5308 	/* AMD_CG_SUPPORT_GFX_MGCG */
5309 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5310 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5311 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5312 
5313 	/* AMD_CG_SUPPORT_GFX_CGCG */
5314 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5315 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5316 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5317 
5318 	/* AMD_CG_SUPPORT_GFX_CGLS */
5319 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5320 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5321 
5322 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5323 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5324 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5325 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5326 
5327 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5328 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5329 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5330 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5331 
5332 	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5333 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5334 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5335 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5336 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5337 
5338 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5339 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5340 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5341 	}
5342 }
5343 
5344 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5345 {
5346 	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/
5347 }
5348 
5349 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5350 {
5351 	struct amdgpu_device *adev = ring->adev;
5352 	u64 wptr;
5353 
5354 	/* XXX check if swapping is necessary on BE */
5355 	if (ring->use_doorbell) {
5356 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5357 	} else {
5358 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5359 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5360 	}
5361 
5362 	return wptr;
5363 }
5364 
5365 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5366 {
5367 	struct amdgpu_device *adev = ring->adev;
5368 
5369 	if (ring->use_doorbell) {
5370 		/* XXX check if swapping is necessary on BE */
5371 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5372 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5373 	} else {
5374 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5375 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5376 	}
5377 }
5378 
5379 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5380 {
5381 	struct amdgpu_device *adev = ring->adev;
5382 	u32 ref_and_mask, reg_mem_engine;
5383 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5384 
5385 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5386 		switch (ring->me) {
5387 		case 1:
5388 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5389 			break;
5390 		case 2:
5391 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5392 			break;
5393 		default:
5394 			return;
5395 		}
5396 		reg_mem_engine = 0;
5397 	} else {
5398 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5399 		reg_mem_engine = 1; /* pfp */
5400 	}
5401 
5402 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5403 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5404 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5405 			      ref_and_mask, ref_and_mask, 0x20);
5406 }
5407 
5408 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5409 					struct amdgpu_job *job,
5410 					struct amdgpu_ib *ib,
5411 					uint32_t flags)
5412 {
5413 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5414 	u32 header, control = 0;
5415 
5416 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5417 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5418 	else
5419 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5420 
5421 	control |= ib->length_dw | (vmid << 24);
5422 
5423 	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5424 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5425 
5426 		if (flags & AMDGPU_IB_PREEMPTED)
5427 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5428 
5429 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5430 			gfx_v9_0_ring_emit_de_meta(ring,
5431 						   (!amdgpu_sriov_vf(ring->adev) &&
5432 						   flags & AMDGPU_IB_PREEMPTED) ?
5433 						   true : false,
5434 						   job->gds_size > 0 && job->gds_base != 0);
5435 	}
5436 
5437 	amdgpu_ring_write(ring, header);
5438 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5439 	amdgpu_ring_write(ring,
5440 #ifdef __BIG_ENDIAN
5441 		(2 << 0) |
5442 #endif
5443 		lower_32_bits(ib->gpu_addr));
5444 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5445 	amdgpu_ring_ib_on_emit_cntl(ring);
5446 	amdgpu_ring_write(ring, control);
5447 }
5448 
5449 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5450 				     unsigned offset)
5451 {
5452 	u32 control = ring->ring[offset];
5453 
5454 	control |= INDIRECT_BUFFER_PRE_RESUME(1);
5455 	ring->ring[offset] = control;
5456 }
5457 
5458 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5459 					unsigned offset)
5460 {
5461 	struct amdgpu_device *adev = ring->adev;
5462 	void *ce_payload_cpu_addr;
5463 	uint64_t payload_offset, payload_size;
5464 
5465 	payload_size = sizeof(struct v9_ce_ib_state);
5466 
5467 	if (ring->is_mes_queue) {
5468 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5469 					  gfx[0].gfx_meta_data) +
5470 			offsetof(struct v9_gfx_meta_data, ce_payload);
5471 		ce_payload_cpu_addr =
5472 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5473 	} else {
5474 		payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5475 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5476 	}
5477 
5478 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5479 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5480 	} else {
5481 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5482 		       (ring->buf_mask + 1 - offset) << 2);
5483 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5484 		memcpy((void *)&ring->ring[0],
5485 		       ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5486 		       payload_size);
5487 	}
5488 }
5489 
5490 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5491 					unsigned offset)
5492 {
5493 	struct amdgpu_device *adev = ring->adev;
5494 	void *de_payload_cpu_addr;
5495 	uint64_t payload_offset, payload_size;
5496 
5497 	payload_size = sizeof(struct v9_de_ib_state);
5498 
5499 	if (ring->is_mes_queue) {
5500 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5501 					  gfx[0].gfx_meta_data) +
5502 			offsetof(struct v9_gfx_meta_data, de_payload);
5503 		de_payload_cpu_addr =
5504 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5505 	} else {
5506 		payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5507 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5508 	}
5509 
5510 	((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5511 		IB_COMPLETION_STATUS_PREEMPTED;
5512 
5513 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5514 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5515 	} else {
5516 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5517 		       (ring->buf_mask + 1 - offset) << 2);
5518 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5519 		memcpy((void *)&ring->ring[0],
5520 		       de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5521 		       payload_size);
5522 	}
5523 }
5524 
5525 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5526 					  struct amdgpu_job *job,
5527 					  struct amdgpu_ib *ib,
5528 					  uint32_t flags)
5529 {
5530 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5531 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5532 
5533 	/* Currently, there is a high possibility to get wave ID mismatch
5534 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5535 	 * different wave IDs than the GDS expects. This situation happens
5536 	 * randomly when at least 5 compute pipes use GDS ordered append.
5537 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5538 	 * Those are probably bugs somewhere else in the kernel driver.
5539 	 *
5540 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5541 	 * GDS to 0 for this ring (me/pipe).
5542 	 */
5543 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5544 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5545 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5546 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5547 	}
5548 
5549 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5550 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5551 	amdgpu_ring_write(ring,
5552 #ifdef __BIG_ENDIAN
5553 				(2 << 0) |
5554 #endif
5555 				lower_32_bits(ib->gpu_addr));
5556 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5557 	amdgpu_ring_write(ring, control);
5558 }
5559 
5560 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5561 				     u64 seq, unsigned flags)
5562 {
5563 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5564 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5565 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5566 	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5567 	uint32_t dw2 = 0;
5568 
5569 	/* RELEASE_MEM - flush caches, send int */
5570 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5571 
5572 	if (writeback) {
5573 		dw2 = EOP_TC_NC_ACTION_EN;
5574 	} else {
5575 		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5576 				EOP_TC_MD_ACTION_EN;
5577 	}
5578 	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5579 				EVENT_INDEX(5);
5580 	if (exec)
5581 		dw2 |= EOP_EXEC;
5582 
5583 	amdgpu_ring_write(ring, dw2);
5584 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5585 
5586 	/*
5587 	 * the address should be Qword aligned if 64bit write, Dword
5588 	 * aligned if only send 32bit data low (discard data high)
5589 	 */
5590 	if (write64bit)
5591 		BUG_ON(addr & 0x7);
5592 	else
5593 		BUG_ON(addr & 0x3);
5594 	amdgpu_ring_write(ring, lower_32_bits(addr));
5595 	amdgpu_ring_write(ring, upper_32_bits(addr));
5596 	amdgpu_ring_write(ring, lower_32_bits(seq));
5597 	amdgpu_ring_write(ring, upper_32_bits(seq));
5598 	amdgpu_ring_write(ring, 0);
5599 }
5600 
5601 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5602 {
5603 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5604 	uint32_t seq = ring->fence_drv.sync_seq;
5605 	uint64_t addr = ring->fence_drv.gpu_addr;
5606 
5607 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5608 			      lower_32_bits(addr), upper_32_bits(addr),
5609 			      seq, 0xffffffff, 4);
5610 }
5611 
5612 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5613 					unsigned vmid, uint64_t pd_addr)
5614 {
5615 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5616 
5617 	/* compute doesn't have PFP */
5618 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5619 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5620 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5621 		amdgpu_ring_write(ring, 0x0);
5622 	}
5623 }
5624 
5625 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5626 {
5627 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5628 }
5629 
5630 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5631 {
5632 	u64 wptr;
5633 
5634 	/* XXX check if swapping is necessary on BE */
5635 	if (ring->use_doorbell)
5636 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5637 	else
5638 		BUG();
5639 	return wptr;
5640 }
5641 
5642 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5643 {
5644 	struct amdgpu_device *adev = ring->adev;
5645 
5646 	/* XXX check if swapping is necessary on BE */
5647 	if (ring->use_doorbell) {
5648 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5649 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5650 	} else{
5651 		BUG(); /* only DOORBELL method supported on gfx9 now */
5652 	}
5653 }
5654 
5655 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5656 					 u64 seq, unsigned int flags)
5657 {
5658 	struct amdgpu_device *adev = ring->adev;
5659 
5660 	/* we only allocate 32bit for each seq wb address */
5661 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5662 
5663 	/* write fence seq to the "addr" */
5664 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5665 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5666 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5667 	amdgpu_ring_write(ring, lower_32_bits(addr));
5668 	amdgpu_ring_write(ring, upper_32_bits(addr));
5669 	amdgpu_ring_write(ring, lower_32_bits(seq));
5670 
5671 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5672 		/* set register to trigger INT */
5673 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5674 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5675 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5676 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5677 		amdgpu_ring_write(ring, 0);
5678 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5679 	}
5680 }
5681 
5682 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5683 {
5684 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5685 	amdgpu_ring_write(ring, 0);
5686 }
5687 
5688 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5689 {
5690 	struct amdgpu_device *adev = ring->adev;
5691 	struct v9_ce_ib_state ce_payload = {0};
5692 	uint64_t offset, ce_payload_gpu_addr;
5693 	void *ce_payload_cpu_addr;
5694 	int cnt;
5695 
5696 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5697 
5698 	if (ring->is_mes_queue) {
5699 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5700 				  gfx[0].gfx_meta_data) +
5701 			offsetof(struct v9_gfx_meta_data, ce_payload);
5702 		ce_payload_gpu_addr =
5703 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5704 		ce_payload_cpu_addr =
5705 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5706 	} else {
5707 		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5708 		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5709 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5710 	}
5711 
5712 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5713 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5714 				 WRITE_DATA_DST_SEL(8) |
5715 				 WR_CONFIRM) |
5716 				 WRITE_DATA_CACHE_POLICY(0));
5717 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5718 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5719 
5720 	amdgpu_ring_ib_on_emit_ce(ring);
5721 
5722 	if (resume)
5723 		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5724 					   sizeof(ce_payload) >> 2);
5725 	else
5726 		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5727 					   sizeof(ce_payload) >> 2);
5728 }
5729 
5730 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5731 {
5732 	int i, r = 0;
5733 	struct amdgpu_device *adev = ring->adev;
5734 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5735 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5736 	unsigned long flags;
5737 
5738 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5739 		return -EINVAL;
5740 
5741 	spin_lock_irqsave(&kiq->ring_lock, flags);
5742 
5743 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5744 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5745 		return -ENOMEM;
5746 	}
5747 
5748 	/* assert preemption condition */
5749 	amdgpu_ring_set_preempt_cond_exec(ring, false);
5750 
5751 	ring->trail_seq += 1;
5752 	amdgpu_ring_alloc(ring, 13);
5753 	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5754 				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5755 
5756 	/* assert IB preemption, emit the trailing fence */
5757 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5758 				   ring->trail_fence_gpu_addr,
5759 				   ring->trail_seq);
5760 
5761 	amdgpu_ring_commit(kiq_ring);
5762 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5763 
5764 	/* poll the trailing fence */
5765 	for (i = 0; i < adev->usec_timeout; i++) {
5766 		if (ring->trail_seq ==
5767 			le32_to_cpu(*ring->trail_fence_cpu_addr))
5768 			break;
5769 		udelay(1);
5770 	}
5771 
5772 	if (i >= adev->usec_timeout) {
5773 		r = -EINVAL;
5774 		DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
5775 	}
5776 
5777 	/*reset the CP_VMID_PREEMPT after trailing fence*/
5778 	amdgpu_ring_emit_wreg(ring,
5779 			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5780 			      0x0);
5781 	amdgpu_ring_commit(ring);
5782 
5783 	/* deassert preemption condition */
5784 	amdgpu_ring_set_preempt_cond_exec(ring, true);
5785 	return r;
5786 }
5787 
5788 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5789 {
5790 	struct amdgpu_device *adev = ring->adev;
5791 	struct v9_de_ib_state de_payload = {0};
5792 	uint64_t offset, gds_addr, de_payload_gpu_addr;
5793 	void *de_payload_cpu_addr;
5794 	int cnt;
5795 
5796 	if (ring->is_mes_queue) {
5797 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5798 				  gfx[0].gfx_meta_data) +
5799 			offsetof(struct v9_gfx_meta_data, de_payload);
5800 		de_payload_gpu_addr =
5801 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5802 		de_payload_cpu_addr =
5803 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5804 
5805 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5806 				  gfx[0].gds_backup) +
5807 			offsetof(struct v9_gfx_meta_data, de_payload);
5808 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5809 	} else {
5810 		offset = offsetof(struct v9_gfx_meta_data, de_payload);
5811 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5812 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5813 
5814 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5815 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5816 				 PAGE_SIZE);
5817 	}
5818 
5819 	if (usegds) {
5820 		de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5821 		de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5822 	}
5823 
5824 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5825 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5826 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5827 				 WRITE_DATA_DST_SEL(8) |
5828 				 WR_CONFIRM) |
5829 				 WRITE_DATA_CACHE_POLICY(0));
5830 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5831 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5832 
5833 	amdgpu_ring_ib_on_emit_de(ring);
5834 	if (resume)
5835 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5836 					   sizeof(de_payload) >> 2);
5837 	else
5838 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5839 					   sizeof(de_payload) >> 2);
5840 }
5841 
5842 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5843 				   bool secure)
5844 {
5845 	uint32_t v = secure ? FRAME_TMZ : 0;
5846 
5847 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5848 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5849 }
5850 
5851 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5852 {
5853 	uint32_t dw2 = 0;
5854 
5855 	gfx_v9_0_ring_emit_ce_meta(ring,
5856 				   (!amdgpu_sriov_vf(ring->adev) &&
5857 				   flags & AMDGPU_IB_PREEMPTED) ? true : false);
5858 
5859 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5860 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5861 		/* set load_global_config & load_global_uconfig */
5862 		dw2 |= 0x8001;
5863 		/* set load_cs_sh_regs */
5864 		dw2 |= 0x01000000;
5865 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5866 		dw2 |= 0x10002;
5867 
5868 		/* set load_ce_ram if preamble presented */
5869 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5870 			dw2 |= 0x10000000;
5871 	} else {
5872 		/* still load_ce_ram if this is the first time preamble presented
5873 		 * although there is no context switch happens.
5874 		 */
5875 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5876 			dw2 |= 0x10000000;
5877 	}
5878 
5879 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5880 	amdgpu_ring_write(ring, dw2);
5881 	amdgpu_ring_write(ring, 0);
5882 }
5883 
5884 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5885 						  uint64_t addr)
5886 {
5887 	unsigned ret;
5888 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5889 	amdgpu_ring_write(ring, lower_32_bits(addr));
5890 	amdgpu_ring_write(ring, upper_32_bits(addr));
5891 	/* discard following DWs if *cond_exec_gpu_addr==0 */
5892 	amdgpu_ring_write(ring, 0);
5893 	ret = ring->wptr & ring->buf_mask;
5894 	/* patch dummy value later */
5895 	amdgpu_ring_write(ring, 0);
5896 	return ret;
5897 }
5898 
5899 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5900 				    uint32_t reg_val_offs)
5901 {
5902 	struct amdgpu_device *adev = ring->adev;
5903 
5904 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5905 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5906 				(5 << 8) |	/* dst: memory */
5907 				(1 << 20));	/* write confirm */
5908 	amdgpu_ring_write(ring, reg);
5909 	amdgpu_ring_write(ring, 0);
5910 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5911 				reg_val_offs * 4));
5912 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5913 				reg_val_offs * 4));
5914 }
5915 
5916 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5917 				    uint32_t val)
5918 {
5919 	uint32_t cmd = 0;
5920 
5921 	switch (ring->funcs->type) {
5922 	case AMDGPU_RING_TYPE_GFX:
5923 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5924 		break;
5925 	case AMDGPU_RING_TYPE_KIQ:
5926 		cmd = (1 << 16); /* no inc addr */
5927 		break;
5928 	default:
5929 		cmd = WR_CONFIRM;
5930 		break;
5931 	}
5932 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5933 	amdgpu_ring_write(ring, cmd);
5934 	amdgpu_ring_write(ring, reg);
5935 	amdgpu_ring_write(ring, 0);
5936 	amdgpu_ring_write(ring, val);
5937 }
5938 
5939 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5940 					uint32_t val, uint32_t mask)
5941 {
5942 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5943 }
5944 
5945 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5946 						  uint32_t reg0, uint32_t reg1,
5947 						  uint32_t ref, uint32_t mask)
5948 {
5949 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5950 	struct amdgpu_device *adev = ring->adev;
5951 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5952 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5953 
5954 	if (fw_version_ok)
5955 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5956 				      ref, mask, 0x20);
5957 	else
5958 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5959 							   ref, mask);
5960 }
5961 
5962 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5963 {
5964 	struct amdgpu_device *adev = ring->adev;
5965 	uint32_t value = 0;
5966 
5967 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5968 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5969 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5970 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5971 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5972 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5973 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5974 }
5975 
5976 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5977 						 enum amdgpu_interrupt_state state)
5978 {
5979 	switch (state) {
5980 	case AMDGPU_IRQ_STATE_DISABLE:
5981 	case AMDGPU_IRQ_STATE_ENABLE:
5982 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5983 			       TIME_STAMP_INT_ENABLE,
5984 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5985 		break;
5986 	default:
5987 		break;
5988 	}
5989 }
5990 
5991 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5992 						     int me, int pipe,
5993 						     enum amdgpu_interrupt_state state)
5994 {
5995 	u32 mec_int_cntl, mec_int_cntl_reg;
5996 
5997 	/*
5998 	 * amdgpu controls only the first MEC. That's why this function only
5999 	 * handles the setting of interrupts for this specific MEC. All other
6000 	 * pipes' interrupts are set by amdkfd.
6001 	 */
6002 
6003 	if (me == 1) {
6004 		switch (pipe) {
6005 		case 0:
6006 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6007 			break;
6008 		case 1:
6009 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6010 			break;
6011 		case 2:
6012 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6013 			break;
6014 		case 3:
6015 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6016 			break;
6017 		default:
6018 			DRM_DEBUG("invalid pipe %d\n", pipe);
6019 			return;
6020 		}
6021 	} else {
6022 		DRM_DEBUG("invalid me %d\n", me);
6023 		return;
6024 	}
6025 
6026 	switch (state) {
6027 	case AMDGPU_IRQ_STATE_DISABLE:
6028 		mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg);
6029 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6030 					     TIME_STAMP_INT_ENABLE, 0);
6031 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6032 		break;
6033 	case AMDGPU_IRQ_STATE_ENABLE:
6034 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6035 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6036 					     TIME_STAMP_INT_ENABLE, 1);
6037 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6038 		break;
6039 	default:
6040 		break;
6041 	}
6042 }
6043 
6044 static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev,
6045 				     int me, int pipe)
6046 {
6047 	/*
6048 	 * amdgpu controls only the first MEC. That's why this function only
6049 	 * handles the setting of interrupts for this specific MEC. All other
6050 	 * pipes' interrupts are set by amdkfd.
6051 	 */
6052 	if (me != 1)
6053 		return 0;
6054 
6055 	switch (pipe) {
6056 	case 0:
6057 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6058 	case 1:
6059 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6060 	case 2:
6061 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6062 	case 3:
6063 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6064 	default:
6065 		return 0;
6066 	}
6067 }
6068 
6069 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6070 					     struct amdgpu_irq_src *source,
6071 					     unsigned type,
6072 					     enum amdgpu_interrupt_state state)
6073 {
6074 	u32 cp_int_cntl_reg, cp_int_cntl;
6075 	int i, j;
6076 
6077 	switch (state) {
6078 	case AMDGPU_IRQ_STATE_DISABLE:
6079 	case AMDGPU_IRQ_STATE_ENABLE:
6080 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6081 			       PRIV_REG_INT_ENABLE,
6082 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6083 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6084 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6085 				/* MECs start at 1 */
6086 				cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6087 
6088 				if (cp_int_cntl_reg) {
6089 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6090 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6091 								    PRIV_REG_INT_ENABLE,
6092 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6093 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6094 				}
6095 			}
6096 		}
6097 		break;
6098 	default:
6099 		break;
6100 	}
6101 
6102 	return 0;
6103 }
6104 
6105 static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev,
6106 					   struct amdgpu_irq_src *source,
6107 					   unsigned type,
6108 					   enum amdgpu_interrupt_state state)
6109 {
6110 	u32 cp_int_cntl_reg, cp_int_cntl;
6111 	int i, j;
6112 
6113 	switch (state) {
6114 	case AMDGPU_IRQ_STATE_DISABLE:
6115 	case AMDGPU_IRQ_STATE_ENABLE:
6116 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6117 			       OPCODE_ERROR_INT_ENABLE,
6118 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6119 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6120 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6121 				/* MECs start at 1 */
6122 				cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6123 
6124 				if (cp_int_cntl_reg) {
6125 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6126 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6127 								    OPCODE_ERROR_INT_ENABLE,
6128 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6129 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6130 				}
6131 			}
6132 		}
6133 		break;
6134 	default:
6135 		break;
6136 	}
6137 
6138 	return 0;
6139 }
6140 
6141 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6142 					      struct amdgpu_irq_src *source,
6143 					      unsigned type,
6144 					      enum amdgpu_interrupt_state state)
6145 {
6146 	switch (state) {
6147 	case AMDGPU_IRQ_STATE_DISABLE:
6148 	case AMDGPU_IRQ_STATE_ENABLE:
6149 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6150 			       PRIV_INSTR_INT_ENABLE,
6151 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6152 		break;
6153 	default:
6154 		break;
6155 	}
6156 
6157 	return 0;
6158 }
6159 
6160 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
6161 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6162 			CP_ECC_ERROR_INT_ENABLE, 1)
6163 
6164 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
6165 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6166 			CP_ECC_ERROR_INT_ENABLE, 0)
6167 
6168 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
6169 					      struct amdgpu_irq_src *source,
6170 					      unsigned type,
6171 					      enum amdgpu_interrupt_state state)
6172 {
6173 	switch (state) {
6174 	case AMDGPU_IRQ_STATE_DISABLE:
6175 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6176 				CP_ECC_ERROR_INT_ENABLE, 0);
6177 		DISABLE_ECC_ON_ME_PIPE(1, 0);
6178 		DISABLE_ECC_ON_ME_PIPE(1, 1);
6179 		DISABLE_ECC_ON_ME_PIPE(1, 2);
6180 		DISABLE_ECC_ON_ME_PIPE(1, 3);
6181 		break;
6182 
6183 	case AMDGPU_IRQ_STATE_ENABLE:
6184 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6185 				CP_ECC_ERROR_INT_ENABLE, 1);
6186 		ENABLE_ECC_ON_ME_PIPE(1, 0);
6187 		ENABLE_ECC_ON_ME_PIPE(1, 1);
6188 		ENABLE_ECC_ON_ME_PIPE(1, 2);
6189 		ENABLE_ECC_ON_ME_PIPE(1, 3);
6190 		break;
6191 	default:
6192 		break;
6193 	}
6194 
6195 	return 0;
6196 }
6197 
6198 
6199 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6200 					    struct amdgpu_irq_src *src,
6201 					    unsigned type,
6202 					    enum amdgpu_interrupt_state state)
6203 {
6204 	switch (type) {
6205 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6206 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
6207 		break;
6208 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6209 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6210 		break;
6211 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6212 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6213 		break;
6214 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6215 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6216 		break;
6217 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6218 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6219 		break;
6220 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6221 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6222 		break;
6223 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6224 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6225 		break;
6226 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6227 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6228 		break;
6229 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6230 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6231 		break;
6232 	default:
6233 		break;
6234 	}
6235 	return 0;
6236 }
6237 
6238 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
6239 			    struct amdgpu_irq_src *source,
6240 			    struct amdgpu_iv_entry *entry)
6241 {
6242 	int i;
6243 	u8 me_id, pipe_id, queue_id;
6244 	struct amdgpu_ring *ring;
6245 
6246 	DRM_DEBUG("IH: CP EOP\n");
6247 	me_id = (entry->ring_id & 0x0c) >> 2;
6248 	pipe_id = (entry->ring_id & 0x03) >> 0;
6249 	queue_id = (entry->ring_id & 0x70) >> 4;
6250 
6251 	switch (me_id) {
6252 	case 0:
6253 		if (adev->gfx.num_gfx_rings) {
6254 			if (!adev->gfx.mcbp) {
6255 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6256 			} else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
6257 				/* Fence signals are handled on the software rings*/
6258 				for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6259 					amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
6260 			}
6261 		}
6262 		break;
6263 	case 1:
6264 	case 2:
6265 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6266 			ring = &adev->gfx.compute_ring[i];
6267 			/* Per-queue interrupt is supported for MEC starting from VI.
6268 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6269 			  */
6270 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6271 				amdgpu_fence_process(ring);
6272 		}
6273 		break;
6274 	}
6275 	return 0;
6276 }
6277 
6278 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6279 			   struct amdgpu_iv_entry *entry)
6280 {
6281 	u8 me_id, pipe_id, queue_id;
6282 	struct amdgpu_ring *ring;
6283 	int i;
6284 
6285 	me_id = (entry->ring_id & 0x0c) >> 2;
6286 	pipe_id = (entry->ring_id & 0x03) >> 0;
6287 	queue_id = (entry->ring_id & 0x70) >> 4;
6288 
6289 	switch (me_id) {
6290 	case 0:
6291 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6292 		break;
6293 	case 1:
6294 	case 2:
6295 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6296 			ring = &adev->gfx.compute_ring[i];
6297 			if (ring->me == me_id && ring->pipe == pipe_id &&
6298 			    ring->queue == queue_id)
6299 				drm_sched_fault(&ring->sched);
6300 		}
6301 		break;
6302 	}
6303 }
6304 
6305 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6306 				 struct amdgpu_irq_src *source,
6307 				 struct amdgpu_iv_entry *entry)
6308 {
6309 	DRM_ERROR("Illegal register access in command stream\n");
6310 	gfx_v9_0_fault(adev, entry);
6311 	return 0;
6312 }
6313 
6314 static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev,
6315 			       struct amdgpu_irq_src *source,
6316 			       struct amdgpu_iv_entry *entry)
6317 {
6318 	DRM_ERROR("Illegal opcode in command stream\n");
6319 	gfx_v9_0_fault(adev, entry);
6320 	return 0;
6321 }
6322 
6323 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6324 				  struct amdgpu_irq_src *source,
6325 				  struct amdgpu_iv_entry *entry)
6326 {
6327 	DRM_ERROR("Illegal instruction in command stream\n");
6328 	gfx_v9_0_fault(adev, entry);
6329 	return 0;
6330 }
6331 
6332 
6333 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6334 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6335 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6336 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6337 	},
6338 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6339 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6340 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6341 	},
6342 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6343 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6344 	  0, 0
6345 	},
6346 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6347 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6348 	  0, 0
6349 	},
6350 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6351 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6352 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6353 	},
6354 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6355 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6356 	  0, 0
6357 	},
6358 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6359 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6360 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6361 	},
6362 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6363 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6364 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6365 	},
6366 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6367 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6368 	  0, 0
6369 	},
6370 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6371 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6372 	  0, 0
6373 	},
6374 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6375 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6376 	  0, 0
6377 	},
6378 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6379 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6380 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6381 	},
6382 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6383 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6384 	  0, 0
6385 	},
6386 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6387 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6388 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6389 	},
6390 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6391 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6392 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6393 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6394 	},
6395 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6396 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6397 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6398 	  0, 0
6399 	},
6400 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6401 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6402 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6403 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6404 	},
6405 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6406 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6407 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6408 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6409 	},
6410 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6411 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6412 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6413 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6414 	},
6415 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6416 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6417 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6418 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6419 	},
6420 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6421 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6422 	  0, 0
6423 	},
6424 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6425 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6426 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6427 	},
6428 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6429 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6430 	  0, 0
6431 	},
6432 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6433 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6434 	  0, 0
6435 	},
6436 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6437 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6438 	  0, 0
6439 	},
6440 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6441 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6442 	  0, 0
6443 	},
6444 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6445 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6446 	  0, 0
6447 	},
6448 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6449 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6450 	  0, 0
6451 	},
6452 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6453 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6454 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6455 	},
6456 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6457 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6458 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6459 	},
6460 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6461 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6462 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6463 	},
6464 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6465 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6466 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6467 	},
6468 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6469 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6470 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6471 	},
6472 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6473 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6474 	  0, 0
6475 	},
6476 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6477 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6478 	  0, 0
6479 	},
6480 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6481 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6482 	  0, 0
6483 	},
6484 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6485 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6486 	  0, 0
6487 	},
6488 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6489 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6490 	  0, 0
6491 	},
6492 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6493 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6494 	  0, 0
6495 	},
6496 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6497 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6498 	  0, 0
6499 	},
6500 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6501 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6502 	  0, 0
6503 	},
6504 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6505 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6506 	  0, 0
6507 	},
6508 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6509 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6510 	  0, 0
6511 	},
6512 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6513 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6514 	  0, 0
6515 	},
6516 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6517 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6518 	  0, 0
6519 	},
6520 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6521 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6522 	  0, 0
6523 	},
6524 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6525 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6526 	  0, 0
6527 	},
6528 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6529 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6530 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6531 	},
6532 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6533 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6534 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6535 	},
6536 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6537 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6538 	  0, 0
6539 	},
6540 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6541 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6542 	  0, 0
6543 	},
6544 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6545 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6546 	  0, 0
6547 	},
6548 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6549 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6550 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6551 	},
6552 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6553 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6554 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6555 	},
6556 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6557 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6558 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6559 	},
6560 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6561 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6562 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6563 	},
6564 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6565 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6566 	  0, 0
6567 	},
6568 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6569 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6570 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6571 	},
6572 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6573 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6574 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6575 	},
6576 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6577 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6578 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6579 	},
6580 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6581 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6582 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6583 	},
6584 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6585 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6586 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6587 	},
6588 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6589 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6590 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6591 	},
6592 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6593 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6594 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6595 	},
6596 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6597 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6598 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6599 	},
6600 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6601 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6602 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6603 	},
6604 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6605 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6606 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6607 	},
6608 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6609 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6610 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6611 	},
6612 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6613 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6614 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6615 	},
6616 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6617 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6618 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6619 	},
6620 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6621 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6622 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6623 	},
6624 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6625 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6626 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6627 	},
6628 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6629 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6630 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6631 	},
6632 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6633 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6634 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6635 	},
6636 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6637 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6638 	  0, 0
6639 	},
6640 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6641 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6642 	  0, 0
6643 	},
6644 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6645 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6646 	  0, 0
6647 	},
6648 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6649 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6650 	  0, 0
6651 	},
6652 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6653 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6654 	  0, 0
6655 	},
6656 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6657 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6658 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6659 	},
6660 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6661 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6662 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6663 	},
6664 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6665 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6666 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6667 	},
6668 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6669 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6670 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6671 	},
6672 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6673 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6674 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6675 	},
6676 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6677 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6678 	  0, 0
6679 	},
6680 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6681 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6682 	  0, 0
6683 	},
6684 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6685 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6686 	  0, 0
6687 	},
6688 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6689 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6690 	  0, 0
6691 	},
6692 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6693 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6694 	  0, 0
6695 	},
6696 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6697 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6698 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6699 	},
6700 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6701 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6702 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6703 	},
6704 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6705 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6706 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6707 	},
6708 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6709 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6710 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6711 	},
6712 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6713 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6714 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6715 	},
6716 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6717 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6718 	  0, 0
6719 	},
6720 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6721 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6722 	  0, 0
6723 	},
6724 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6725 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6726 	  0, 0
6727 	},
6728 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6729 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6730 	  0, 0
6731 	},
6732 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6733 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6734 	  0, 0
6735 	},
6736 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6737 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6738 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6739 	},
6740 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6741 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6742 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6743 	},
6744 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6745 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6746 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6747 	},
6748 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6749 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6750 	  0, 0
6751 	},
6752 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6753 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6754 	  0, 0
6755 	},
6756 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6757 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6758 	  0, 0
6759 	},
6760 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6761 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6762 	  0, 0
6763 	},
6764 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6765 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6766 	  0, 0
6767 	},
6768 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6769 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6770 	  0, 0
6771 	}
6772 };
6773 
6774 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6775 				     void *inject_if, uint32_t instance_mask)
6776 {
6777 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6778 	int ret;
6779 	struct ta_ras_trigger_error_input block_info = { 0 };
6780 
6781 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6782 		return -EINVAL;
6783 
6784 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6785 		return -EINVAL;
6786 
6787 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6788 		return -EPERM;
6789 
6790 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6791 	      info->head.type)) {
6792 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6793 			ras_gfx_subblocks[info->head.sub_block_index].name,
6794 			info->head.type);
6795 		return -EPERM;
6796 	}
6797 
6798 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6799 	      info->head.type)) {
6800 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6801 			ras_gfx_subblocks[info->head.sub_block_index].name,
6802 			info->head.type);
6803 		return -EPERM;
6804 	}
6805 
6806 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6807 	block_info.sub_block_index =
6808 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6809 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6810 	block_info.address = info->address;
6811 	block_info.value = info->value;
6812 
6813 	mutex_lock(&adev->grbm_idx_mutex);
6814 	ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6815 	mutex_unlock(&adev->grbm_idx_mutex);
6816 
6817 	return ret;
6818 }
6819 
6820 static const char * const vml2_mems[] = {
6821 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6822 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6823 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6824 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6825 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6826 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6827 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6828 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6829 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6830 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6831 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6832 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6833 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6834 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6835 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6836 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6837 };
6838 
6839 static const char * const vml2_walker_mems[] = {
6840 	"UTC_VML2_CACHE_PDE0_MEM0",
6841 	"UTC_VML2_CACHE_PDE0_MEM1",
6842 	"UTC_VML2_CACHE_PDE1_MEM0",
6843 	"UTC_VML2_CACHE_PDE1_MEM1",
6844 	"UTC_VML2_CACHE_PDE2_MEM0",
6845 	"UTC_VML2_CACHE_PDE2_MEM1",
6846 	"UTC_VML2_RDIF_LOG_FIFO",
6847 };
6848 
6849 static const char * const atc_l2_cache_2m_mems[] = {
6850 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6851 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6852 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6853 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6854 };
6855 
6856 static const char *atc_l2_cache_4k_mems[] = {
6857 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6858 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6859 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6860 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6861 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6862 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6863 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6864 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6865 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6866 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6867 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6868 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6869 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6870 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6871 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6872 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6873 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6874 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6875 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6876 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6877 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6878 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6879 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6880 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6881 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6882 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6883 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6884 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6885 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6886 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6887 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6888 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6889 };
6890 
6891 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6892 					 struct ras_err_data *err_data)
6893 {
6894 	uint32_t i, data;
6895 	uint32_t sec_count, ded_count;
6896 
6897 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6898 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6899 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6900 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6901 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6902 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6903 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6904 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6905 
6906 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6907 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6908 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6909 
6910 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6911 		if (sec_count) {
6912 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6913 				"SEC %d\n", i, vml2_mems[i], sec_count);
6914 			err_data->ce_count += sec_count;
6915 		}
6916 
6917 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6918 		if (ded_count) {
6919 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6920 				"DED %d\n", i, vml2_mems[i], ded_count);
6921 			err_data->ue_count += ded_count;
6922 		}
6923 	}
6924 
6925 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6926 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6927 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6928 
6929 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6930 						SEC_COUNT);
6931 		if (sec_count) {
6932 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6933 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6934 			err_data->ce_count += sec_count;
6935 		}
6936 
6937 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6938 						DED_COUNT);
6939 		if (ded_count) {
6940 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6941 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6942 			err_data->ue_count += ded_count;
6943 		}
6944 	}
6945 
6946 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6947 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6948 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6949 
6950 		sec_count = (data & 0x00006000L) >> 0xd;
6951 		if (sec_count) {
6952 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6953 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6954 				sec_count);
6955 			err_data->ce_count += sec_count;
6956 		}
6957 	}
6958 
6959 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6960 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6961 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6962 
6963 		sec_count = (data & 0x00006000L) >> 0xd;
6964 		if (sec_count) {
6965 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6966 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6967 				sec_count);
6968 			err_data->ce_count += sec_count;
6969 		}
6970 
6971 		ded_count = (data & 0x00018000L) >> 0xf;
6972 		if (ded_count) {
6973 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6974 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6975 				ded_count);
6976 			err_data->ue_count += ded_count;
6977 		}
6978 	}
6979 
6980 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6981 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6982 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6983 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6984 
6985 	return 0;
6986 }
6987 
6988 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6989 	const struct soc15_reg_entry *reg,
6990 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6991 	uint32_t *sec_count, uint32_t *ded_count)
6992 {
6993 	uint32_t i;
6994 	uint32_t sec_cnt, ded_cnt;
6995 
6996 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6997 		if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6998 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6999 			gfx_v9_0_ras_fields[i].inst != reg->inst)
7000 			continue;
7001 
7002 		sec_cnt = (value &
7003 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
7004 				gfx_v9_0_ras_fields[i].sec_count_shift;
7005 		if (sec_cnt) {
7006 			dev_info(adev->dev, "GFX SubBlock %s, "
7007 				"Instance[%d][%d], SEC %d\n",
7008 				gfx_v9_0_ras_fields[i].name,
7009 				se_id, inst_id,
7010 				sec_cnt);
7011 			*sec_count += sec_cnt;
7012 		}
7013 
7014 		ded_cnt = (value &
7015 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
7016 				gfx_v9_0_ras_fields[i].ded_count_shift;
7017 		if (ded_cnt) {
7018 			dev_info(adev->dev, "GFX SubBlock %s, "
7019 				"Instance[%d][%d], DED %d\n",
7020 				gfx_v9_0_ras_fields[i].name,
7021 				se_id, inst_id,
7022 				ded_cnt);
7023 			*ded_count += ded_cnt;
7024 		}
7025 	}
7026 
7027 	return 0;
7028 }
7029 
7030 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
7031 {
7032 	int i, j, k;
7033 
7034 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7035 		return;
7036 
7037 	/* read back registers to clear the counters */
7038 	mutex_lock(&adev->grbm_idx_mutex);
7039 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7040 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7041 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7042 				amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
7043 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7044 			}
7045 		}
7046 	}
7047 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
7048 	mutex_unlock(&adev->grbm_idx_mutex);
7049 
7050 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7051 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
7052 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7053 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
7054 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7055 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
7056 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7057 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
7058 
7059 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
7060 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
7061 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
7062 	}
7063 
7064 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
7065 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
7066 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
7067 	}
7068 
7069 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
7070 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
7071 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
7072 	}
7073 
7074 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
7075 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
7076 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
7077 	}
7078 
7079 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7080 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7081 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7082 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7083 }
7084 
7085 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
7086 					  void *ras_error_status)
7087 {
7088 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
7089 	uint32_t sec_count = 0, ded_count = 0;
7090 	uint32_t i, j, k;
7091 	uint32_t reg_value;
7092 
7093 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7094 		return;
7095 
7096 	err_data->ue_count = 0;
7097 	err_data->ce_count = 0;
7098 
7099 	mutex_lock(&adev->grbm_idx_mutex);
7100 
7101 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7102 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7103 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7104 				amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
7105 				reg_value =
7106 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7107 				if (reg_value)
7108 					gfx_v9_0_ras_error_count(adev,
7109 						&gfx_v9_0_edc_counter_regs[i],
7110 						j, k, reg_value,
7111 						&sec_count, &ded_count);
7112 			}
7113 		}
7114 	}
7115 
7116 	err_data->ce_count += sec_count;
7117 	err_data->ue_count += ded_count;
7118 
7119 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7120 	mutex_unlock(&adev->grbm_idx_mutex);
7121 
7122 	gfx_v9_0_query_utc_edc_status(adev, err_data);
7123 }
7124 
7125 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
7126 {
7127 	const unsigned int cp_coher_cntl =
7128 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
7129 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
7130 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
7131 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
7132 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
7133 
7134 	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
7135 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
7136 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
7137 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
7138 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
7139 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
7140 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
7141 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
7142 }
7143 
7144 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
7145 					uint32_t pipe, bool enable)
7146 {
7147 	struct amdgpu_device *adev = ring->adev;
7148 	uint32_t val;
7149 	uint32_t wcl_cs_reg;
7150 
7151 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
7152 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
7153 
7154 	switch (pipe) {
7155 	case 0:
7156 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
7157 		break;
7158 	case 1:
7159 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
7160 		break;
7161 	case 2:
7162 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
7163 		break;
7164 	case 3:
7165 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
7166 		break;
7167 	default:
7168 		DRM_DEBUG("invalid pipe %d\n", pipe);
7169 		return;
7170 	}
7171 
7172 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
7173 
7174 }
7175 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
7176 {
7177 	struct amdgpu_device *adev = ring->adev;
7178 	uint32_t val;
7179 	int i;
7180 
7181 
7182 	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
7183 	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
7184 	 * around 25% of gpu resources.
7185 	 */
7186 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
7187 	amdgpu_ring_emit_wreg(ring,
7188 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
7189 			      val);
7190 
7191 	/* Restrict waves for normal/low priority compute queues as well
7192 	 * to get best QoS for high priority compute jobs.
7193 	 *
7194 	 * amdgpu controls only 1st ME(0-3 CS pipes).
7195 	 */
7196 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
7197 		if (i != ring->pipe)
7198 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
7199 
7200 	}
7201 }
7202 
7203 static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
7204 {
7205 	/* Header itself is a NOP packet */
7206 	if (num_nop == 1) {
7207 		amdgpu_ring_write(ring, ring->funcs->nop);
7208 		return;
7209 	}
7210 
7211 	/* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
7212 	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
7213 
7214 	/* Header is at index 0, followed by num_nops - 1 NOP packet's */
7215 	amdgpu_ring_insert_nop(ring, num_nop - 1);
7216 }
7217 
7218 static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
7219 {
7220 	struct amdgpu_device *adev = ring->adev;
7221 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7222 	struct amdgpu_ring *kiq_ring = &kiq->ring;
7223 	unsigned long flags;
7224 	u32 tmp;
7225 	int r;
7226 
7227 	if (amdgpu_sriov_vf(adev))
7228 		return -EINVAL;
7229 
7230 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7231 		return -EINVAL;
7232 
7233 	spin_lock_irqsave(&kiq->ring_lock, flags);
7234 
7235 	if (amdgpu_ring_alloc(kiq_ring, 5)) {
7236 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
7237 		return -ENOMEM;
7238 	}
7239 
7240 	tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
7241 	gfx_v9_0_ring_emit_wreg(kiq_ring,
7242 				 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
7243 	amdgpu_ring_commit(kiq_ring);
7244 
7245 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
7246 
7247 	r = amdgpu_ring_test_ring(kiq_ring);
7248 	if (r)
7249 		return r;
7250 
7251 	if (amdgpu_ring_alloc(ring, 7 + 7 + 5))
7252 		return -ENOMEM;
7253 	gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
7254 				 ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
7255 	gfx_v9_0_ring_emit_reg_wait(ring,
7256 				    SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff);
7257 	gfx_v9_0_ring_emit_wreg(ring,
7258 				SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0);
7259 
7260 	return amdgpu_ring_test_ring(ring);
7261 }
7262 
7263 static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
7264 			      unsigned int vmid)
7265 {
7266 	struct amdgpu_device *adev = ring->adev;
7267 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7268 	struct amdgpu_ring *kiq_ring = &kiq->ring;
7269 	unsigned long flags;
7270 	int i, r;
7271 
7272 	if (amdgpu_sriov_vf(adev))
7273 		return -EINVAL;
7274 
7275 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7276 		return -EINVAL;
7277 
7278 	spin_lock_irqsave(&kiq->ring_lock, flags);
7279 
7280 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
7281 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
7282 		return -ENOMEM;
7283 	}
7284 
7285 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
7286 				   0, 0);
7287 	amdgpu_ring_commit(kiq_ring);
7288 
7289 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
7290 
7291 	r = amdgpu_ring_test_ring(kiq_ring);
7292 	if (r)
7293 		return r;
7294 
7295 	/* make sure dequeue is complete*/
7296 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
7297 	mutex_lock(&adev->srbm_mutex);
7298 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
7299 	for (i = 0; i < adev->usec_timeout; i++) {
7300 		if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
7301 			break;
7302 		udelay(1);
7303 	}
7304 	if (i >= adev->usec_timeout)
7305 		r = -ETIMEDOUT;
7306 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7307 	mutex_unlock(&adev->srbm_mutex);
7308 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
7309 	if (r) {
7310 		dev_err(adev->dev, "fail to wait on hqd deactive\n");
7311 		return r;
7312 	}
7313 
7314 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
7315 	if (unlikely(r != 0)){
7316 		dev_err(adev->dev, "fail to resv mqd_obj\n");
7317 		return r;
7318 	}
7319 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
7320 	if (!r) {
7321 		r = gfx_v9_0_kcq_init_queue(ring, true);
7322 		amdgpu_bo_kunmap(ring->mqd_obj);
7323 		ring->mqd_ptr = NULL;
7324 	}
7325 	amdgpu_bo_unreserve(ring->mqd_obj);
7326 	if (r) {
7327 		dev_err(adev->dev, "fail to unresv mqd_obj\n");
7328 		return r;
7329 	}
7330 	spin_lock_irqsave(&kiq->ring_lock, flags);
7331 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
7332 	if (r) {
7333 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
7334 		return -ENOMEM;
7335 	}
7336 	kiq->pmf->kiq_map_queues(kiq_ring, ring);
7337 	amdgpu_ring_commit(kiq_ring);
7338 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
7339 	r = amdgpu_ring_test_ring(kiq_ring);
7340 	if (r) {
7341 		DRM_ERROR("fail to remap queue\n");
7342 		return r;
7343 	}
7344 	return amdgpu_ring_test_ring(ring);
7345 }
7346 
7347 static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
7348 {
7349 	struct amdgpu_device *adev = ip_block->adev;
7350 	uint32_t i, j, k, reg, index = 0;
7351 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7352 
7353 	if (!adev->gfx.ip_dump_core)
7354 		return;
7355 
7356 	for (i = 0; i < reg_count; i++)
7357 		drm_printf(p, "%-50s \t 0x%08x\n",
7358 			   gc_reg_list_9[i].reg_name,
7359 			   adev->gfx.ip_dump_core[i]);
7360 
7361 	/* print compute queue registers for all instances */
7362 	if (!adev->gfx.ip_dump_compute_queues)
7363 		return;
7364 
7365 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7366 	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7367 		   adev->gfx.mec.num_mec,
7368 		   adev->gfx.mec.num_pipe_per_mec,
7369 		   adev->gfx.mec.num_queue_per_pipe);
7370 
7371 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7372 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7373 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7374 				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7375 				for (reg = 0; reg < reg_count; reg++) {
7376 					drm_printf(p, "%-50s \t 0x%08x\n",
7377 						   gc_cp_reg_list_9[reg].reg_name,
7378 						   adev->gfx.ip_dump_compute_queues[index + reg]);
7379 				}
7380 				index += reg_count;
7381 			}
7382 		}
7383 	}
7384 
7385 }
7386 
7387 static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
7388 {
7389 	struct amdgpu_device *adev = ip_block->adev;
7390 	uint32_t i, j, k, reg, index = 0;
7391 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7392 
7393 	if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
7394 		return;
7395 
7396 	amdgpu_gfx_off_ctrl(adev, false);
7397 	for (i = 0; i < reg_count; i++)
7398 		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7399 	amdgpu_gfx_off_ctrl(adev, true);
7400 
7401 	/* dump compute queue registers for all instances */
7402 	if (!adev->gfx.ip_dump_compute_queues)
7403 		return;
7404 
7405 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7406 	amdgpu_gfx_off_ctrl(adev, false);
7407 	mutex_lock(&adev->srbm_mutex);
7408 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7409 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7410 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7411 				/* ME0 is for GFX so start from 1 for CP */
7412 				soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
7413 
7414 				for (reg = 0; reg < reg_count; reg++) {
7415 					adev->gfx.ip_dump_compute_queues[index + reg] =
7416 						RREG32(SOC15_REG_ENTRY_OFFSET(
7417 							gc_cp_reg_list_9[reg]));
7418 				}
7419 				index += reg_count;
7420 			}
7421 		}
7422 	}
7423 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7424 	mutex_unlock(&adev->srbm_mutex);
7425 	amdgpu_gfx_off_ctrl(adev, true);
7426 
7427 }
7428 
7429 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
7430 {
7431 	/* Emit the cleaner shader */
7432 	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
7433 	amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
7434 }
7435 
7436 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
7437 	.name = "gfx_v9_0",
7438 	.early_init = gfx_v9_0_early_init,
7439 	.late_init = gfx_v9_0_late_init,
7440 	.sw_init = gfx_v9_0_sw_init,
7441 	.sw_fini = gfx_v9_0_sw_fini,
7442 	.hw_init = gfx_v9_0_hw_init,
7443 	.hw_fini = gfx_v9_0_hw_fini,
7444 	.suspend = gfx_v9_0_suspend,
7445 	.resume = gfx_v9_0_resume,
7446 	.is_idle = gfx_v9_0_is_idle,
7447 	.wait_for_idle = gfx_v9_0_wait_for_idle,
7448 	.soft_reset = gfx_v9_0_soft_reset,
7449 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
7450 	.set_powergating_state = gfx_v9_0_set_powergating_state,
7451 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
7452 	.dump_ip_state = gfx_v9_ip_dump,
7453 	.print_ip_state = gfx_v9_ip_print,
7454 };
7455 
7456 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
7457 	.type = AMDGPU_RING_TYPE_GFX,
7458 	.align_mask = 0xff,
7459 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7460 	.support_64bit_ptrs = true,
7461 	.secure_submission_supported = true,
7462 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
7463 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
7464 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
7465 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
7466 		5 +  /* COND_EXEC */
7467 		7 +  /* PIPELINE_SYNC */
7468 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7469 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7470 		2 + /* VM_FLUSH */
7471 		8 +  /* FENCE for VM_FLUSH */
7472 		20 + /* GDS switch */
7473 		4 + /* double SWITCH_BUFFER,
7474 		       the first COND_EXEC jump to the place just
7475 			   prior to this double SWITCH_BUFFER  */
7476 		5 + /* COND_EXEC */
7477 		7 +	 /*	HDP_flush */
7478 		4 +	 /*	VGT_flush */
7479 		14 + /*	CE_META */
7480 		31 + /*	DE_META */
7481 		3 + /* CNTX_CTRL */
7482 		5 + /* HDP_INVL */
7483 		8 + 8 + /* FENCE x2 */
7484 		2 + /* SWITCH_BUFFER */
7485 		7 + /* gfx_v9_0_emit_mem_sync */
7486 		2, /* gfx_v9_0_ring_emit_cleaner_shader */
7487 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
7488 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7489 	.emit_fence = gfx_v9_0_ring_emit_fence,
7490 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7491 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7492 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7493 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7494 	.test_ring = gfx_v9_0_ring_test_ring,
7495 	.insert_nop = gfx_v9_ring_insert_nop,
7496 	.pad_ib = amdgpu_ring_generic_pad_ib,
7497 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
7498 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7499 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7500 	.preempt_ib = gfx_v9_0_ring_preempt_ib,
7501 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7502 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7503 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7504 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7505 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7506 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7507 	.reset = gfx_v9_0_reset_kgq,
7508 	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7509 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7510 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7511 };
7512 
7513 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
7514 	.type = AMDGPU_RING_TYPE_GFX,
7515 	.align_mask = 0xff,
7516 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7517 	.support_64bit_ptrs = true,
7518 	.secure_submission_supported = true,
7519 	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
7520 	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
7521 	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
7522 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
7523 		5 +  /* COND_EXEC */
7524 		7 +  /* PIPELINE_SYNC */
7525 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7526 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7527 		2 + /* VM_FLUSH */
7528 		8 +  /* FENCE for VM_FLUSH */
7529 		20 + /* GDS switch */
7530 		4 + /* double SWITCH_BUFFER,
7531 		     * the first COND_EXEC jump to the place just
7532 		     * prior to this double SWITCH_BUFFER
7533 		     */
7534 		5 + /* COND_EXEC */
7535 		7 +	 /*	HDP_flush */
7536 		4 +	 /*	VGT_flush */
7537 		14 + /*	CE_META */
7538 		31 + /*	DE_META */
7539 		3 + /* CNTX_CTRL */
7540 		5 + /* HDP_INVL */
7541 		8 + 8 + /* FENCE x2 */
7542 		2 + /* SWITCH_BUFFER */
7543 		7 + /* gfx_v9_0_emit_mem_sync */
7544 		2, /* gfx_v9_0_ring_emit_cleaner_shader */
7545 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
7546 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7547 	.emit_fence = gfx_v9_0_ring_emit_fence,
7548 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7549 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7550 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7551 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7552 	.test_ring = gfx_v9_0_ring_test_ring,
7553 	.test_ib = gfx_v9_0_ring_test_ib,
7554 	.insert_nop = gfx_v9_ring_insert_nop,
7555 	.pad_ib = amdgpu_ring_generic_pad_ib,
7556 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
7557 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7558 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7559 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7560 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7561 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7562 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7563 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7564 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7565 	.patch_cntl = gfx_v9_0_ring_patch_cntl,
7566 	.patch_de = gfx_v9_0_ring_patch_de_meta,
7567 	.patch_ce = gfx_v9_0_ring_patch_ce_meta,
7568 	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7569 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7570 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7571 };
7572 
7573 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7574 	.type = AMDGPU_RING_TYPE_COMPUTE,
7575 	.align_mask = 0xff,
7576 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7577 	.support_64bit_ptrs = true,
7578 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7579 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7580 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7581 	.emit_frame_size =
7582 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7583 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7584 		5 + /* hdp invalidate */
7585 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7586 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7587 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7588 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7589 		7 + /* gfx_v9_0_emit_mem_sync */
7590 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7591 		15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7592 		2, /* gfx_v9_0_ring_emit_cleaner_shader */
7593 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7594 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
7595 	.emit_fence = gfx_v9_0_ring_emit_fence,
7596 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7597 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7598 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7599 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7600 	.test_ring = gfx_v9_0_ring_test_ring,
7601 	.test_ib = gfx_v9_0_ring_test_ib,
7602 	.insert_nop = gfx_v9_ring_insert_nop,
7603 	.pad_ib = amdgpu_ring_generic_pad_ib,
7604 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7605 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7606 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7607 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7608 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7609 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
7610 	.reset = gfx_v9_0_reset_kcq,
7611 	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7612 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7613 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7614 };
7615 
7616 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7617 	.type = AMDGPU_RING_TYPE_KIQ,
7618 	.align_mask = 0xff,
7619 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7620 	.support_64bit_ptrs = true,
7621 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7622 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7623 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7624 	.emit_frame_size =
7625 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7626 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7627 		5 + /* hdp invalidate */
7628 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7629 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7630 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7631 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7632 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7633 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7634 	.test_ring = gfx_v9_0_ring_test_ring,
7635 	.insert_nop = amdgpu_ring_insert_nop,
7636 	.pad_ib = amdgpu_ring_generic_pad_ib,
7637 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
7638 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7639 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7640 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7641 };
7642 
7643 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7644 {
7645 	int i;
7646 
7647 	adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7648 
7649 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7650 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7651 
7652 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7653 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7654 			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7655 	}
7656 
7657 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7658 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7659 }
7660 
7661 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7662 	.set = gfx_v9_0_set_eop_interrupt_state,
7663 	.process = gfx_v9_0_eop_irq,
7664 };
7665 
7666 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7667 	.set = gfx_v9_0_set_priv_reg_fault_state,
7668 	.process = gfx_v9_0_priv_reg_irq,
7669 };
7670 
7671 static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = {
7672 	.set = gfx_v9_0_set_bad_op_fault_state,
7673 	.process = gfx_v9_0_bad_op_irq,
7674 };
7675 
7676 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7677 	.set = gfx_v9_0_set_priv_inst_fault_state,
7678 	.process = gfx_v9_0_priv_inst_irq,
7679 };
7680 
7681 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7682 	.set = gfx_v9_0_set_cp_ecc_error_state,
7683 	.process = amdgpu_gfx_cp_ecc_error_irq,
7684 };
7685 
7686 
7687 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7688 {
7689 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7690 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7691 
7692 	adev->gfx.priv_reg_irq.num_types = 1;
7693 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7694 
7695 	adev->gfx.bad_op_irq.num_types = 1;
7696 	adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;
7697 
7698 	adev->gfx.priv_inst_irq.num_types = 1;
7699 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7700 
7701 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7702 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7703 }
7704 
7705 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7706 {
7707 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7708 	case IP_VERSION(9, 0, 1):
7709 	case IP_VERSION(9, 2, 1):
7710 	case IP_VERSION(9, 4, 0):
7711 	case IP_VERSION(9, 2, 2):
7712 	case IP_VERSION(9, 1, 0):
7713 	case IP_VERSION(9, 4, 1):
7714 	case IP_VERSION(9, 3, 0):
7715 	case IP_VERSION(9, 4, 2):
7716 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7717 		break;
7718 	default:
7719 		break;
7720 	}
7721 }
7722 
7723 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7724 {
7725 	/* init asci gds info */
7726 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7727 	case IP_VERSION(9, 0, 1):
7728 	case IP_VERSION(9, 2, 1):
7729 	case IP_VERSION(9, 4, 0):
7730 		adev->gds.gds_size = 0x10000;
7731 		break;
7732 	case IP_VERSION(9, 2, 2):
7733 	case IP_VERSION(9, 1, 0):
7734 	case IP_VERSION(9, 4, 1):
7735 		adev->gds.gds_size = 0x1000;
7736 		break;
7737 	case IP_VERSION(9, 4, 2):
7738 		/* aldebaran removed all the GDS internal memory,
7739 		 * only support GWS opcode in kernel, like barrier
7740 		 * semaphore.etc */
7741 		adev->gds.gds_size = 0;
7742 		break;
7743 	default:
7744 		adev->gds.gds_size = 0x10000;
7745 		break;
7746 	}
7747 
7748 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7749 	case IP_VERSION(9, 0, 1):
7750 	case IP_VERSION(9, 4, 0):
7751 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7752 		break;
7753 	case IP_VERSION(9, 2, 1):
7754 		adev->gds.gds_compute_max_wave_id = 0x27f;
7755 		break;
7756 	case IP_VERSION(9, 2, 2):
7757 	case IP_VERSION(9, 1, 0):
7758 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7759 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7760 		else
7761 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7762 		break;
7763 	case IP_VERSION(9, 4, 1):
7764 		adev->gds.gds_compute_max_wave_id = 0xfff;
7765 		break;
7766 	case IP_VERSION(9, 4, 2):
7767 		/* deprecated for Aldebaran, no usage at all */
7768 		adev->gds.gds_compute_max_wave_id = 0;
7769 		break;
7770 	default:
7771 		/* this really depends on the chip */
7772 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7773 		break;
7774 	}
7775 
7776 	adev->gds.gws_size = 64;
7777 	adev->gds.oa_size = 16;
7778 }
7779 
7780 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7781 						 u32 bitmap)
7782 {
7783 	u32 data;
7784 
7785 	if (!bitmap)
7786 		return;
7787 
7788 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7789 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7790 
7791 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7792 }
7793 
7794 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7795 {
7796 	u32 data, mask;
7797 
7798 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7799 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7800 
7801 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7802 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7803 
7804 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7805 
7806 	return (~data) & mask;
7807 }
7808 
7809 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7810 				 struct amdgpu_cu_info *cu_info)
7811 {
7812 	int i, j, k, counter, active_cu_number = 0;
7813 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7814 	unsigned disable_masks[4 * 4];
7815 
7816 	if (!adev || !cu_info)
7817 		return -EINVAL;
7818 
7819 	/*
7820 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7821 	 */
7822 	if (adev->gfx.config.max_shader_engines *
7823 		adev->gfx.config.max_sh_per_se > 16)
7824 		return -EINVAL;
7825 
7826 	amdgpu_gfx_parse_disable_cu(disable_masks,
7827 				    adev->gfx.config.max_shader_engines,
7828 				    adev->gfx.config.max_sh_per_se);
7829 
7830 	mutex_lock(&adev->grbm_idx_mutex);
7831 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7832 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7833 			mask = 1;
7834 			ao_bitmap = 0;
7835 			counter = 0;
7836 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7837 			gfx_v9_0_set_user_cu_inactive_bitmap(
7838 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7839 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7840 
7841 			/*
7842 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
7843 			 * 4x4 size array, and it's usually suitable for Vega
7844 			 * ASICs which has 4*2 SE/SH layout.
7845 			 * But for Arcturus, SE/SH layout is changed to 8*1.
7846 			 * To mostly reduce the impact, we make it compatible
7847 			 * with current bitmap array as below:
7848 			 *    SE4,SH0 --> bitmap[0][1]
7849 			 *    SE5,SH0 --> bitmap[1][1]
7850 			 *    SE6,SH0 --> bitmap[2][1]
7851 			 *    SE7,SH0 --> bitmap[3][1]
7852 			 */
7853 			cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7854 
7855 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7856 				if (bitmap & mask) {
7857 					if (counter < adev->gfx.config.max_cu_per_sh)
7858 						ao_bitmap |= mask;
7859 					counter ++;
7860 				}
7861 				mask <<= 1;
7862 			}
7863 			active_cu_number += counter;
7864 			if (i < 2 && j < 2)
7865 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7866 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7867 		}
7868 	}
7869 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7870 	mutex_unlock(&adev->grbm_idx_mutex);
7871 
7872 	cu_info->number = active_cu_number;
7873 	cu_info->ao_cu_mask = ao_cu_mask;
7874 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7875 
7876 	return 0;
7877 }
7878 
7879 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7880 {
7881 	.type = AMD_IP_BLOCK_TYPE_GFX,
7882 	.major = 9,
7883 	.minor = 0,
7884 	.rev = 0,
7885 	.funcs = &gfx_v9_0_ip_funcs,
7886 };
7887