xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 2845f512232de9e436b9e3b5529e906e62414013)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_0_cleaner_shader.h"
54 #include "gfx_v9_4_2.h"
55 
56 #include "asic_reg/pwr/pwr_10_0_offset.h"
57 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
58 #include "asic_reg/gc/gc_9_0_default.h"
59 
60 #define GFX9_NUM_GFX_RINGS     1
61 #define GFX9_NUM_SW_GFX_RINGS  2
62 #define GFX9_MEC_HPD_SIZE 4096
63 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
64 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
65 
66 #define mmGCEA_PROBE_MAP                        0x070c
67 #define mmGCEA_PROBE_MAP_BASE_IDX               0
68 
69 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
74 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
75 
76 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
81 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
82 
83 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
88 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
89 
90 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
91 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
92 MODULE_FIRMWARE("amdgpu/raven_me.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
94 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
95 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
96 
97 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
103 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
104 
105 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
110 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
111 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
112 
113 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
115 
116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
120 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
121 
122 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
127 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
128 
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
133 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
134 
135 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
136 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
137 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
138 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
139 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
140 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
141 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
142 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
143 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
144 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
145 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
146 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
147 
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
149 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
151 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
152 
153 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
154 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
155 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
156 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
157 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
158 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
159 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
160 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
161 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
162 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
163 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
164 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
165 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
166 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
167 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
168 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
169 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
170 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
171 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
172 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
173 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
174 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
175 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
176 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
177 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
178 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
179 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
180 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
181 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
182 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
183 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
184 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
185 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
186 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
187 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
188 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
189 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
190 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
191 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
192 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
193 	SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
194 	SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
195 	SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
196 	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
197 	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
198 	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
199 	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
200 	SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
201 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
202 	SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
203 	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
204 	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
205 	SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
206 	SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
207 	SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
208 	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
209 	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
210 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
211 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
212 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
213 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
214 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
215 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
216 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
217 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
218 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
219 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
220 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
221 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
222 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
223 	SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
224 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
225 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
226 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
227 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
228 	/* cp header registers */
229 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
230 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
231 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
232 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
233 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
234 	/* SE status registers */
235 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
236 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
237 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
238 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
239 };
240 
241 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
242 	/* compute queue registers */
243 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
244 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
245 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
246 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
247 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
248 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
249 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
250 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
251 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
252 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
253 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
254 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
255 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
256 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
257 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
258 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
259 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
260 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
261 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
262 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
263 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
264 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
265 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
266 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
267 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
268 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
269 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
270 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
271 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
272 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
273 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
274 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
275 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
276 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
277 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
278 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
279 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
280 };
281 
282 enum ta_ras_gfx_subblock {
283 	/*CPC*/
284 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
285 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
286 	TA_RAS_BLOCK__GFX_CPC_UCODE,
287 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
288 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
289 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
290 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
291 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
292 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
293 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
294 	/* CPF*/
295 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
296 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
297 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
298 	TA_RAS_BLOCK__GFX_CPF_TAG,
299 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
300 	/* CPG*/
301 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
302 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
303 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
304 	TA_RAS_BLOCK__GFX_CPG_TAG,
305 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
306 	/* GDS*/
307 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
308 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
309 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
310 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
311 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
312 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
313 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
314 	/* SPI*/
315 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
316 	/* SQ*/
317 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
318 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
319 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
320 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
321 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
322 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
323 	/* SQC (3 ranges)*/
324 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
325 	/* SQC range 0*/
326 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
327 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
328 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
329 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
330 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
331 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
332 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
333 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
334 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
335 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
336 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
337 	/* SQC range 1*/
338 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
339 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
340 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
341 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
342 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
343 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
344 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
345 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
346 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
347 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
348 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
349 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
350 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
351 	/* SQC range 2*/
352 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
353 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
354 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
355 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
356 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
357 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
358 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
359 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
360 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
361 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
362 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
363 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
364 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
365 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
366 	/* TA*/
367 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
368 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
369 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
370 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
371 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
372 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
373 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
374 	/* TCA*/
375 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
376 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
377 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
378 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
379 	/* TCC (5 sub-ranges)*/
380 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
381 	/* TCC range 0*/
382 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
383 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
384 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
385 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
386 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
387 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
388 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
389 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
390 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
391 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
392 	/* TCC range 1*/
393 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
394 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
395 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
396 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
397 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
398 	/* TCC range 2*/
399 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
400 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
401 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
402 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
403 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
404 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
405 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
406 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
407 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
408 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
409 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
410 	/* TCC range 3*/
411 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
412 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
413 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
414 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
415 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
416 	/* TCC range 4*/
417 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
418 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
419 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
420 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
421 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
422 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
423 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
424 	/* TCI*/
425 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
426 	/* TCP*/
427 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
428 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
429 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
430 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
431 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
432 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
433 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
434 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
435 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
436 	/* TD*/
437 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
438 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
439 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
440 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
441 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
442 	/* EA (3 sub-ranges)*/
443 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
444 	/* EA range 0*/
445 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
446 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
447 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
448 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
449 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
450 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
451 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
452 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
453 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
454 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
455 	/* EA range 1*/
456 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
457 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
458 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
459 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
460 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
461 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
462 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
463 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
464 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
465 	/* EA range 2*/
466 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
467 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
468 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
469 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
470 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
471 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
472 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
473 	/* UTC VM L2 bank*/
474 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
475 	/* UTC VM walker*/
476 	TA_RAS_BLOCK__UTC_VML2_WALKER,
477 	/* UTC ATC L2 2MB cache*/
478 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
479 	/* UTC ATC L2 4KB cache*/
480 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
481 	TA_RAS_BLOCK__GFX_MAX
482 };
483 
484 struct ras_gfx_subblock {
485 	unsigned char *name;
486 	int ta_subblock;
487 	int hw_supported_error_type;
488 	int sw_supported_error_type;
489 };
490 
491 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
492 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
493 		#subblock,                                                     \
494 		TA_RAS_BLOCK__##subblock,                                      \
495 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
496 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
497 	}
498 
499 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
500 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
509 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
510 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
511 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
512 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
513 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
514 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
515 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
516 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
517 			     0),
518 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
519 			     0),
520 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
521 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
522 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
523 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
524 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
525 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
526 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
527 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
528 			     0, 0),
529 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
530 			     0),
531 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
532 			     0, 0),
533 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
534 			     0),
535 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
536 			     0, 0),
537 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
538 			     0),
539 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
540 			     1),
541 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
542 			     0, 0, 0),
543 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
544 			     0),
545 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
546 			     0),
547 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
548 			     0),
549 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
550 			     0),
551 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
552 			     0),
553 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
554 			     0, 0),
555 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
556 			     0),
557 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
558 			     0),
559 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
560 			     0, 0, 0),
561 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
562 			     0),
563 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
564 			     0),
565 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
566 			     0),
567 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
568 			     0),
569 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
570 			     0),
571 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
572 			     0, 0),
573 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
574 			     0),
575 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
576 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
577 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
578 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
579 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
580 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
581 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
582 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
583 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
584 			     1),
585 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
586 			     1),
587 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
588 			     1),
589 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
590 			     0),
591 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
592 			     0),
593 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
594 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
595 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
596 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
597 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
598 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
599 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
600 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
601 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
602 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
603 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
604 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
605 			     0),
606 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
607 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
608 			     0),
609 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
610 			     0, 0),
611 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
612 			     0),
613 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
614 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
615 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
616 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
617 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
618 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
619 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
620 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
621 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
622 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
623 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
624 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
625 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
626 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
627 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
628 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
629 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
630 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
631 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
632 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
633 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
634 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
635 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
636 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
637 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
638 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
639 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
640 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
641 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
642 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
643 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
644 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
645 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
646 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
647 };
648 
649 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
650 {
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
671 };
672 
673 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
674 {
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
693 };
694 
695 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
696 {
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
707 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
708 };
709 
710 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
711 {
712 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
713 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
714 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
715 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
716 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
717 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
718 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
719 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
720 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
721 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
722 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
723 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
724 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
725 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
726 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
727 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
728 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
729 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
730 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
731 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
732 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
733 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
734 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
735 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
736 };
737 
738 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
739 {
740 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
741 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
742 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
743 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
744 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
745 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
746 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
747 };
748 
749 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
750 {
751 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
752 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
753 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
754 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
755 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
756 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
757 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
758 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
759 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
760 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
761 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
762 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
763 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
764 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
765 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
766 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
767 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
768 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
769 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
770 };
771 
772 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
773 {
774 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
775 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
776 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
777 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
778 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
779 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
780 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
781 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
782 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
783 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
784 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
785 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
786 };
787 
788 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
789 {
790 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
791 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
792 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
793 };
794 
795 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
796 {
797 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
798 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
799 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
800 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
801 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
802 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
803 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
804 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
805 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
806 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
807 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
808 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
809 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
810 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
811 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
812 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
813 };
814 
815 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
816 {
817 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
818 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
819 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
820 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
821 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
822 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
823 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
824 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
825 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
826 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
827 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
828 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
829 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
830 };
831 
832 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
833 {
834 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
835 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
836 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
837 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
838 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
839 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
840 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
841 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
842 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
843 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
844 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
845 };
846 
847 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
848 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
849 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
850 };
851 
852 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
853 {
854 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
855 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
856 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
857 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
858 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
859 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
860 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
861 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
862 };
863 
864 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
865 {
866 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
867 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
868 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
869 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
870 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
871 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
872 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
873 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
874 };
875 
876 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
877 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
878 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
879 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
880 
881 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
882 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
883 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
884 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
885 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
886 				struct amdgpu_cu_info *cu_info);
887 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
888 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
889 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
890 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
891 					  void *ras_error_status);
892 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
893 				     void *inject_if, uint32_t instance_mask);
894 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
895 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
896 					      unsigned int vmid);
897 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
898 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
899 
900 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
901 				uint64_t queue_mask)
902 {
903 	struct amdgpu_device *adev = kiq_ring->adev;
904 	u64 shader_mc_addr;
905 
906 	/* Cleaner shader MC address */
907 	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
908 
909 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
910 	amdgpu_ring_write(kiq_ring,
911 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
912 		/* vmid_mask:0* queue_type:0 (KIQ) */
913 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
914 	amdgpu_ring_write(kiq_ring,
915 			lower_32_bits(queue_mask));	/* queue mask lo */
916 	amdgpu_ring_write(kiq_ring,
917 			upper_32_bits(queue_mask));	/* queue mask hi */
918 	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
919 	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
920 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
921 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
922 }
923 
924 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
925 				 struct amdgpu_ring *ring)
926 {
927 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
928 	uint64_t wptr_addr = ring->wptr_gpu_addr;
929 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
930 
931 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
932 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
933 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
934 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
935 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
936 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
937 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
938 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
939 			 /*queue_type: normal compute queue */
940 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
941 			 /* alloc format: all_on_one_pipe */
942 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
943 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
944 			 /* num_queues: must be 1 */
945 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
946 	amdgpu_ring_write(kiq_ring,
947 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
948 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
949 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
950 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
951 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
952 }
953 
954 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
955 				   struct amdgpu_ring *ring,
956 				   enum amdgpu_unmap_queues_action action,
957 				   u64 gpu_addr, u64 seq)
958 {
959 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
960 
961 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
962 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
963 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
964 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
965 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
966 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
967 	amdgpu_ring_write(kiq_ring,
968 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
969 
970 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
971 		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
972 		amdgpu_ring_write(kiq_ring, 0);
973 		amdgpu_ring_write(kiq_ring, 0);
974 
975 	} else {
976 		amdgpu_ring_write(kiq_ring, 0);
977 		amdgpu_ring_write(kiq_ring, 0);
978 		amdgpu_ring_write(kiq_ring, 0);
979 	}
980 }
981 
982 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
983 				   struct amdgpu_ring *ring,
984 				   u64 addr,
985 				   u64 seq)
986 {
987 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
988 
989 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
990 	amdgpu_ring_write(kiq_ring,
991 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
992 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
993 			  PACKET3_QUERY_STATUS_COMMAND(2));
994 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
995 	amdgpu_ring_write(kiq_ring,
996 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
997 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
998 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
999 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
1000 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
1001 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
1002 }
1003 
1004 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
1005 				uint16_t pasid, uint32_t flush_type,
1006 				bool all_hub)
1007 {
1008 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
1009 	amdgpu_ring_write(kiq_ring,
1010 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
1011 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
1012 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
1013 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
1014 }
1015 
1016 
1017 static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
1018 					uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
1019 					uint32_t xcc_id, uint32_t vmid)
1020 {
1021 	struct amdgpu_device *adev = kiq_ring->adev;
1022 	unsigned i;
1023 
1024 	/* enter save mode */
1025 	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
1026 	mutex_lock(&adev->srbm_mutex);
1027 	soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);
1028 
1029 	if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1030 		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
1031 		WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
1032 		/* wait till dequeue take effects */
1033 		for (i = 0; i < adev->usec_timeout; i++) {
1034 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
1035 				break;
1036 			udelay(1);
1037 		}
1038 		if (i >= adev->usec_timeout)
1039 			dev_err(adev->dev, "fail to wait on hqd deactive\n");
1040 	} else {
1041 		dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
1042 	}
1043 
1044 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
1045 	mutex_unlock(&adev->srbm_mutex);
1046 	/* exit safe mode */
1047 	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
1048 }
1049 
1050 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1051 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
1052 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
1053 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1054 	.kiq_query_status = gfx_v9_0_kiq_query_status,
1055 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1056 	.kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
1057 	.set_resources_size = 8,
1058 	.map_queues_size = 7,
1059 	.unmap_queues_size = 6,
1060 	.query_status_size = 7,
1061 	.invalidate_tlbs_size = 2,
1062 };
1063 
1064 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1065 {
1066 	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
1067 }
1068 
1069 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1070 {
1071 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1072 	case IP_VERSION(9, 0, 1):
1073 		soc15_program_register_sequence(adev,
1074 						golden_settings_gc_9_0,
1075 						ARRAY_SIZE(golden_settings_gc_9_0));
1076 		soc15_program_register_sequence(adev,
1077 						golden_settings_gc_9_0_vg10,
1078 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1079 		break;
1080 	case IP_VERSION(9, 2, 1):
1081 		soc15_program_register_sequence(adev,
1082 						golden_settings_gc_9_2_1,
1083 						ARRAY_SIZE(golden_settings_gc_9_2_1));
1084 		soc15_program_register_sequence(adev,
1085 						golden_settings_gc_9_2_1_vg12,
1086 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1087 		break;
1088 	case IP_VERSION(9, 4, 0):
1089 		soc15_program_register_sequence(adev,
1090 						golden_settings_gc_9_0,
1091 						ARRAY_SIZE(golden_settings_gc_9_0));
1092 		soc15_program_register_sequence(adev,
1093 						golden_settings_gc_9_0_vg20,
1094 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1095 		break;
1096 	case IP_VERSION(9, 4, 1):
1097 		soc15_program_register_sequence(adev,
1098 						golden_settings_gc_9_4_1_arct,
1099 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1100 		break;
1101 	case IP_VERSION(9, 2, 2):
1102 	case IP_VERSION(9, 1, 0):
1103 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1104 						ARRAY_SIZE(golden_settings_gc_9_1));
1105 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1106 			soc15_program_register_sequence(adev,
1107 							golden_settings_gc_9_1_rv2,
1108 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1109 		else
1110 			soc15_program_register_sequence(adev,
1111 							golden_settings_gc_9_1_rv1,
1112 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1113 		break;
1114 	 case IP_VERSION(9, 3, 0):
1115 		soc15_program_register_sequence(adev,
1116 						golden_settings_gc_9_1_rn,
1117 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
1118 		return; /* for renoir, don't need common goldensetting */
1119 	case IP_VERSION(9, 4, 2):
1120 		gfx_v9_4_2_init_golden_registers(adev,
1121 						 adev->smuio.funcs->get_die_id(adev));
1122 		break;
1123 	default:
1124 		break;
1125 	}
1126 
1127 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1128 	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
1129 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1130 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1131 }
1132 
1133 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1134 				       bool wc, uint32_t reg, uint32_t val)
1135 {
1136 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1137 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1138 				WRITE_DATA_DST_SEL(0) |
1139 				(wc ? WR_CONFIRM : 0));
1140 	amdgpu_ring_write(ring, reg);
1141 	amdgpu_ring_write(ring, 0);
1142 	amdgpu_ring_write(ring, val);
1143 }
1144 
1145 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1146 				  int mem_space, int opt, uint32_t addr0,
1147 				  uint32_t addr1, uint32_t ref, uint32_t mask,
1148 				  uint32_t inv)
1149 {
1150 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1151 	amdgpu_ring_write(ring,
1152 				 /* memory (1) or register (0) */
1153 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1154 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1155 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1156 				 WAIT_REG_MEM_ENGINE(eng_sel)));
1157 
1158 	if (mem_space)
1159 		BUG_ON(addr0 & 0x3); /* Dword align */
1160 	amdgpu_ring_write(ring, addr0);
1161 	amdgpu_ring_write(ring, addr1);
1162 	amdgpu_ring_write(ring, ref);
1163 	amdgpu_ring_write(ring, mask);
1164 	amdgpu_ring_write(ring, inv); /* poll interval */
1165 }
1166 
1167 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1168 {
1169 	struct amdgpu_device *adev = ring->adev;
1170 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1171 	uint32_t tmp = 0;
1172 	unsigned i;
1173 	int r;
1174 
1175 	WREG32(scratch, 0xCAFEDEAD);
1176 	r = amdgpu_ring_alloc(ring, 3);
1177 	if (r)
1178 		return r;
1179 
1180 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1181 	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1182 	amdgpu_ring_write(ring, 0xDEADBEEF);
1183 	amdgpu_ring_commit(ring);
1184 
1185 	for (i = 0; i < adev->usec_timeout; i++) {
1186 		tmp = RREG32(scratch);
1187 		if (tmp == 0xDEADBEEF)
1188 			break;
1189 		udelay(1);
1190 	}
1191 
1192 	if (i >= adev->usec_timeout)
1193 		r = -ETIMEDOUT;
1194 	return r;
1195 }
1196 
1197 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1198 {
1199 	struct amdgpu_device *adev = ring->adev;
1200 	struct amdgpu_ib ib;
1201 	struct dma_fence *f = NULL;
1202 
1203 	unsigned index;
1204 	uint64_t gpu_addr;
1205 	uint32_t tmp;
1206 	long r;
1207 
1208 	r = amdgpu_device_wb_get(adev, &index);
1209 	if (r)
1210 		return r;
1211 
1212 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1213 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1214 	memset(&ib, 0, sizeof(ib));
1215 
1216 	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1217 	if (r)
1218 		goto err1;
1219 
1220 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1221 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1222 	ib.ptr[2] = lower_32_bits(gpu_addr);
1223 	ib.ptr[3] = upper_32_bits(gpu_addr);
1224 	ib.ptr[4] = 0xDEADBEEF;
1225 	ib.length_dw = 5;
1226 
1227 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1228 	if (r)
1229 		goto err2;
1230 
1231 	r = dma_fence_wait_timeout(f, false, timeout);
1232 	if (r == 0) {
1233 		r = -ETIMEDOUT;
1234 		goto err2;
1235 	} else if (r < 0) {
1236 		goto err2;
1237 	}
1238 
1239 	tmp = adev->wb.wb[index];
1240 	if (tmp == 0xDEADBEEF)
1241 		r = 0;
1242 	else
1243 		r = -EINVAL;
1244 
1245 err2:
1246 	amdgpu_ib_free(adev, &ib, NULL);
1247 	dma_fence_put(f);
1248 err1:
1249 	amdgpu_device_wb_free(adev, index);
1250 	return r;
1251 }
1252 
1253 
1254 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1255 {
1256 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
1257 	amdgpu_ucode_release(&adev->gfx.me_fw);
1258 	amdgpu_ucode_release(&adev->gfx.ce_fw);
1259 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
1260 	amdgpu_ucode_release(&adev->gfx.mec_fw);
1261 	amdgpu_ucode_release(&adev->gfx.mec2_fw);
1262 
1263 	kfree(adev->gfx.rlc.register_list_format);
1264 }
1265 
1266 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1267 {
1268 	adev->gfx.me_fw_write_wait = false;
1269 	adev->gfx.mec_fw_write_wait = false;
1270 
1271 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1272 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1273 	     (adev->gfx.mec_feature_version < 46) ||
1274 	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
1275 	     (adev->gfx.pfp_feature_version < 46)))
1276 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1277 
1278 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1279 	case IP_VERSION(9, 0, 1):
1280 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1281 		    (adev->gfx.me_feature_version >= 42) &&
1282 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1283 		    (adev->gfx.pfp_feature_version >= 42))
1284 			adev->gfx.me_fw_write_wait = true;
1285 
1286 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1287 		    (adev->gfx.mec_feature_version >= 42))
1288 			adev->gfx.mec_fw_write_wait = true;
1289 		break;
1290 	case IP_VERSION(9, 2, 1):
1291 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1292 		    (adev->gfx.me_feature_version >= 44) &&
1293 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1294 		    (adev->gfx.pfp_feature_version >= 44))
1295 			adev->gfx.me_fw_write_wait = true;
1296 
1297 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1298 		    (adev->gfx.mec_feature_version >= 44))
1299 			adev->gfx.mec_fw_write_wait = true;
1300 		break;
1301 	case IP_VERSION(9, 4, 0):
1302 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1303 		    (adev->gfx.me_feature_version >= 44) &&
1304 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1305 		    (adev->gfx.pfp_feature_version >= 44))
1306 			adev->gfx.me_fw_write_wait = true;
1307 
1308 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1309 		    (adev->gfx.mec_feature_version >= 44))
1310 			adev->gfx.mec_fw_write_wait = true;
1311 		break;
1312 	case IP_VERSION(9, 1, 0):
1313 	case IP_VERSION(9, 2, 2):
1314 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1315 		    (adev->gfx.me_feature_version >= 42) &&
1316 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1317 		    (adev->gfx.pfp_feature_version >= 42))
1318 			adev->gfx.me_fw_write_wait = true;
1319 
1320 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1321 		    (adev->gfx.mec_feature_version >= 42))
1322 			adev->gfx.mec_fw_write_wait = true;
1323 		break;
1324 	default:
1325 		adev->gfx.me_fw_write_wait = true;
1326 		adev->gfx.mec_fw_write_wait = true;
1327 		break;
1328 	}
1329 }
1330 
1331 struct amdgpu_gfxoff_quirk {
1332 	u16 chip_vendor;
1333 	u16 chip_device;
1334 	u16 subsys_vendor;
1335 	u16 subsys_device;
1336 	u8 revision;
1337 };
1338 
1339 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1340 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1341 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1342 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1343 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1344 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1345 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1346 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1347 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1348 	{ 0, 0, 0, 0, 0 },
1349 };
1350 
1351 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1352 {
1353 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1354 
1355 	while (p && p->chip_device != 0) {
1356 		if (pdev->vendor == p->chip_vendor &&
1357 		    pdev->device == p->chip_device &&
1358 		    pdev->subsystem_vendor == p->subsys_vendor &&
1359 		    pdev->subsystem_device == p->subsys_device &&
1360 		    pdev->revision == p->revision) {
1361 			return true;
1362 		}
1363 		++p;
1364 	}
1365 	return false;
1366 }
1367 
1368 static bool is_raven_kicker(struct amdgpu_device *adev)
1369 {
1370 	if (adev->pm.fw_version >= 0x41e2b)
1371 		return true;
1372 	else
1373 		return false;
1374 }
1375 
1376 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1377 {
1378 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1379 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1380 	    (adev->gfx.me_feature_version >= 52))
1381 		return true;
1382 	else
1383 		return false;
1384 }
1385 
1386 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1387 {
1388 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1389 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1390 
1391 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1392 	case IP_VERSION(9, 0, 1):
1393 	case IP_VERSION(9, 2, 1):
1394 	case IP_VERSION(9, 4, 0):
1395 		break;
1396 	case IP_VERSION(9, 2, 2):
1397 	case IP_VERSION(9, 1, 0):
1398 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1399 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1400 		    ((!is_raven_kicker(adev) &&
1401 		      adev->gfx.rlc_fw_version < 531) ||
1402 		     (adev->gfx.rlc_feature_version < 1) ||
1403 		     !adev->gfx.rlc.is_rlc_v2_1))
1404 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1405 
1406 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1407 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1408 				AMD_PG_SUPPORT_CP |
1409 				AMD_PG_SUPPORT_RLC_SMU_HS;
1410 		break;
1411 	case IP_VERSION(9, 3, 0):
1412 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1413 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1414 				AMD_PG_SUPPORT_CP |
1415 				AMD_PG_SUPPORT_RLC_SMU_HS;
1416 		break;
1417 	default:
1418 		break;
1419 	}
1420 }
1421 
1422 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1423 					  char *chip_name)
1424 {
1425 	int err;
1426 
1427 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
1428 				   "amdgpu/%s_pfp.bin", chip_name);
1429 	if (err)
1430 		goto out;
1431 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1432 
1433 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1434 				   "amdgpu/%s_me.bin", chip_name);
1435 	if (err)
1436 		goto out;
1437 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1438 
1439 	err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1440 				   "amdgpu/%s_ce.bin", chip_name);
1441 	if (err)
1442 		goto out;
1443 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1444 
1445 out:
1446 	if (err) {
1447 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1448 		amdgpu_ucode_release(&adev->gfx.me_fw);
1449 		amdgpu_ucode_release(&adev->gfx.ce_fw);
1450 	}
1451 	return err;
1452 }
1453 
1454 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1455 				       char *chip_name)
1456 {
1457 	int err;
1458 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1459 	uint16_t version_major;
1460 	uint16_t version_minor;
1461 	uint32_t smu_version;
1462 
1463 	/*
1464 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1465 	 * instead of picasso_rlc.bin.
1466 	 * Judgment method:
1467 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1468 	 *          or revision >= 0xD8 && revision <= 0xDF
1469 	 * otherwise is PCO FP5
1470 	 */
1471 	if (!strcmp(chip_name, "picasso") &&
1472 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1473 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1474 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1475 					   "amdgpu/%s_rlc_am4.bin", chip_name);
1476 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1477 		(smu_version >= 0x41e2b))
1478 		/**
1479 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1480 		*/
1481 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1482 					   "amdgpu/%s_kicker_rlc.bin", chip_name);
1483 	else
1484 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1485 					   "amdgpu/%s_rlc.bin", chip_name);
1486 	if (err)
1487 		goto out;
1488 
1489 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1490 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1491 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1492 	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1493 out:
1494 	if (err)
1495 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1496 
1497 	return err;
1498 }
1499 
1500 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1501 {
1502 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1503 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1504 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1505 		return false;
1506 
1507 	return true;
1508 }
1509 
1510 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1511 					      char *chip_name)
1512 {
1513 	int err;
1514 
1515 	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1516 		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1517 					   "amdgpu/%s_sjt_mec.bin", chip_name);
1518 	else
1519 		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1520 					   "amdgpu/%s_mec.bin", chip_name);
1521 	if (err)
1522 		goto out;
1523 
1524 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1525 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1526 
1527 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1528 		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1529 			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1530 						   "amdgpu/%s_sjt_mec2.bin", chip_name);
1531 		else
1532 			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1533 						   "amdgpu/%s_mec2.bin", chip_name);
1534 		if (!err) {
1535 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1536 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1537 		} else {
1538 			err = 0;
1539 			amdgpu_ucode_release(&adev->gfx.mec2_fw);
1540 		}
1541 	} else {
1542 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1543 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1544 	}
1545 
1546 	gfx_v9_0_check_if_need_gfxoff(adev);
1547 	gfx_v9_0_check_fw_write_wait(adev);
1548 
1549 out:
1550 	if (err)
1551 		amdgpu_ucode_release(&adev->gfx.mec_fw);
1552 	return err;
1553 }
1554 
1555 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1556 {
1557 	char ucode_prefix[30];
1558 	int r;
1559 
1560 	DRM_DEBUG("\n");
1561 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1562 
1563 	/* No CPG in Arcturus */
1564 	if (adev->gfx.num_gfx_rings) {
1565 		r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1566 		if (r)
1567 			return r;
1568 	}
1569 
1570 	r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1571 	if (r)
1572 		return r;
1573 
1574 	r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1575 	if (r)
1576 		return r;
1577 
1578 	return r;
1579 }
1580 
1581 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1582 {
1583 	u32 count = 0;
1584 	const struct cs_section_def *sect = NULL;
1585 	const struct cs_extent_def *ext = NULL;
1586 
1587 	/* begin clear state */
1588 	count += 2;
1589 	/* context control state */
1590 	count += 3;
1591 
1592 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1593 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1594 			if (sect->id == SECT_CONTEXT)
1595 				count += 2 + ext->reg_count;
1596 			else
1597 				return 0;
1598 		}
1599 	}
1600 
1601 	/* end clear state */
1602 	count += 2;
1603 	/* clear state */
1604 	count += 2;
1605 
1606 	return count;
1607 }
1608 
1609 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1610 				    volatile u32 *buffer)
1611 {
1612 	u32 count = 0, i;
1613 	const struct cs_section_def *sect = NULL;
1614 	const struct cs_extent_def *ext = NULL;
1615 
1616 	if (adev->gfx.rlc.cs_data == NULL)
1617 		return;
1618 	if (buffer == NULL)
1619 		return;
1620 
1621 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1622 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1623 
1624 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1625 	buffer[count++] = cpu_to_le32(0x80000000);
1626 	buffer[count++] = cpu_to_le32(0x80000000);
1627 
1628 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1629 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1630 			if (sect->id == SECT_CONTEXT) {
1631 				buffer[count++] =
1632 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1633 				buffer[count++] = cpu_to_le32(ext->reg_index -
1634 						PACKET3_SET_CONTEXT_REG_START);
1635 				for (i = 0; i < ext->reg_count; i++)
1636 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1637 			} else {
1638 				return;
1639 			}
1640 		}
1641 	}
1642 
1643 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1644 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1645 
1646 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1647 	buffer[count++] = cpu_to_le32(0);
1648 }
1649 
1650 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1651 {
1652 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1653 	uint32_t pg_always_on_cu_num = 2;
1654 	uint32_t always_on_cu_num;
1655 	uint32_t i, j, k;
1656 	uint32_t mask, cu_bitmap, counter;
1657 
1658 	if (adev->flags & AMD_IS_APU)
1659 		always_on_cu_num = 4;
1660 	else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1661 		always_on_cu_num = 8;
1662 	else
1663 		always_on_cu_num = 12;
1664 
1665 	mutex_lock(&adev->grbm_idx_mutex);
1666 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1667 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1668 			mask = 1;
1669 			cu_bitmap = 0;
1670 			counter = 0;
1671 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1672 
1673 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1674 				if (cu_info->bitmap[0][i][j] & mask) {
1675 					if (counter == pg_always_on_cu_num)
1676 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1677 					if (counter < always_on_cu_num)
1678 						cu_bitmap |= mask;
1679 					else
1680 						break;
1681 					counter++;
1682 				}
1683 				mask <<= 1;
1684 			}
1685 
1686 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1687 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1688 		}
1689 	}
1690 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1691 	mutex_unlock(&adev->grbm_idx_mutex);
1692 }
1693 
1694 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1695 {
1696 	uint32_t data;
1697 
1698 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1699 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1700 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1701 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1702 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1703 
1704 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1705 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1706 
1707 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1708 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1709 
1710 	mutex_lock(&adev->grbm_idx_mutex);
1711 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1712 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1713 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1714 
1715 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1716 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1717 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1718 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1719 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1720 
1721 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1722 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1723 	data &= 0x0000FFFF;
1724 	data |= 0x00C00000;
1725 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1726 
1727 	/*
1728 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1729 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1730 	 */
1731 
1732 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1733 	 * but used for RLC_LB_CNTL configuration */
1734 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1735 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1736 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1737 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1738 	mutex_unlock(&adev->grbm_idx_mutex);
1739 
1740 	gfx_v9_0_init_always_on_cu_mask(adev);
1741 }
1742 
1743 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1744 {
1745 	uint32_t data;
1746 
1747 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1748 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1749 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1750 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1751 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1752 
1753 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1754 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1755 
1756 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1757 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1758 
1759 	mutex_lock(&adev->grbm_idx_mutex);
1760 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1761 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1762 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1763 
1764 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1765 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1766 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1767 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1768 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1769 
1770 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1771 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1772 	data &= 0x0000FFFF;
1773 	data |= 0x00C00000;
1774 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1775 
1776 	/*
1777 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1778 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1779 	 */
1780 
1781 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1782 	 * but used for RLC_LB_CNTL configuration */
1783 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1784 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1785 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1786 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1787 	mutex_unlock(&adev->grbm_idx_mutex);
1788 
1789 	gfx_v9_0_init_always_on_cu_mask(adev);
1790 }
1791 
1792 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1793 {
1794 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1795 }
1796 
1797 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1798 {
1799 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1800 		return 5;
1801 	else
1802 		return 4;
1803 }
1804 
1805 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1806 {
1807 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1808 
1809 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1810 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1811 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1812 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1813 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1814 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1815 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1816 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1817 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1818 }
1819 
1820 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1821 {
1822 	const struct cs_section_def *cs_data;
1823 	int r;
1824 
1825 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1826 
1827 	cs_data = adev->gfx.rlc.cs_data;
1828 
1829 	if (cs_data) {
1830 		/* init clear state block */
1831 		r = amdgpu_gfx_rlc_init_csb(adev);
1832 		if (r)
1833 			return r;
1834 	}
1835 
1836 	if (adev->flags & AMD_IS_APU) {
1837 		/* TODO: double check the cp_table_size for RV */
1838 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1839 		r = amdgpu_gfx_rlc_init_cpt(adev);
1840 		if (r)
1841 			return r;
1842 	}
1843 
1844 	return 0;
1845 }
1846 
1847 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1848 {
1849 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1850 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1851 }
1852 
1853 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1854 {
1855 	int r;
1856 	u32 *hpd;
1857 	const __le32 *fw_data;
1858 	unsigned fw_size;
1859 	u32 *fw;
1860 	size_t mec_hpd_size;
1861 
1862 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1863 
1864 	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1865 
1866 	/* take ownership of the relevant compute queues */
1867 	amdgpu_gfx_compute_queue_acquire(adev);
1868 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1869 	if (mec_hpd_size) {
1870 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1871 					      AMDGPU_GEM_DOMAIN_VRAM |
1872 					      AMDGPU_GEM_DOMAIN_GTT,
1873 					      &adev->gfx.mec.hpd_eop_obj,
1874 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1875 					      (void **)&hpd);
1876 		if (r) {
1877 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1878 			gfx_v9_0_mec_fini(adev);
1879 			return r;
1880 		}
1881 
1882 		memset(hpd, 0, mec_hpd_size);
1883 
1884 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1885 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1886 	}
1887 
1888 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1889 
1890 	fw_data = (const __le32 *)
1891 		(adev->gfx.mec_fw->data +
1892 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1893 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1894 
1895 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1896 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1897 				      &adev->gfx.mec.mec_fw_obj,
1898 				      &adev->gfx.mec.mec_fw_gpu_addr,
1899 				      (void **)&fw);
1900 	if (r) {
1901 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1902 		gfx_v9_0_mec_fini(adev);
1903 		return r;
1904 	}
1905 
1906 	memcpy(fw, fw_data, fw_size);
1907 
1908 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1909 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1910 
1911 	return 0;
1912 }
1913 
1914 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1915 {
1916 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1917 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1918 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1919 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1920 		(SQ_IND_INDEX__FORCE_READ_MASK));
1921 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1922 }
1923 
1924 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1925 			   uint32_t wave, uint32_t thread,
1926 			   uint32_t regno, uint32_t num, uint32_t *out)
1927 {
1928 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1929 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1930 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1931 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1932 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1933 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1934 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1935 	while (num--)
1936 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1937 }
1938 
1939 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1940 {
1941 	/* type 1 wave data */
1942 	dst[(*no_fields)++] = 1;
1943 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1944 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1945 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1946 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1947 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1948 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1949 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1950 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1951 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1952 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1953 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1954 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1955 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1956 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1957 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1958 }
1959 
1960 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1961 				     uint32_t wave, uint32_t start,
1962 				     uint32_t size, uint32_t *dst)
1963 {
1964 	wave_read_regs(
1965 		adev, simd, wave, 0,
1966 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1967 }
1968 
1969 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1970 				     uint32_t wave, uint32_t thread,
1971 				     uint32_t start, uint32_t size,
1972 				     uint32_t *dst)
1973 {
1974 	wave_read_regs(
1975 		adev, simd, wave, thread,
1976 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1977 }
1978 
1979 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1980 				  u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1981 {
1982 	soc15_grbm_select(adev, me, pipe, q, vm, 0);
1983 }
1984 
1985 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1986         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1987         .select_se_sh = &gfx_v9_0_select_se_sh,
1988         .read_wave_data = &gfx_v9_0_read_wave_data,
1989         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1990         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1991         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1992 };
1993 
1994 const struct amdgpu_ras_block_hw_ops  gfx_v9_0_ras_ops = {
1995 		.ras_error_inject = &gfx_v9_0_ras_error_inject,
1996 		.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1997 		.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1998 };
1999 
2000 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2001 	.ras_block = {
2002 		.hw_ops = &gfx_v9_0_ras_ops,
2003 	},
2004 };
2005 
2006 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2007 {
2008 	u32 gb_addr_config;
2009 	int err;
2010 
2011 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2012 	case IP_VERSION(9, 0, 1):
2013 		adev->gfx.config.max_hw_contexts = 8;
2014 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2015 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2016 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2017 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2018 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2019 		break;
2020 	case IP_VERSION(9, 2, 1):
2021 		adev->gfx.config.max_hw_contexts = 8;
2022 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2023 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2024 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2025 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2026 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2027 		DRM_INFO("fix gfx.config for vega12\n");
2028 		break;
2029 	case IP_VERSION(9, 4, 0):
2030 		adev->gfx.ras = &gfx_v9_0_ras;
2031 		adev->gfx.config.max_hw_contexts = 8;
2032 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2033 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2034 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2035 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2036 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2037 		gb_addr_config &= ~0xf3e777ff;
2038 		gb_addr_config |= 0x22014042;
2039 		/* check vbios table if gpu info is not available */
2040 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2041 		if (err)
2042 			return err;
2043 		break;
2044 	case IP_VERSION(9, 2, 2):
2045 	case IP_VERSION(9, 1, 0):
2046 		adev->gfx.config.max_hw_contexts = 8;
2047 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2048 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2049 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2050 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2051 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2052 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2053 		else
2054 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2055 		break;
2056 	case IP_VERSION(9, 4, 1):
2057 		adev->gfx.ras = &gfx_v9_4_ras;
2058 		adev->gfx.config.max_hw_contexts = 8;
2059 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2060 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2061 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2062 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2063 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2064 		gb_addr_config &= ~0xf3e777ff;
2065 		gb_addr_config |= 0x22014042;
2066 		break;
2067 	case IP_VERSION(9, 3, 0):
2068 		adev->gfx.config.max_hw_contexts = 8;
2069 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2070 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2071 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2072 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2073 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2074 		gb_addr_config &= ~0xf3e777ff;
2075 		gb_addr_config |= 0x22010042;
2076 		break;
2077 	case IP_VERSION(9, 4, 2):
2078 		adev->gfx.ras = &gfx_v9_4_2_ras;
2079 		adev->gfx.config.max_hw_contexts = 8;
2080 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2081 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2082 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2083 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2084 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2085 		gb_addr_config &= ~0xf3e777ff;
2086 		gb_addr_config |= 0x22014042;
2087 		/* check vbios table if gpu info is not available */
2088 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2089 		if (err)
2090 			return err;
2091 		break;
2092 	default:
2093 		BUG();
2094 		break;
2095 	}
2096 
2097 	adev->gfx.config.gb_addr_config = gb_addr_config;
2098 
2099 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2100 			REG_GET_FIELD(
2101 					adev->gfx.config.gb_addr_config,
2102 					GB_ADDR_CONFIG,
2103 					NUM_PIPES);
2104 
2105 	adev->gfx.config.max_tile_pipes =
2106 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2107 
2108 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2109 			REG_GET_FIELD(
2110 					adev->gfx.config.gb_addr_config,
2111 					GB_ADDR_CONFIG,
2112 					NUM_BANKS);
2113 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2114 			REG_GET_FIELD(
2115 					adev->gfx.config.gb_addr_config,
2116 					GB_ADDR_CONFIG,
2117 					MAX_COMPRESSED_FRAGS);
2118 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2119 			REG_GET_FIELD(
2120 					adev->gfx.config.gb_addr_config,
2121 					GB_ADDR_CONFIG,
2122 					NUM_RB_PER_SE);
2123 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2124 			REG_GET_FIELD(
2125 					adev->gfx.config.gb_addr_config,
2126 					GB_ADDR_CONFIG,
2127 					NUM_SHADER_ENGINES);
2128 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2129 			REG_GET_FIELD(
2130 					adev->gfx.config.gb_addr_config,
2131 					GB_ADDR_CONFIG,
2132 					PIPE_INTERLEAVE_SIZE));
2133 
2134 	return 0;
2135 }
2136 
2137 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2138 				      int mec, int pipe, int queue)
2139 {
2140 	unsigned irq_type;
2141 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2142 	unsigned int hw_prio;
2143 
2144 	ring = &adev->gfx.compute_ring[ring_id];
2145 
2146 	/* mec0 is me1 */
2147 	ring->me = mec + 1;
2148 	ring->pipe = pipe;
2149 	ring->queue = queue;
2150 
2151 	ring->ring_obj = NULL;
2152 	ring->use_doorbell = true;
2153 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2154 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2155 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2156 	ring->vm_hub = AMDGPU_GFXHUB(0);
2157 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2158 
2159 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2160 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2161 		+ ring->pipe;
2162 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2163 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2164 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2165 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2166 				hw_prio, NULL);
2167 }
2168 
2169 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2170 {
2171 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2172 	uint32_t *ptr;
2173 	uint32_t inst;
2174 
2175 	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2176 	if (!ptr) {
2177 		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2178 		adev->gfx.ip_dump_core = NULL;
2179 	} else {
2180 		adev->gfx.ip_dump_core = ptr;
2181 	}
2182 
2183 	/* Allocate memory for compute queue registers for all the instances */
2184 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2185 	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2186 		adev->gfx.mec.num_queue_per_pipe;
2187 
2188 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2189 	if (!ptr) {
2190 		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2191 		adev->gfx.ip_dump_compute_queues = NULL;
2192 	} else {
2193 		adev->gfx.ip_dump_compute_queues = ptr;
2194 	}
2195 }
2196 
2197 static int gfx_v9_0_sw_init(void *handle)
2198 {
2199 	int i, j, k, r, ring_id;
2200 	int xcc_id = 0;
2201 	struct amdgpu_ring *ring;
2202 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2203 	unsigned int hw_prio;
2204 
2205 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2206 	case IP_VERSION(9, 0, 1):
2207 	case IP_VERSION(9, 2, 1):
2208 	case IP_VERSION(9, 4, 0):
2209 	case IP_VERSION(9, 2, 2):
2210 	case IP_VERSION(9, 1, 0):
2211 	case IP_VERSION(9, 4, 1):
2212 	case IP_VERSION(9, 3, 0):
2213 	case IP_VERSION(9, 4, 2):
2214 		adev->gfx.mec.num_mec = 2;
2215 		break;
2216 	default:
2217 		adev->gfx.mec.num_mec = 1;
2218 		break;
2219 	}
2220 
2221 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2222 	default:
2223 		adev->gfx.enable_cleaner_shader = false;
2224 		break;
2225 	}
2226 
2227 	adev->gfx.mec.num_pipe_per_mec = 4;
2228 	adev->gfx.mec.num_queue_per_pipe = 8;
2229 
2230 	/* EOP Event */
2231 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2232 	if (r)
2233 		return r;
2234 
2235 	/* Bad opcode Event */
2236 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
2237 			      GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
2238 			      &adev->gfx.bad_op_irq);
2239 	if (r)
2240 		return r;
2241 
2242 	/* Privileged reg */
2243 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2244 			      &adev->gfx.priv_reg_irq);
2245 	if (r)
2246 		return r;
2247 
2248 	/* Privileged inst */
2249 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2250 			      &adev->gfx.priv_inst_irq);
2251 	if (r)
2252 		return r;
2253 
2254 	/* ECC error */
2255 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2256 			      &adev->gfx.cp_ecc_error_irq);
2257 	if (r)
2258 		return r;
2259 
2260 	/* FUE error */
2261 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2262 			      &adev->gfx.cp_ecc_error_irq);
2263 	if (r)
2264 		return r;
2265 
2266 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2267 
2268 	if (adev->gfx.rlc.funcs) {
2269 		if (adev->gfx.rlc.funcs->init) {
2270 			r = adev->gfx.rlc.funcs->init(adev);
2271 			if (r) {
2272 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2273 				return r;
2274 			}
2275 		}
2276 	}
2277 
2278 	r = gfx_v9_0_mec_init(adev);
2279 	if (r) {
2280 		DRM_ERROR("Failed to init MEC BOs!\n");
2281 		return r;
2282 	}
2283 
2284 	/* set up the gfx ring */
2285 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2286 		ring = &adev->gfx.gfx_ring[i];
2287 		ring->ring_obj = NULL;
2288 		if (!i)
2289 			sprintf(ring->name, "gfx");
2290 		else
2291 			sprintf(ring->name, "gfx_%d", i);
2292 		ring->use_doorbell = true;
2293 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2294 
2295 		/* disable scheduler on the real ring */
2296 		ring->no_scheduler = adev->gfx.mcbp;
2297 		ring->vm_hub = AMDGPU_GFXHUB(0);
2298 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2299 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2300 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2301 		if (r)
2302 			return r;
2303 	}
2304 
2305 	/* set up the software rings */
2306 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2307 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2308 			ring = &adev->gfx.sw_gfx_ring[i];
2309 			ring->ring_obj = NULL;
2310 			sprintf(ring->name, amdgpu_sw_ring_name(i));
2311 			ring->use_doorbell = true;
2312 			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2313 			ring->is_sw_ring = true;
2314 			hw_prio = amdgpu_sw_ring_priority(i);
2315 			ring->vm_hub = AMDGPU_GFXHUB(0);
2316 			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2317 					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2318 					     NULL);
2319 			if (r)
2320 				return r;
2321 			ring->wptr = 0;
2322 		}
2323 
2324 		/* init the muxer and add software rings */
2325 		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2326 					 GFX9_NUM_SW_GFX_RINGS);
2327 		if (r) {
2328 			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2329 			return r;
2330 		}
2331 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2332 			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2333 							&adev->gfx.sw_gfx_ring[i]);
2334 			if (r) {
2335 				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2336 				return r;
2337 			}
2338 		}
2339 	}
2340 
2341 	/* set up the compute queues - allocate horizontally across pipes */
2342 	ring_id = 0;
2343 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2344 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2345 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2346 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2347 								     k, j))
2348 					continue;
2349 
2350 				r = gfx_v9_0_compute_ring_init(adev,
2351 							       ring_id,
2352 							       i, k, j);
2353 				if (r)
2354 					return r;
2355 
2356 				ring_id++;
2357 			}
2358 		}
2359 	}
2360 
2361 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2362 	if (r) {
2363 		DRM_ERROR("Failed to init KIQ BOs!\n");
2364 		return r;
2365 	}
2366 
2367 	r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2368 	if (r)
2369 		return r;
2370 
2371 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
2372 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2373 	if (r)
2374 		return r;
2375 
2376 	adev->gfx.ce_ram_size = 0x8000;
2377 
2378 	r = gfx_v9_0_gpu_early_init(adev);
2379 	if (r)
2380 		return r;
2381 
2382 	if (amdgpu_gfx_ras_sw_init(adev)) {
2383 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2384 		return -EINVAL;
2385 	}
2386 
2387 	gfx_v9_0_alloc_ip_dump(adev);
2388 
2389 	r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
2390 	if (r)
2391 		return r;
2392 
2393 	return 0;
2394 }
2395 
2396 
2397 static int gfx_v9_0_sw_fini(void *handle)
2398 {
2399 	int i;
2400 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2401 
2402 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2403 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2404 			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2405 		amdgpu_ring_mux_fini(&adev->gfx.muxer);
2406 	}
2407 
2408 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2409 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2410 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2411 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2412 
2413 	amdgpu_gfx_mqd_sw_fini(adev, 0);
2414 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2415 	amdgpu_gfx_kiq_fini(adev, 0);
2416 
2417 	gfx_v9_0_mec_fini(adev);
2418 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2419 				&adev->gfx.rlc.clear_state_gpu_addr,
2420 				(void **)&adev->gfx.rlc.cs_ptr);
2421 	if (adev->flags & AMD_IS_APU) {
2422 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2423 				&adev->gfx.rlc.cp_table_gpu_addr,
2424 				(void **)&adev->gfx.rlc.cp_table_ptr);
2425 	}
2426 	gfx_v9_0_free_microcode(adev);
2427 
2428 	amdgpu_gfx_sysfs_isolation_shader_fini(adev);
2429 
2430 	kfree(adev->gfx.ip_dump_core);
2431 	kfree(adev->gfx.ip_dump_compute_queues);
2432 
2433 	return 0;
2434 }
2435 
2436 
2437 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2438 {
2439 	/* TODO */
2440 }
2441 
2442 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2443 			   u32 instance, int xcc_id)
2444 {
2445 	u32 data;
2446 
2447 	if (instance == 0xffffffff)
2448 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2449 	else
2450 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2451 
2452 	if (se_num == 0xffffffff)
2453 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2454 	else
2455 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2456 
2457 	if (sh_num == 0xffffffff)
2458 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2459 	else
2460 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2461 
2462 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2463 }
2464 
2465 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2466 {
2467 	u32 data, mask;
2468 
2469 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2470 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2471 
2472 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2473 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2474 
2475 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2476 					 adev->gfx.config.max_sh_per_se);
2477 
2478 	return (~data) & mask;
2479 }
2480 
2481 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2482 {
2483 	int i, j;
2484 	u32 data;
2485 	u32 active_rbs = 0;
2486 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2487 					adev->gfx.config.max_sh_per_se;
2488 
2489 	mutex_lock(&adev->grbm_idx_mutex);
2490 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2491 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2492 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2493 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2494 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2495 					       rb_bitmap_width_per_sh);
2496 		}
2497 	}
2498 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2499 	mutex_unlock(&adev->grbm_idx_mutex);
2500 
2501 	adev->gfx.config.backend_enable_mask = active_rbs;
2502 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2503 }
2504 
2505 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2506 				uint32_t first_vmid,
2507 				uint32_t last_vmid)
2508 {
2509 	uint32_t data;
2510 	uint32_t trap_config_vmid_mask = 0;
2511 	int i;
2512 
2513 	/* Calculate trap config vmid mask */
2514 	for (i = first_vmid; i < last_vmid; i++)
2515 		trap_config_vmid_mask |= (1 << i);
2516 
2517 	data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2518 			VMID_SEL, trap_config_vmid_mask);
2519 	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2520 			TRAP_EN, 1);
2521 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2522 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2523 
2524 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2525 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2526 }
2527 
2528 #define DEFAULT_SH_MEM_BASES	(0x6000)
2529 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2530 {
2531 	int i;
2532 	uint32_t sh_mem_config;
2533 	uint32_t sh_mem_bases;
2534 
2535 	/*
2536 	 * Configure apertures:
2537 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2538 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2539 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2540 	 */
2541 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2542 
2543 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2544 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2545 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2546 
2547 	mutex_lock(&adev->srbm_mutex);
2548 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2549 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2550 		/* CP and shaders */
2551 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2552 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2553 	}
2554 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2555 	mutex_unlock(&adev->srbm_mutex);
2556 
2557 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2558 	   access. These should be enabled by FW for target VMIDs. */
2559 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2560 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2561 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2562 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2563 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2564 	}
2565 }
2566 
2567 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2568 {
2569 	int vmid;
2570 
2571 	/*
2572 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2573 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2574 	 * the driver can enable them for graphics. VMID0 should maintain
2575 	 * access so that HWS firmware can save/restore entries.
2576 	 */
2577 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2578 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2579 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2580 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2581 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2582 	}
2583 }
2584 
2585 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2586 {
2587 	uint32_t tmp;
2588 
2589 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2590 	case IP_VERSION(9, 4, 1):
2591 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2592 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2593 				!READ_ONCE(adev->barrier_has_auto_waitcnt));
2594 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2595 		break;
2596 	default:
2597 		break;
2598 	}
2599 }
2600 
2601 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2602 {
2603 	u32 tmp;
2604 	int i;
2605 
2606 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2607 
2608 	gfx_v9_0_tiling_mode_table_init(adev);
2609 
2610 	if (adev->gfx.num_gfx_rings)
2611 		gfx_v9_0_setup_rb(adev);
2612 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2613 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2614 
2615 	/* XXX SH_MEM regs */
2616 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2617 	mutex_lock(&adev->srbm_mutex);
2618 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2619 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2620 		/* CP and shaders */
2621 		if (i == 0) {
2622 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2623 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2624 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2625 					    !!adev->gmc.noretry);
2626 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2627 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2628 		} else {
2629 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2630 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2631 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2632 					    !!adev->gmc.noretry);
2633 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2634 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2635 				(adev->gmc.private_aperture_start >> 48));
2636 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2637 				(adev->gmc.shared_aperture_start >> 48));
2638 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2639 		}
2640 	}
2641 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2642 
2643 	mutex_unlock(&adev->srbm_mutex);
2644 
2645 	gfx_v9_0_init_compute_vmid(adev);
2646 	gfx_v9_0_init_gds_vmid(adev);
2647 	gfx_v9_0_init_sq_config(adev);
2648 }
2649 
2650 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2651 {
2652 	u32 i, j, k;
2653 	u32 mask;
2654 
2655 	mutex_lock(&adev->grbm_idx_mutex);
2656 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2657 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2658 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2659 			for (k = 0; k < adev->usec_timeout; k++) {
2660 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2661 					break;
2662 				udelay(1);
2663 			}
2664 			if (k == adev->usec_timeout) {
2665 				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2666 						      0xffffffff, 0xffffffff, 0);
2667 				mutex_unlock(&adev->grbm_idx_mutex);
2668 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2669 					 i, j);
2670 				return;
2671 			}
2672 		}
2673 	}
2674 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2675 	mutex_unlock(&adev->grbm_idx_mutex);
2676 
2677 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2678 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2679 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2680 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2681 	for (k = 0; k < adev->usec_timeout; k++) {
2682 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2683 			break;
2684 		udelay(1);
2685 	}
2686 }
2687 
2688 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2689 					       bool enable)
2690 {
2691 	u32 tmp;
2692 
2693 	/* These interrupts should be enabled to drive DS clock */
2694 
2695 	tmp= RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2696 
2697 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2698 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2699 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2700 	if (adev->gfx.num_gfx_rings)
2701 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2702 
2703 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2704 }
2705 
2706 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2707 {
2708 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2709 	/* csib */
2710 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2711 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2712 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2713 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2714 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2715 			adev->gfx.rlc.clear_state_size);
2716 }
2717 
2718 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2719 				int indirect_offset,
2720 				int list_size,
2721 				int *unique_indirect_regs,
2722 				int unique_indirect_reg_count,
2723 				int *indirect_start_offsets,
2724 				int *indirect_start_offsets_count,
2725 				int max_start_offsets_count)
2726 {
2727 	int idx;
2728 
2729 	for (; indirect_offset < list_size; indirect_offset++) {
2730 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2731 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2732 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2733 
2734 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2735 			indirect_offset += 2;
2736 
2737 			/* look for the matching indice */
2738 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2739 				if (unique_indirect_regs[idx] ==
2740 					register_list_format[indirect_offset] ||
2741 					!unique_indirect_regs[idx])
2742 					break;
2743 			}
2744 
2745 			BUG_ON(idx >= unique_indirect_reg_count);
2746 
2747 			if (!unique_indirect_regs[idx])
2748 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2749 
2750 			indirect_offset++;
2751 		}
2752 	}
2753 }
2754 
2755 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2756 {
2757 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2758 	int unique_indirect_reg_count = 0;
2759 
2760 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2761 	int indirect_start_offsets_count = 0;
2762 
2763 	int list_size = 0;
2764 	int i = 0, j = 0;
2765 	u32 tmp = 0;
2766 
2767 	u32 *register_list_format =
2768 		kmemdup(adev->gfx.rlc.register_list_format,
2769 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2770 	if (!register_list_format)
2771 		return -ENOMEM;
2772 
2773 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2774 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2775 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2776 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2777 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2778 				    unique_indirect_regs,
2779 				    unique_indirect_reg_count,
2780 				    indirect_start_offsets,
2781 				    &indirect_start_offsets_count,
2782 				    ARRAY_SIZE(indirect_start_offsets));
2783 
2784 	/* enable auto inc in case it is disabled */
2785 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2786 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2787 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2788 
2789 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2790 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2791 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2792 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2793 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2794 			adev->gfx.rlc.register_restore[i]);
2795 
2796 	/* load indirect register */
2797 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2798 		adev->gfx.rlc.reg_list_format_start);
2799 
2800 	/* direct register portion */
2801 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2802 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2803 			register_list_format[i]);
2804 
2805 	/* indirect register portion */
2806 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2807 		if (register_list_format[i] == 0xFFFFFFFF) {
2808 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2809 			continue;
2810 		}
2811 
2812 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2813 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2814 
2815 		for (j = 0; j < unique_indirect_reg_count; j++) {
2816 			if (register_list_format[i] == unique_indirect_regs[j]) {
2817 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2818 				break;
2819 			}
2820 		}
2821 
2822 		BUG_ON(j >= unique_indirect_reg_count);
2823 
2824 		i++;
2825 	}
2826 
2827 	/* set save/restore list size */
2828 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2829 	list_size = list_size >> 1;
2830 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2831 		adev->gfx.rlc.reg_restore_list_size);
2832 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2833 
2834 	/* write the starting offsets to RLC scratch ram */
2835 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2836 		adev->gfx.rlc.starting_offsets_start);
2837 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2838 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2839 		       indirect_start_offsets[i]);
2840 
2841 	/* load unique indirect regs*/
2842 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2843 		if (unique_indirect_regs[i] != 0) {
2844 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2845 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2846 			       unique_indirect_regs[i] & 0x3FFFF);
2847 
2848 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2849 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2850 			       unique_indirect_regs[i] >> 20);
2851 		}
2852 	}
2853 
2854 	kfree(register_list_format);
2855 	return 0;
2856 }
2857 
2858 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2859 {
2860 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2861 }
2862 
2863 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2864 					     bool enable)
2865 {
2866 	uint32_t data = 0;
2867 	uint32_t default_data = 0;
2868 
2869 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2870 	if (enable) {
2871 		/* enable GFXIP control over CGPG */
2872 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2873 		if(default_data != data)
2874 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2875 
2876 		/* update status */
2877 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2878 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2879 		if(default_data != data)
2880 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2881 	} else {
2882 		/* restore GFXIP control over GCPG */
2883 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2884 		if(default_data != data)
2885 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2886 	}
2887 }
2888 
2889 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2890 {
2891 	uint32_t data = 0;
2892 
2893 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2894 			      AMD_PG_SUPPORT_GFX_SMG |
2895 			      AMD_PG_SUPPORT_GFX_DMG)) {
2896 		/* init IDLE_POLL_COUNT = 60 */
2897 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2898 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2899 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2900 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2901 
2902 		/* init RLC PG Delay */
2903 		data = 0;
2904 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2905 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2906 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2907 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2908 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2909 
2910 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2911 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2912 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2913 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2914 
2915 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2916 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2917 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2918 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2919 
2920 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2921 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2922 
2923 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2924 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2925 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2926 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2927 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2928 	}
2929 }
2930 
2931 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2932 						bool enable)
2933 {
2934 	uint32_t data = 0;
2935 	uint32_t default_data = 0;
2936 
2937 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2938 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2939 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2940 			     enable ? 1 : 0);
2941 	if (default_data != data)
2942 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2943 }
2944 
2945 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2946 						bool enable)
2947 {
2948 	uint32_t data = 0;
2949 	uint32_t default_data = 0;
2950 
2951 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2952 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2953 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2954 			     enable ? 1 : 0);
2955 	if(default_data != data)
2956 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2957 }
2958 
2959 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2960 					bool enable)
2961 {
2962 	uint32_t data = 0;
2963 	uint32_t default_data = 0;
2964 
2965 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2966 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2967 			     CP_PG_DISABLE,
2968 			     enable ? 0 : 1);
2969 	if(default_data != data)
2970 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2971 }
2972 
2973 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2974 						bool enable)
2975 {
2976 	uint32_t data, default_data;
2977 
2978 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2979 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2980 			     GFX_POWER_GATING_ENABLE,
2981 			     enable ? 1 : 0);
2982 	if(default_data != data)
2983 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2984 }
2985 
2986 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2987 						bool enable)
2988 {
2989 	uint32_t data, default_data;
2990 
2991 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2992 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2993 			     GFX_PIPELINE_PG_ENABLE,
2994 			     enable ? 1 : 0);
2995 	if(default_data != data)
2996 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2997 
2998 	if (!enable)
2999 		/* read any GFX register to wake up GFX */
3000 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3001 }
3002 
3003 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3004 						       bool enable)
3005 {
3006 	uint32_t data, default_data;
3007 
3008 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3009 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3010 			     STATIC_PER_CU_PG_ENABLE,
3011 			     enable ? 1 : 0);
3012 	if(default_data != data)
3013 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3014 }
3015 
3016 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3017 						bool enable)
3018 {
3019 	uint32_t data, default_data;
3020 
3021 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3022 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
3023 			     DYN_PER_CU_PG_ENABLE,
3024 			     enable ? 1 : 0);
3025 	if(default_data != data)
3026 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3027 }
3028 
3029 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3030 {
3031 	gfx_v9_0_init_csb(adev);
3032 
3033 	/*
3034 	 * Rlc save restore list is workable since v2_1.
3035 	 * And it's needed by gfxoff feature.
3036 	 */
3037 	if (adev->gfx.rlc.is_rlc_v2_1) {
3038 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
3039 			    IP_VERSION(9, 2, 1) ||
3040 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
3041 			gfx_v9_1_init_rlc_save_restore_list(adev);
3042 		gfx_v9_0_enable_save_restore_machine(adev);
3043 	}
3044 
3045 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3046 			      AMD_PG_SUPPORT_GFX_SMG |
3047 			      AMD_PG_SUPPORT_GFX_DMG |
3048 			      AMD_PG_SUPPORT_CP |
3049 			      AMD_PG_SUPPORT_GDS |
3050 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
3051 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3052 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
3053 		gfx_v9_0_init_gfx_power_gating(adev);
3054 	}
3055 }
3056 
3057 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3058 {
3059 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3060 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3061 	gfx_v9_0_wait_for_rlc_serdes(adev);
3062 }
3063 
3064 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3065 {
3066 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3067 	udelay(50);
3068 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3069 	udelay(50);
3070 }
3071 
3072 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3073 {
3074 #ifdef AMDGPU_RLC_DEBUG_RETRY
3075 	u32 rlc_ucode_ver;
3076 #endif
3077 
3078 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3079 	udelay(50);
3080 
3081 	/* carrizo do enable cp interrupt after cp inited */
3082 	if (!(adev->flags & AMD_IS_APU)) {
3083 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3084 		udelay(50);
3085 	}
3086 
3087 #ifdef AMDGPU_RLC_DEBUG_RETRY
3088 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3089 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3090 	if(rlc_ucode_ver == 0x108) {
3091 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
3092 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3093 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3094 		 * default is 0x9C4 to create a 100us interval */
3095 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3096 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3097 		 * to disable the page fault retry interrupts, default is
3098 		 * 0x100 (256) */
3099 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3100 	}
3101 #endif
3102 }
3103 
3104 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3105 {
3106 	const struct rlc_firmware_header_v2_0 *hdr;
3107 	const __le32 *fw_data;
3108 	unsigned i, fw_size;
3109 
3110 	if (!adev->gfx.rlc_fw)
3111 		return -EINVAL;
3112 
3113 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3114 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3115 
3116 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3117 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3118 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3119 
3120 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3121 			RLCG_UCODE_LOADING_START_ADDRESS);
3122 	for (i = 0; i < fw_size; i++)
3123 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3124 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3125 
3126 	return 0;
3127 }
3128 
3129 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3130 {
3131 	int r;
3132 
3133 	if (amdgpu_sriov_vf(adev)) {
3134 		gfx_v9_0_init_csb(adev);
3135 		return 0;
3136 	}
3137 
3138 	adev->gfx.rlc.funcs->stop(adev);
3139 
3140 	/* disable CG */
3141 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3142 
3143 	gfx_v9_0_init_pg(adev);
3144 
3145 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3146 		/* legacy rlc firmware loading */
3147 		r = gfx_v9_0_rlc_load_microcode(adev);
3148 		if (r)
3149 			return r;
3150 	}
3151 
3152 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3153 	case IP_VERSION(9, 2, 2):
3154 	case IP_VERSION(9, 1, 0):
3155 		gfx_v9_0_init_lbpw(adev);
3156 		if (amdgpu_lbpw == 0)
3157 			gfx_v9_0_enable_lbpw(adev, false);
3158 		else
3159 			gfx_v9_0_enable_lbpw(adev, true);
3160 		break;
3161 	case IP_VERSION(9, 4, 0):
3162 		gfx_v9_4_init_lbpw(adev);
3163 		if (amdgpu_lbpw > 0)
3164 			gfx_v9_0_enable_lbpw(adev, true);
3165 		else
3166 			gfx_v9_0_enable_lbpw(adev, false);
3167 		break;
3168 	default:
3169 		break;
3170 	}
3171 
3172 	gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
3173 
3174 	adev->gfx.rlc.funcs->start(adev);
3175 
3176 	return 0;
3177 }
3178 
3179 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3180 {
3181 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3182 
3183 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3184 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3185 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3186 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3187 	udelay(50);
3188 }
3189 
3190 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3191 {
3192 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3193 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3194 	const struct gfx_firmware_header_v1_0 *me_hdr;
3195 	const __le32 *fw_data;
3196 	unsigned i, fw_size;
3197 
3198 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3199 		return -EINVAL;
3200 
3201 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3202 		adev->gfx.pfp_fw->data;
3203 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3204 		adev->gfx.ce_fw->data;
3205 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3206 		adev->gfx.me_fw->data;
3207 
3208 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3209 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3210 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3211 
3212 	gfx_v9_0_cp_gfx_enable(adev, false);
3213 
3214 	/* PFP */
3215 	fw_data = (const __le32 *)
3216 		(adev->gfx.pfp_fw->data +
3217 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3218 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3219 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3220 	for (i = 0; i < fw_size; i++)
3221 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3222 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3223 
3224 	/* CE */
3225 	fw_data = (const __le32 *)
3226 		(adev->gfx.ce_fw->data +
3227 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3228 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3229 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3230 	for (i = 0; i < fw_size; i++)
3231 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3232 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3233 
3234 	/* ME */
3235 	fw_data = (const __le32 *)
3236 		(adev->gfx.me_fw->data +
3237 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3238 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3239 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3240 	for (i = 0; i < fw_size; i++)
3241 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3242 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3243 
3244 	return 0;
3245 }
3246 
3247 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3248 {
3249 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3250 	const struct cs_section_def *sect = NULL;
3251 	const struct cs_extent_def *ext = NULL;
3252 	int r, i, tmp;
3253 
3254 	/* init the CP */
3255 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3256 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3257 
3258 	gfx_v9_0_cp_gfx_enable(adev, true);
3259 
3260 	/* Now only limit the quirk on the APU gfx9 series and already
3261 	 * confirmed that the APU gfx10/gfx11 needn't such update.
3262 	 */
3263 	if (adev->flags & AMD_IS_APU &&
3264 			adev->in_s3 && !adev->suspend_complete) {
3265 		DRM_INFO(" Will skip the CSB packet resubmit\n");
3266 		return 0;
3267 	}
3268 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3269 	if (r) {
3270 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3271 		return r;
3272 	}
3273 
3274 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3275 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3276 
3277 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3278 	amdgpu_ring_write(ring, 0x80000000);
3279 	amdgpu_ring_write(ring, 0x80000000);
3280 
3281 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3282 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3283 			if (sect->id == SECT_CONTEXT) {
3284 				amdgpu_ring_write(ring,
3285 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3286 					       ext->reg_count));
3287 				amdgpu_ring_write(ring,
3288 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3289 				for (i = 0; i < ext->reg_count; i++)
3290 					amdgpu_ring_write(ring, ext->extent[i]);
3291 			}
3292 		}
3293 	}
3294 
3295 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3296 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3297 
3298 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3299 	amdgpu_ring_write(ring, 0);
3300 
3301 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3302 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3303 	amdgpu_ring_write(ring, 0x8000);
3304 	amdgpu_ring_write(ring, 0x8000);
3305 
3306 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3307 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3308 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3309 	amdgpu_ring_write(ring, tmp);
3310 	amdgpu_ring_write(ring, 0);
3311 
3312 	amdgpu_ring_commit(ring);
3313 
3314 	return 0;
3315 }
3316 
3317 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3318 {
3319 	struct amdgpu_ring *ring;
3320 	u32 tmp;
3321 	u32 rb_bufsz;
3322 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3323 
3324 	/* Set the write pointer delay */
3325 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3326 
3327 	/* set the RB to use vmid 0 */
3328 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3329 
3330 	/* Set ring buffer size */
3331 	ring = &adev->gfx.gfx_ring[0];
3332 	rb_bufsz = order_base_2(ring->ring_size / 8);
3333 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3334 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3335 #ifdef __BIG_ENDIAN
3336 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3337 #endif
3338 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3339 
3340 	/* Initialize the ring buffer's write pointers */
3341 	ring->wptr = 0;
3342 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3343 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3344 
3345 	/* set the wb address wether it's enabled or not */
3346 	rptr_addr = ring->rptr_gpu_addr;
3347 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3348 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3349 
3350 	wptr_gpu_addr = ring->wptr_gpu_addr;
3351 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3352 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3353 
3354 	mdelay(1);
3355 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3356 
3357 	rb_addr = ring->gpu_addr >> 8;
3358 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3359 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3360 
3361 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3362 	if (ring->use_doorbell) {
3363 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3364 				    DOORBELL_OFFSET, ring->doorbell_index);
3365 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3366 				    DOORBELL_EN, 1);
3367 	} else {
3368 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3369 	}
3370 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3371 
3372 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3373 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3374 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3375 
3376 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3377 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3378 
3379 
3380 	/* start the ring */
3381 	gfx_v9_0_cp_gfx_start(adev);
3382 
3383 	return 0;
3384 }
3385 
3386 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3387 {
3388 	if (enable) {
3389 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3390 	} else {
3391 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3392 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3393 		adev->gfx.kiq[0].ring.sched.ready = false;
3394 	}
3395 	udelay(50);
3396 }
3397 
3398 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3399 {
3400 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3401 	const __le32 *fw_data;
3402 	unsigned i;
3403 	u32 tmp;
3404 
3405 	if (!adev->gfx.mec_fw)
3406 		return -EINVAL;
3407 
3408 	gfx_v9_0_cp_compute_enable(adev, false);
3409 
3410 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3411 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3412 
3413 	fw_data = (const __le32 *)
3414 		(adev->gfx.mec_fw->data +
3415 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3416 	tmp = 0;
3417 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3418 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3419 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3420 
3421 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3422 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3423 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3424 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3425 
3426 	/* MEC1 */
3427 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3428 			 mec_hdr->jt_offset);
3429 	for (i = 0; i < mec_hdr->jt_size; i++)
3430 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3431 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3432 
3433 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3434 			adev->gfx.mec_fw_version);
3435 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3436 
3437 	return 0;
3438 }
3439 
3440 /* KIQ functions */
3441 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3442 {
3443 	uint32_t tmp;
3444 	struct amdgpu_device *adev = ring->adev;
3445 
3446 	/* tell RLC which is KIQ queue */
3447 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3448 	tmp &= 0xffffff00;
3449 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3450 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3451 	tmp |= 0x80;
3452 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3453 }
3454 
3455 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3456 {
3457 	struct amdgpu_device *adev = ring->adev;
3458 
3459 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3460 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3461 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3462 			mqd->cp_hqd_queue_priority =
3463 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3464 		}
3465 	}
3466 }
3467 
3468 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3469 {
3470 	struct amdgpu_device *adev = ring->adev;
3471 	struct v9_mqd *mqd = ring->mqd_ptr;
3472 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3473 	uint32_t tmp;
3474 
3475 	mqd->header = 0xC0310800;
3476 	mqd->compute_pipelinestat_enable = 0x00000001;
3477 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3478 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3479 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3480 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3481 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3482 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3483 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3484 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3485 	mqd->compute_misc_reserved = 0x00000003;
3486 
3487 	mqd->dynamic_cu_mask_addr_lo =
3488 		lower_32_bits(ring->mqd_gpu_addr
3489 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3490 	mqd->dynamic_cu_mask_addr_hi =
3491 		upper_32_bits(ring->mqd_gpu_addr
3492 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3493 
3494 	eop_base_addr = ring->eop_gpu_addr >> 8;
3495 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3496 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3497 
3498 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3499 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3500 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3501 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3502 
3503 	mqd->cp_hqd_eop_control = tmp;
3504 
3505 	/* enable doorbell? */
3506 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3507 
3508 	if (ring->use_doorbell) {
3509 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3510 				    DOORBELL_OFFSET, ring->doorbell_index);
3511 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3512 				    DOORBELL_EN, 1);
3513 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3514 				    DOORBELL_SOURCE, 0);
3515 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3516 				    DOORBELL_HIT, 0);
3517 	} else {
3518 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3519 					 DOORBELL_EN, 0);
3520 	}
3521 
3522 	mqd->cp_hqd_pq_doorbell_control = tmp;
3523 
3524 	/* disable the queue if it's active */
3525 	ring->wptr = 0;
3526 	mqd->cp_hqd_dequeue_request = 0;
3527 	mqd->cp_hqd_pq_rptr = 0;
3528 	mqd->cp_hqd_pq_wptr_lo = 0;
3529 	mqd->cp_hqd_pq_wptr_hi = 0;
3530 
3531 	/* set the pointer to the MQD */
3532 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3533 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3534 
3535 	/* set MQD vmid to 0 */
3536 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3537 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3538 	mqd->cp_mqd_control = tmp;
3539 
3540 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3541 	hqd_gpu_addr = ring->gpu_addr >> 8;
3542 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3543 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3544 
3545 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3546 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3547 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3548 			    (order_base_2(ring->ring_size / 4) - 1));
3549 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3550 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3551 #ifdef __BIG_ENDIAN
3552 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3553 #endif
3554 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3555 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3556 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3557 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3558 	mqd->cp_hqd_pq_control = tmp;
3559 
3560 	/* set the wb address whether it's enabled or not */
3561 	wb_gpu_addr = ring->rptr_gpu_addr;
3562 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3563 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3564 		upper_32_bits(wb_gpu_addr) & 0xffff;
3565 
3566 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3567 	wb_gpu_addr = ring->wptr_gpu_addr;
3568 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3569 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3570 
3571 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3572 	ring->wptr = 0;
3573 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3574 
3575 	/* set the vmid for the queue */
3576 	mqd->cp_hqd_vmid = 0;
3577 
3578 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3579 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3580 	mqd->cp_hqd_persistent_state = tmp;
3581 
3582 	/* set MIN_IB_AVAIL_SIZE */
3583 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3584 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3585 	mqd->cp_hqd_ib_control = tmp;
3586 
3587 	/* set static priority for a queue/ring */
3588 	gfx_v9_0_mqd_set_priority(ring, mqd);
3589 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3590 
3591 	/* map_queues packet doesn't need activate the queue,
3592 	 * so only kiq need set this field.
3593 	 */
3594 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3595 		mqd->cp_hqd_active = 1;
3596 
3597 	return 0;
3598 }
3599 
3600 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3601 {
3602 	struct amdgpu_device *adev = ring->adev;
3603 	struct v9_mqd *mqd = ring->mqd_ptr;
3604 	int j;
3605 
3606 	/* disable wptr polling */
3607 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3608 
3609 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3610 	       mqd->cp_hqd_eop_base_addr_lo);
3611 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3612 	       mqd->cp_hqd_eop_base_addr_hi);
3613 
3614 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3615 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3616 	       mqd->cp_hqd_eop_control);
3617 
3618 	/* enable doorbell? */
3619 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3620 	       mqd->cp_hqd_pq_doorbell_control);
3621 
3622 	/* disable the queue if it's active */
3623 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3624 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3625 		for (j = 0; j < adev->usec_timeout; j++) {
3626 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3627 				break;
3628 			udelay(1);
3629 		}
3630 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3631 		       mqd->cp_hqd_dequeue_request);
3632 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3633 		       mqd->cp_hqd_pq_rptr);
3634 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3635 		       mqd->cp_hqd_pq_wptr_lo);
3636 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3637 		       mqd->cp_hqd_pq_wptr_hi);
3638 	}
3639 
3640 	/* set the pointer to the MQD */
3641 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3642 	       mqd->cp_mqd_base_addr_lo);
3643 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3644 	       mqd->cp_mqd_base_addr_hi);
3645 
3646 	/* set MQD vmid to 0 */
3647 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3648 	       mqd->cp_mqd_control);
3649 
3650 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3651 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3652 	       mqd->cp_hqd_pq_base_lo);
3653 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3654 	       mqd->cp_hqd_pq_base_hi);
3655 
3656 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3657 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3658 	       mqd->cp_hqd_pq_control);
3659 
3660 	/* set the wb address whether it's enabled or not */
3661 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3662 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3663 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3664 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3665 
3666 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3667 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3668 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3669 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3670 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3671 
3672 	/* enable the doorbell if requested */
3673 	if (ring->use_doorbell) {
3674 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3675 					(adev->doorbell_index.kiq * 2) << 2);
3676 		/* If GC has entered CGPG, ringing doorbell > first page
3677 		 * doesn't wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3678 		 * workaround this issue. And this change has to align with firmware
3679 		 * update.
3680 		 */
3681 		if (check_if_enlarge_doorbell_range(adev))
3682 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3683 					(adev->doorbell.size - 4));
3684 		else
3685 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3686 					(adev->doorbell_index.userqueue_end * 2) << 2);
3687 	}
3688 
3689 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3690 	       mqd->cp_hqd_pq_doorbell_control);
3691 
3692 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3693 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3694 	       mqd->cp_hqd_pq_wptr_lo);
3695 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3696 	       mqd->cp_hqd_pq_wptr_hi);
3697 
3698 	/* set the vmid for the queue */
3699 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3700 
3701 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3702 	       mqd->cp_hqd_persistent_state);
3703 
3704 	/* activate the queue */
3705 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3706 	       mqd->cp_hqd_active);
3707 
3708 	if (ring->use_doorbell)
3709 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3710 
3711 	return 0;
3712 }
3713 
3714 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3715 {
3716 	struct amdgpu_device *adev = ring->adev;
3717 	int j;
3718 
3719 	/* disable the queue if it's active */
3720 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3721 
3722 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3723 
3724 		for (j = 0; j < adev->usec_timeout; j++) {
3725 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3726 				break;
3727 			udelay(1);
3728 		}
3729 
3730 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3731 			DRM_DEBUG("KIQ dequeue request failed.\n");
3732 
3733 			/* Manual disable if dequeue request times out */
3734 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3735 		}
3736 
3737 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3738 		      0);
3739 	}
3740 
3741 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3742 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3743 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3744 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3745 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3746 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3747 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3748 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3749 
3750 	return 0;
3751 }
3752 
3753 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3754 {
3755 	struct amdgpu_device *adev = ring->adev;
3756 	struct v9_mqd *mqd = ring->mqd_ptr;
3757 	struct v9_mqd *tmp_mqd;
3758 
3759 	gfx_v9_0_kiq_setting(ring);
3760 
3761 	/* GPU could be in bad state during probe, driver trigger the reset
3762 	 * after load the SMU, in this case , the mqd is not be initialized.
3763 	 * driver need to re-init the mqd.
3764 	 * check mqd->cp_hqd_pq_control since this value should not be 0
3765 	 */
3766 	tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3767 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){
3768 		/* for GPU_RESET case , reset MQD to a clean status */
3769 		if (adev->gfx.kiq[0].mqd_backup)
3770 			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3771 
3772 		/* reset ring buffer */
3773 		ring->wptr = 0;
3774 		amdgpu_ring_clear_ring(ring);
3775 
3776 		mutex_lock(&adev->srbm_mutex);
3777 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3778 		gfx_v9_0_kiq_init_register(ring);
3779 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3780 		mutex_unlock(&adev->srbm_mutex);
3781 	} else {
3782 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3783 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3784 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3785 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3786 			amdgpu_ring_clear_ring(ring);
3787 		mutex_lock(&adev->srbm_mutex);
3788 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3789 		gfx_v9_0_mqd_init(ring);
3790 		gfx_v9_0_kiq_init_register(ring);
3791 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3792 		mutex_unlock(&adev->srbm_mutex);
3793 
3794 		if (adev->gfx.kiq[0].mqd_backup)
3795 			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3796 	}
3797 
3798 	return 0;
3799 }
3800 
3801 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
3802 {
3803 	struct amdgpu_device *adev = ring->adev;
3804 	struct v9_mqd *mqd = ring->mqd_ptr;
3805 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3806 	struct v9_mqd *tmp_mqd;
3807 
3808 	/* Same as above kiq init, driver need to re-init the mqd if mqd->cp_hqd_pq_control
3809 	 * is not be initialized before
3810 	 */
3811 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3812 
3813 	if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
3814 	    (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
3815 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3816 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3817 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3818 		mutex_lock(&adev->srbm_mutex);
3819 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3820 		gfx_v9_0_mqd_init(ring);
3821 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3822 		mutex_unlock(&adev->srbm_mutex);
3823 
3824 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3825 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3826 	} else {
3827 		/* restore MQD to a clean status */
3828 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3829 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3830 		/* reset ring buffer */
3831 		ring->wptr = 0;
3832 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3833 		amdgpu_ring_clear_ring(ring);
3834 	}
3835 
3836 	return 0;
3837 }
3838 
3839 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3840 {
3841 	struct amdgpu_ring *ring;
3842 	int r;
3843 
3844 	ring = &adev->gfx.kiq[0].ring;
3845 
3846 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3847 	if (unlikely(r != 0))
3848 		return r;
3849 
3850 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3851 	if (unlikely(r != 0)) {
3852 		amdgpu_bo_unreserve(ring->mqd_obj);
3853 		return r;
3854 	}
3855 
3856 	gfx_v9_0_kiq_init_queue(ring);
3857 	amdgpu_bo_kunmap(ring->mqd_obj);
3858 	ring->mqd_ptr = NULL;
3859 	amdgpu_bo_unreserve(ring->mqd_obj);
3860 	return 0;
3861 }
3862 
3863 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3864 {
3865 	struct amdgpu_ring *ring = NULL;
3866 	int r = 0, i;
3867 
3868 	gfx_v9_0_cp_compute_enable(adev, true);
3869 
3870 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3871 		ring = &adev->gfx.compute_ring[i];
3872 
3873 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3874 		if (unlikely(r != 0))
3875 			goto done;
3876 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3877 		if (!r) {
3878 			r = gfx_v9_0_kcq_init_queue(ring, false);
3879 			amdgpu_bo_kunmap(ring->mqd_obj);
3880 			ring->mqd_ptr = NULL;
3881 		}
3882 		amdgpu_bo_unreserve(ring->mqd_obj);
3883 		if (r)
3884 			goto done;
3885 	}
3886 
3887 	r = amdgpu_gfx_enable_kcq(adev, 0);
3888 done:
3889 	return r;
3890 }
3891 
3892 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3893 {
3894 	int r, i;
3895 	struct amdgpu_ring *ring;
3896 
3897 	if (!(adev->flags & AMD_IS_APU))
3898 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3899 
3900 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3901 		if (adev->gfx.num_gfx_rings) {
3902 			/* legacy firmware loading */
3903 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3904 			if (r)
3905 				return r;
3906 		}
3907 
3908 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3909 		if (r)
3910 			return r;
3911 	}
3912 
3913 	r = gfx_v9_0_kiq_resume(adev);
3914 	if (r)
3915 		return r;
3916 
3917 	if (adev->gfx.num_gfx_rings) {
3918 		r = gfx_v9_0_cp_gfx_resume(adev);
3919 		if (r)
3920 			return r;
3921 	}
3922 
3923 	r = gfx_v9_0_kcq_resume(adev);
3924 	if (r)
3925 		return r;
3926 
3927 	if (adev->gfx.num_gfx_rings) {
3928 		ring = &adev->gfx.gfx_ring[0];
3929 		r = amdgpu_ring_test_helper(ring);
3930 		if (r)
3931 			return r;
3932 	}
3933 
3934 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3935 		ring = &adev->gfx.compute_ring[i];
3936 		amdgpu_ring_test_helper(ring);
3937 	}
3938 
3939 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3940 
3941 	return 0;
3942 }
3943 
3944 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3945 {
3946 	u32 tmp;
3947 
3948 	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
3949 	    amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
3950 		return;
3951 
3952 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3953 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3954 				adev->df.hash_status.hash_64k);
3955 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3956 				adev->df.hash_status.hash_2m);
3957 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3958 				adev->df.hash_status.hash_1g);
3959 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3960 }
3961 
3962 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3963 {
3964 	if (adev->gfx.num_gfx_rings)
3965 		gfx_v9_0_cp_gfx_enable(adev, enable);
3966 	gfx_v9_0_cp_compute_enable(adev, enable);
3967 }
3968 
3969 static int gfx_v9_0_hw_init(void *handle)
3970 {
3971 	int r;
3972 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3973 
3974 	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
3975 				       adev->gfx.cleaner_shader_ptr);
3976 
3977 	if (!amdgpu_sriov_vf(adev))
3978 		gfx_v9_0_init_golden_registers(adev);
3979 
3980 	gfx_v9_0_constants_init(adev);
3981 
3982 	gfx_v9_0_init_tcp_config(adev);
3983 
3984 	r = adev->gfx.rlc.funcs->resume(adev);
3985 	if (r)
3986 		return r;
3987 
3988 	r = gfx_v9_0_cp_resume(adev);
3989 	if (r)
3990 		return r;
3991 
3992 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
3993 		gfx_v9_4_2_set_power_brake_sequence(adev);
3994 
3995 	return r;
3996 }
3997 
3998 static int gfx_v9_0_hw_fini(void *handle)
3999 {
4000 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4001 
4002 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4003 		amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4004 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4005 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4006 	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
4007 
4008 	/* DF freeze and kcq disable will fail */
4009 	if (!amdgpu_ras_intr_triggered())
4010 		/* disable KCQ to avoid CPC touch memory not valid anymore */
4011 		amdgpu_gfx_disable_kcq(adev, 0);
4012 
4013 	if (amdgpu_sriov_vf(adev)) {
4014 		gfx_v9_0_cp_gfx_enable(adev, false);
4015 		/* must disable polling for SRIOV when hw finished, otherwise
4016 		 * CPC engine may still keep fetching WB address which is already
4017 		 * invalid after sw finished and trigger DMAR reading error in
4018 		 * hypervisor side.
4019 		 */
4020 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4021 		return 0;
4022 	}
4023 
4024 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
4025 	 * otherwise KIQ is hanging when binding back
4026 	 */
4027 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4028 		mutex_lock(&adev->srbm_mutex);
4029 		soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
4030 				adev->gfx.kiq[0].ring.pipe,
4031 				adev->gfx.kiq[0].ring.queue, 0, 0);
4032 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
4033 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
4034 		mutex_unlock(&adev->srbm_mutex);
4035 	}
4036 
4037 	gfx_v9_0_cp_enable(adev, false);
4038 
4039 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4040 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4041 	    (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
4042 		dev_dbg(adev->dev, "Skipping RLC halt\n");
4043 		return 0;
4044 	}
4045 
4046 	adev->gfx.rlc.funcs->stop(adev);
4047 	return 0;
4048 }
4049 
4050 static int gfx_v9_0_suspend(void *handle)
4051 {
4052 	return gfx_v9_0_hw_fini(handle);
4053 }
4054 
4055 static int gfx_v9_0_resume(void *handle)
4056 {
4057 	return gfx_v9_0_hw_init(handle);
4058 }
4059 
4060 static bool gfx_v9_0_is_idle(void *handle)
4061 {
4062 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4063 
4064 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4065 				GRBM_STATUS, GUI_ACTIVE))
4066 		return false;
4067 	else
4068 		return true;
4069 }
4070 
4071 static int gfx_v9_0_wait_for_idle(void *handle)
4072 {
4073 	unsigned i;
4074 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4075 
4076 	for (i = 0; i < adev->usec_timeout; i++) {
4077 		if (gfx_v9_0_is_idle(handle))
4078 			return 0;
4079 		udelay(1);
4080 	}
4081 	return -ETIMEDOUT;
4082 }
4083 
4084 static int gfx_v9_0_soft_reset(void *handle)
4085 {
4086 	u32 grbm_soft_reset = 0;
4087 	u32 tmp;
4088 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4089 
4090 	/* GRBM_STATUS */
4091 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4092 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4093 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4094 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4095 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4096 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4097 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4098 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4099 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4100 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4101 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4102 	}
4103 
4104 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4105 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4106 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4107 	}
4108 
4109 	/* GRBM_STATUS2 */
4110 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4111 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4112 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4113 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4114 
4115 
4116 	if (grbm_soft_reset) {
4117 		/* stop the rlc */
4118 		adev->gfx.rlc.funcs->stop(adev);
4119 
4120 		if (adev->gfx.num_gfx_rings)
4121 			/* Disable GFX parsing/prefetching */
4122 			gfx_v9_0_cp_gfx_enable(adev, false);
4123 
4124 		/* Disable MEC parsing/prefetching */
4125 		gfx_v9_0_cp_compute_enable(adev, false);
4126 
4127 		if (grbm_soft_reset) {
4128 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4129 			tmp |= grbm_soft_reset;
4130 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4131 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4132 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4133 
4134 			udelay(50);
4135 
4136 			tmp &= ~grbm_soft_reset;
4137 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4138 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4139 		}
4140 
4141 		/* Wait a little for things to settle down */
4142 		udelay(50);
4143 	}
4144 	return 0;
4145 }
4146 
4147 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4148 {
4149 	signed long r, cnt = 0;
4150 	unsigned long flags;
4151 	uint32_t seq, reg_val_offs = 0;
4152 	uint64_t value = 0;
4153 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4154 	struct amdgpu_ring *ring = &kiq->ring;
4155 
4156 	BUG_ON(!ring->funcs->emit_rreg);
4157 
4158 	spin_lock_irqsave(&kiq->ring_lock, flags);
4159 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4160 		pr_err("critical bug! too many kiq readers\n");
4161 		goto failed_unlock;
4162 	}
4163 	amdgpu_ring_alloc(ring, 32);
4164 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4165 	amdgpu_ring_write(ring, 9 |	/* src: register*/
4166 				(5 << 8) |	/* dst: memory */
4167 				(1 << 16) |	/* count sel */
4168 				(1 << 20));	/* write confirm */
4169 	amdgpu_ring_write(ring, 0);
4170 	amdgpu_ring_write(ring, 0);
4171 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4172 				reg_val_offs * 4));
4173 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4174 				reg_val_offs * 4));
4175 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4176 	if (r)
4177 		goto failed_undo;
4178 
4179 	amdgpu_ring_commit(ring);
4180 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4181 
4182 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4183 
4184 	/* don't wait anymore for gpu reset case because this way may
4185 	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
4186 	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
4187 	 * never return if we keep waiting in virt_kiq_rreg, which cause
4188 	 * gpu_recover() hang there.
4189 	 *
4190 	 * also don't wait anymore for IRQ context
4191 	 * */
4192 	if (r < 1 && (amdgpu_in_reset(adev)))
4193 		goto failed_kiq_read;
4194 
4195 	might_sleep();
4196 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4197 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4198 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4199 	}
4200 
4201 	if (cnt > MAX_KIQ_REG_TRY)
4202 		goto failed_kiq_read;
4203 
4204 	mb();
4205 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4206 		(uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
4207 	amdgpu_device_wb_free(adev, reg_val_offs);
4208 	return value;
4209 
4210 failed_undo:
4211 	amdgpu_ring_undo(ring);
4212 failed_unlock:
4213 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4214 failed_kiq_read:
4215 	if (reg_val_offs)
4216 		amdgpu_device_wb_free(adev, reg_val_offs);
4217 	pr_err("failed to read gpu clock\n");
4218 	return ~0;
4219 }
4220 
4221 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4222 {
4223 	uint64_t clock, clock_lo, clock_hi, hi_check;
4224 
4225 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4226 	case IP_VERSION(9, 3, 0):
4227 		preempt_disable();
4228 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4229 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4230 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4231 		/* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
4232 		 * roughly every 42 seconds.
4233 		 */
4234 		if (hi_check != clock_hi) {
4235 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4236 			clock_hi = hi_check;
4237 		}
4238 		preempt_enable();
4239 		clock = clock_lo | (clock_hi << 32ULL);
4240 		break;
4241 	default:
4242 		amdgpu_gfx_off_ctrl(adev, false);
4243 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4244 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4245 			    IP_VERSION(9, 0, 1) &&
4246 		    amdgpu_sriov_runtime(adev)) {
4247 			clock = gfx_v9_0_kiq_read_clock(adev);
4248 		} else {
4249 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4250 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4251 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4252 		}
4253 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4254 		amdgpu_gfx_off_ctrl(adev, true);
4255 		break;
4256 	}
4257 	return clock;
4258 }
4259 
4260 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4261 					  uint32_t vmid,
4262 					  uint32_t gds_base, uint32_t gds_size,
4263 					  uint32_t gws_base, uint32_t gws_size,
4264 					  uint32_t oa_base, uint32_t oa_size)
4265 {
4266 	struct amdgpu_device *adev = ring->adev;
4267 
4268 	/* GDS Base */
4269 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4270 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4271 				   gds_base);
4272 
4273 	/* GDS Size */
4274 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4275 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4276 				   gds_size);
4277 
4278 	/* GWS */
4279 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4280 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4281 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4282 
4283 	/* OA */
4284 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4285 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4286 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4287 }
4288 
4289 static const u32 vgpr_init_compute_shader[] =
4290 {
4291 	0xb07c0000, 0xbe8000ff,
4292 	0x000000f8, 0xbf110800,
4293 	0x7e000280, 0x7e020280,
4294 	0x7e040280, 0x7e060280,
4295 	0x7e080280, 0x7e0a0280,
4296 	0x7e0c0280, 0x7e0e0280,
4297 	0x80808800, 0xbe803200,
4298 	0xbf84fff5, 0xbf9c0000,
4299 	0xd28c0001, 0x0001007f,
4300 	0xd28d0001, 0x0002027e,
4301 	0x10020288, 0xb8810904,
4302 	0xb7814000, 0xd1196a01,
4303 	0x00000301, 0xbe800087,
4304 	0xbefc00c1, 0xd89c4000,
4305 	0x00020201, 0xd89cc080,
4306 	0x00040401, 0x320202ff,
4307 	0x00000800, 0x80808100,
4308 	0xbf84fff8, 0x7e020280,
4309 	0xbf810000, 0x00000000,
4310 };
4311 
4312 static const u32 sgpr_init_compute_shader[] =
4313 {
4314 	0xb07c0000, 0xbe8000ff,
4315 	0x0000005f, 0xbee50080,
4316 	0xbe812c65, 0xbe822c65,
4317 	0xbe832c65, 0xbe842c65,
4318 	0xbe852c65, 0xb77c0005,
4319 	0x80808500, 0xbf84fff8,
4320 	0xbe800080, 0xbf810000,
4321 };
4322 
4323 static const u32 vgpr_init_compute_shader_arcturus[] = {
4324 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4325 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4326 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4327 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4328 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4329 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4330 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4331 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4332 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4333 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4334 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4335 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4336 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4337 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4338 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4339 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4340 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4341 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4342 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4343 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4344 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4345 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4346 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4347 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4348 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4349 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4350 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4351 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4352 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4353 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4354 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4355 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4356 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4357 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4358 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4359 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4360 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4361 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4362 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4363 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4364 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4365 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4366 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4367 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4368 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4369 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4370 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4371 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4372 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4373 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4374 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4375 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4376 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4377 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4378 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4379 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4380 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4381 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4382 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4383 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4384 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4385 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4386 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4387 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4388 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4389 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4390 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4391 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4392 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4393 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4394 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4395 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4396 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4397 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4398 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4399 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4400 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4401 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4402 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4403 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4404 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4405 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4406 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4407 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4408 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4409 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4410 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4411 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4412 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4413 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4414 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4415 	0xbf84fff8, 0xbf810000,
4416 };
4417 
4418 /* When below register arrays changed, please update gpr_reg_size,
4419   and sec_ded_counter_reg_size in function gfx_v9_0_do_edc_gpr_workarounds,
4420   to cover all gfx9 ASICs */
4421 static const struct soc15_reg_entry vgpr_init_regs[] = {
4422    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4423    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4424    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4425    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4426    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4427    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4428    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4429    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4430    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4431    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4432    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4433    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4434    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4435    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4436 };
4437 
4438 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4439    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4440    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4441    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4442    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4443    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4444    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4445    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4446    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4447    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4448    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4449    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4450    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4451    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4452    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4453 };
4454 
4455 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4456    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4457    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4458    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4459    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4460    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4461    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4462    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4463    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4464    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4465    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4466    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4467    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4468    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4469    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4470 };
4471 
4472 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4474    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4475    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4476    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4477    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4481    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4482    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4483    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4484    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4485    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4486    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4487 };
4488 
4489 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4490    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4491    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4492    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4493    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4494    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4495    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4496    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4497    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4498    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4499    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4500    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4501    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4502    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4503    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4504    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4505    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4506    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4507    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4508    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4509    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4510    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4511    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4512    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4513    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4514    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4515    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4516    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4517    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4518    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4519    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4520    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4521    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4522    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4523 };
4524 
4525 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4526 {
4527 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4528 	int i, r;
4529 
4530 	/* only support when RAS is enabled */
4531 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4532 		return 0;
4533 
4534 	r = amdgpu_ring_alloc(ring, 7);
4535 	if (r) {
4536 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4537 			ring->name, r);
4538 		return r;
4539 	}
4540 
4541 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4542 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4543 
4544 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4545 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4546 				PACKET3_DMA_DATA_DST_SEL(1) |
4547 				PACKET3_DMA_DATA_SRC_SEL(2) |
4548 				PACKET3_DMA_DATA_ENGINE(0)));
4549 	amdgpu_ring_write(ring, 0);
4550 	amdgpu_ring_write(ring, 0);
4551 	amdgpu_ring_write(ring, 0);
4552 	amdgpu_ring_write(ring, 0);
4553 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4554 				adev->gds.gds_size);
4555 
4556 	amdgpu_ring_commit(ring);
4557 
4558 	for (i = 0; i < adev->usec_timeout; i++) {
4559 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4560 			break;
4561 		udelay(1);
4562 	}
4563 
4564 	if (i >= adev->usec_timeout)
4565 		r = -ETIMEDOUT;
4566 
4567 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4568 
4569 	return r;
4570 }
4571 
4572 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4573 {
4574 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4575 	struct amdgpu_ib ib;
4576 	struct dma_fence *f = NULL;
4577 	int r, i;
4578 	unsigned total_size, vgpr_offset, sgpr_offset;
4579 	u64 gpu_addr;
4580 
4581 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4582 						adev->gfx.config.max_cu_per_sh *
4583 						adev->gfx.config.max_sh_per_se;
4584 	int sgpr_work_group_size = 5;
4585 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4586 	int vgpr_init_shader_size;
4587 	const u32 *vgpr_init_shader_ptr;
4588 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4589 
4590 	/* only support when RAS is enabled */
4591 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4592 		return 0;
4593 
4594 	/* bail if the compute ring is not ready */
4595 	if (!ring->sched.ready)
4596 		return 0;
4597 
4598 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4599 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4600 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4601 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4602 	} else {
4603 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4604 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4605 		vgpr_init_regs_ptr = vgpr_init_regs;
4606 	}
4607 
4608 	total_size =
4609 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4610 	total_size +=
4611 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4612 	total_size +=
4613 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4614 	total_size = ALIGN(total_size, 256);
4615 	vgpr_offset = total_size;
4616 	total_size += ALIGN(vgpr_init_shader_size, 256);
4617 	sgpr_offset = total_size;
4618 	total_size += sizeof(sgpr_init_compute_shader);
4619 
4620 	/* allocate an indirect buffer to put the commands in */
4621 	memset(&ib, 0, sizeof(ib));
4622 	r = amdgpu_ib_get(adev, NULL, total_size,
4623 					AMDGPU_IB_POOL_DIRECT, &ib);
4624 	if (r) {
4625 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4626 		return r;
4627 	}
4628 
4629 	/* load the compute shaders */
4630 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4631 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4632 
4633 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4634 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4635 
4636 	/* init the ib length to 0 */
4637 	ib.length_dw = 0;
4638 
4639 	/* VGPR */
4640 	/* write the register state for the compute dispatch */
4641 	for (i = 0; i < gpr_reg_size; i++) {
4642 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4643 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4644 								- PACKET3_SET_SH_REG_START;
4645 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4646 	}
4647 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4648 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4649 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4650 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4651 							- PACKET3_SET_SH_REG_START;
4652 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4653 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4654 
4655 	/* write dispatch packet */
4656 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4657 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4658 	ib.ptr[ib.length_dw++] = 1; /* y */
4659 	ib.ptr[ib.length_dw++] = 1; /* z */
4660 	ib.ptr[ib.length_dw++] =
4661 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4662 
4663 	/* write CS partial flush packet */
4664 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4665 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4666 
4667 	/* SGPR1 */
4668 	/* write the register state for the compute dispatch */
4669 	for (i = 0; i < gpr_reg_size; i++) {
4670 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4671 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4672 								- PACKET3_SET_SH_REG_START;
4673 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4674 	}
4675 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4676 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4677 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4678 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4679 							- PACKET3_SET_SH_REG_START;
4680 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4681 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4682 
4683 	/* write dispatch packet */
4684 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4685 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4686 	ib.ptr[ib.length_dw++] = 1; /* y */
4687 	ib.ptr[ib.length_dw++] = 1; /* z */
4688 	ib.ptr[ib.length_dw++] =
4689 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4690 
4691 	/* write CS partial flush packet */
4692 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4693 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4694 
4695 	/* SGPR2 */
4696 	/* write the register state for the compute dispatch */
4697 	for (i = 0; i < gpr_reg_size; i++) {
4698 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4699 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4700 								- PACKET3_SET_SH_REG_START;
4701 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4702 	}
4703 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4704 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4705 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4706 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4707 							- PACKET3_SET_SH_REG_START;
4708 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4709 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4710 
4711 	/* write dispatch packet */
4712 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4713 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4714 	ib.ptr[ib.length_dw++] = 1; /* y */
4715 	ib.ptr[ib.length_dw++] = 1; /* z */
4716 	ib.ptr[ib.length_dw++] =
4717 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4718 
4719 	/* write CS partial flush packet */
4720 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4721 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4722 
4723 	/* shedule the ib on the ring */
4724 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4725 	if (r) {
4726 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4727 		goto fail;
4728 	}
4729 
4730 	/* wait for the GPU to finish processing the IB */
4731 	r = dma_fence_wait(f, false);
4732 	if (r) {
4733 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4734 		goto fail;
4735 	}
4736 
4737 fail:
4738 	amdgpu_ib_free(adev, &ib, NULL);
4739 	dma_fence_put(f);
4740 
4741 	return r;
4742 }
4743 
4744 static int gfx_v9_0_early_init(void *handle)
4745 {
4746 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4747 
4748 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4749 
4750 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4751 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4752 		adev->gfx.num_gfx_rings = 0;
4753 	else
4754 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4755 	adev->gfx.xcc_mask = 1;
4756 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4757 					  AMDGPU_MAX_COMPUTE_RINGS);
4758 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4759 	gfx_v9_0_set_ring_funcs(adev);
4760 	gfx_v9_0_set_irq_funcs(adev);
4761 	gfx_v9_0_set_gds_init(adev);
4762 	gfx_v9_0_set_rlc_funcs(adev);
4763 
4764 	/* init rlcg reg access ctrl */
4765 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4766 
4767 	return gfx_v9_0_init_microcode(adev);
4768 }
4769 
4770 static int gfx_v9_0_ecc_late_init(void *handle)
4771 {
4772 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4773 	int r;
4774 
4775 	/*
4776 	 * Temp workaround to fix the issue that CP firmware fails to
4777 	 * update read pointer when CPDMA is writing clearing operation
4778 	 * to GDS in suspend/resume sequence on several cards. So just
4779 	 * limit this operation in cold boot sequence.
4780 	 */
4781 	if ((!adev->in_suspend) &&
4782 	    (adev->gds.gds_size)) {
4783 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4784 		if (r)
4785 			return r;
4786 	}
4787 
4788 	/* requires IBs so do in late init after IB pool is initialized */
4789 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4790 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4791 	else
4792 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4793 
4794 	if (r)
4795 		return r;
4796 
4797 	if (adev->gfx.ras &&
4798 	    adev->gfx.ras->enable_watchdog_timer)
4799 		adev->gfx.ras->enable_watchdog_timer(adev);
4800 
4801 	return 0;
4802 }
4803 
4804 static int gfx_v9_0_late_init(void *handle)
4805 {
4806 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4807 	int r;
4808 
4809 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4810 	if (r)
4811 		return r;
4812 
4813 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4814 	if (r)
4815 		return r;
4816 
4817 	r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
4818 	if (r)
4819 		return r;
4820 
4821 	r = gfx_v9_0_ecc_late_init(handle);
4822 	if (r)
4823 		return r;
4824 
4825 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4826 		gfx_v9_4_2_debug_trap_config_init(adev,
4827 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4828 	else
4829 		gfx_v9_0_debug_trap_config_init(adev,
4830 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4831 
4832 	return 0;
4833 }
4834 
4835 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4836 {
4837 	uint32_t rlc_setting;
4838 
4839 	/* if RLC is not enabled, do nothing */
4840 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4841 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4842 		return false;
4843 
4844 	return true;
4845 }
4846 
4847 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4848 {
4849 	uint32_t data;
4850 	unsigned i;
4851 
4852 	data = RLC_SAFE_MODE__CMD_MASK;
4853 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4854 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4855 
4856 	/* wait for RLC_SAFE_MODE */
4857 	for (i = 0; i < adev->usec_timeout; i++) {
4858 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4859 			break;
4860 		udelay(1);
4861 	}
4862 }
4863 
4864 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4865 {
4866 	uint32_t data;
4867 
4868 	data = RLC_SAFE_MODE__CMD_MASK;
4869 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4870 }
4871 
4872 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4873 						bool enable)
4874 {
4875 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4876 
4877 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4878 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4879 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4880 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4881 	} else {
4882 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4883 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4884 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4885 	}
4886 
4887 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4888 }
4889 
4890 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4891 						bool enable)
4892 {
4893 	/* TODO: double check if we need to perform under safe mode */
4894 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4895 
4896 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4897 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4898 	else
4899 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4900 
4901 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4902 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4903 	else
4904 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4905 
4906 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4907 }
4908 
4909 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4910 						      bool enable)
4911 {
4912 	uint32_t data, def;
4913 
4914 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4915 
4916 	/* It is disabled by HW by default */
4917 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4918 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4919 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4920 
4921 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4922 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4923 
4924 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4925 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4926 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4927 
4928 		/* only for Vega10 & Raven1 */
4929 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4930 
4931 		if (def != data)
4932 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4933 
4934 		/* MGLS is a global flag to control all MGLS in GFX */
4935 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4936 			/* 2 - RLC memory Light sleep */
4937 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4938 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4939 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4940 				if (def != data)
4941 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4942 			}
4943 			/* 3 - CP memory Light sleep */
4944 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4945 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4946 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4947 				if (def != data)
4948 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4949 			}
4950 		}
4951 	} else {
4952 		/* 1 - MGCG_OVERRIDE */
4953 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4954 
4955 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4956 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4957 
4958 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4959 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4960 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4961 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4962 
4963 		if (def != data)
4964 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4965 
4966 		/* 2 - disable MGLS in RLC */
4967 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4968 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4969 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4970 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4971 		}
4972 
4973 		/* 3 - disable MGLS in CP */
4974 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4975 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4976 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4977 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4978 		}
4979 	}
4980 
4981 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4982 }
4983 
4984 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4985 					   bool enable)
4986 {
4987 	uint32_t data, def;
4988 
4989 	if (!adev->gfx.num_gfx_rings)
4990 		return;
4991 
4992 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4993 
4994 	/* Enable 3D CGCG/CGLS */
4995 	if (enable) {
4996 		/* write cmd to clear cgcg/cgls ov */
4997 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4998 		/* unset CGCG override */
4999 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5000 		/* update CGCG and CGLS override bits */
5001 		if (def != data)
5002 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5003 
5004 		/* enable 3Dcgcg FSM(0x0000363f) */
5005 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5006 
5007 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5008 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5009 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5010 		else
5011 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5012 
5013 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5014 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5015 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5016 		if (def != data)
5017 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5018 
5019 		/* set IDLE_POLL_COUNT(0x00900100) */
5020 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5021 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5022 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5023 		if (def != data)
5024 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5025 	} else {
5026 		/* Disable CGCG/CGLS */
5027 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5028 		/* disable cgcg, cgls should be disabled */
5029 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5030 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5031 		/* disable cgcg and cgls in FSM */
5032 		if (def != data)
5033 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5034 	}
5035 
5036 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5037 }
5038 
5039 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5040 						      bool enable)
5041 {
5042 	uint32_t def, data;
5043 
5044 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5045 
5046 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5047 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5048 		/* unset CGCG override */
5049 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5050 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5051 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5052 		else
5053 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5054 		/* update CGCG and CGLS override bits */
5055 		if (def != data)
5056 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5057 
5058 		/* enable cgcg FSM(0x0000363F) */
5059 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5060 
5061 		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
5062 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5063 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5064 		else
5065 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5066 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5067 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5068 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5069 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5070 		if (def != data)
5071 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5072 
5073 		/* set IDLE_POLL_COUNT(0x00900100) */
5074 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5075 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5076 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5077 		if (def != data)
5078 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5079 	} else {
5080 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5081 		/* reset CGCG/CGLS bits */
5082 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5083 		/* disable cgcg and cgls in FSM */
5084 		if (def != data)
5085 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5086 	}
5087 
5088 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5089 }
5090 
5091 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5092 					    bool enable)
5093 {
5094 	if (enable) {
5095 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5096 		 * ===  MGCG + MGLS ===
5097 		 */
5098 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5099 		/* ===  CGCG /CGLS for GFX 3D Only === */
5100 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5101 		/* ===  CGCG + CGLS === */
5102 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5103 	} else {
5104 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5105 		 * ===  CGCG + CGLS ===
5106 		 */
5107 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5108 		/* ===  CGCG /CGLS for GFX 3D Only === */
5109 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5110 		/* ===  MGCG + MGLS === */
5111 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5112 	}
5113 	return 0;
5114 }
5115 
5116 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5117 					      unsigned int vmid)
5118 {
5119 	u32 reg, data;
5120 
5121 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5122 	if (amdgpu_sriov_is_pp_one_vf(adev))
5123 		data = RREG32_NO_KIQ(reg);
5124 	else
5125 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5126 
5127 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5128 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5129 
5130 	if (amdgpu_sriov_is_pp_one_vf(adev))
5131 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5132 	else
5133 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5134 }
5135 
5136 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5137 {
5138 	amdgpu_gfx_off_ctrl(adev, false);
5139 
5140 	gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5141 
5142 	amdgpu_gfx_off_ctrl(adev, true);
5143 }
5144 
5145 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5146 					uint32_t offset,
5147 					struct soc15_reg_rlcg *entries, int arr_size)
5148 {
5149 	int i;
5150 	uint32_t reg;
5151 
5152 	if (!entries)
5153 		return false;
5154 
5155 	for (i = 0; i < arr_size; i++) {
5156 		const struct soc15_reg_rlcg *entry;
5157 
5158 		entry = &entries[i];
5159 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5160 		if (offset == reg)
5161 			return true;
5162 	}
5163 
5164 	return false;
5165 }
5166 
5167 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5168 {
5169 	return gfx_v9_0_check_rlcg_range(adev, offset,
5170 					(void *)rlcg_access_gc_9_0,
5171 					ARRAY_SIZE(rlcg_access_gc_9_0));
5172 }
5173 
5174 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5175 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5176 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5177 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5178 	.init = gfx_v9_0_rlc_init,
5179 	.get_csb_size = gfx_v9_0_get_csb_size,
5180 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5181 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5182 	.resume = gfx_v9_0_rlc_resume,
5183 	.stop = gfx_v9_0_rlc_stop,
5184 	.reset = gfx_v9_0_rlc_reset,
5185 	.start = gfx_v9_0_rlc_start,
5186 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5187 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5188 };
5189 
5190 static int gfx_v9_0_set_powergating_state(void *handle,
5191 					  enum amd_powergating_state state)
5192 {
5193 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5194 	bool enable = (state == AMD_PG_STATE_GATE);
5195 
5196 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5197 	case IP_VERSION(9, 2, 2):
5198 	case IP_VERSION(9, 1, 0):
5199 	case IP_VERSION(9, 3, 0):
5200 		if (!enable)
5201 			amdgpu_gfx_off_ctrl(adev, false);
5202 
5203 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5204 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5205 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5206 		} else {
5207 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5208 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5209 		}
5210 
5211 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5212 			gfx_v9_0_enable_cp_power_gating(adev, true);
5213 		else
5214 			gfx_v9_0_enable_cp_power_gating(adev, false);
5215 
5216 		/* update gfx cgpg state */
5217 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5218 
5219 		/* update mgcg state */
5220 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5221 
5222 		if (enable)
5223 			amdgpu_gfx_off_ctrl(adev, true);
5224 		break;
5225 	case IP_VERSION(9, 2, 1):
5226 		amdgpu_gfx_off_ctrl(adev, enable);
5227 		break;
5228 	default:
5229 		break;
5230 	}
5231 
5232 	return 0;
5233 }
5234 
5235 static int gfx_v9_0_set_clockgating_state(void *handle,
5236 					  enum amd_clockgating_state state)
5237 {
5238 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5239 
5240 	if (amdgpu_sriov_vf(adev))
5241 		return 0;
5242 
5243 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5244 	case IP_VERSION(9, 0, 1):
5245 	case IP_VERSION(9, 2, 1):
5246 	case IP_VERSION(9, 4, 0):
5247 	case IP_VERSION(9, 2, 2):
5248 	case IP_VERSION(9, 1, 0):
5249 	case IP_VERSION(9, 4, 1):
5250 	case IP_VERSION(9, 3, 0):
5251 	case IP_VERSION(9, 4, 2):
5252 		gfx_v9_0_update_gfx_clock_gating(adev,
5253 						 state == AMD_CG_STATE_GATE);
5254 		break;
5255 	default:
5256 		break;
5257 	}
5258 	return 0;
5259 }
5260 
5261 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5262 {
5263 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5264 	int data;
5265 
5266 	if (amdgpu_sriov_vf(adev))
5267 		*flags = 0;
5268 
5269 	/* AMD_CG_SUPPORT_GFX_MGCG */
5270 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5271 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5272 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5273 
5274 	/* AMD_CG_SUPPORT_GFX_CGCG */
5275 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5276 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5277 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5278 
5279 	/* AMD_CG_SUPPORT_GFX_CGLS */
5280 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5281 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5282 
5283 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5284 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5285 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5286 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5287 
5288 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5289 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5290 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5291 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5292 
5293 	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5294 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5295 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5296 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5297 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5298 
5299 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5300 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5301 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5302 	}
5303 }
5304 
5305 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5306 {
5307 	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr*/
5308 }
5309 
5310 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5311 {
5312 	struct amdgpu_device *adev = ring->adev;
5313 	u64 wptr;
5314 
5315 	/* XXX check if swapping is necessary on BE */
5316 	if (ring->use_doorbell) {
5317 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5318 	} else {
5319 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5320 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5321 	}
5322 
5323 	return wptr;
5324 }
5325 
5326 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5327 {
5328 	struct amdgpu_device *adev = ring->adev;
5329 
5330 	if (ring->use_doorbell) {
5331 		/* XXX check if swapping is necessary on BE */
5332 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5333 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5334 	} else {
5335 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5336 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5337 	}
5338 }
5339 
5340 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5341 {
5342 	struct amdgpu_device *adev = ring->adev;
5343 	u32 ref_and_mask, reg_mem_engine;
5344 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5345 
5346 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5347 		switch (ring->me) {
5348 		case 1:
5349 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5350 			break;
5351 		case 2:
5352 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5353 			break;
5354 		default:
5355 			return;
5356 		}
5357 		reg_mem_engine = 0;
5358 	} else {
5359 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5360 		reg_mem_engine = 1; /* pfp */
5361 	}
5362 
5363 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5364 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5365 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5366 			      ref_and_mask, ref_and_mask, 0x20);
5367 }
5368 
5369 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5370 					struct amdgpu_job *job,
5371 					struct amdgpu_ib *ib,
5372 					uint32_t flags)
5373 {
5374 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5375 	u32 header, control = 0;
5376 
5377 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5378 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5379 	else
5380 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5381 
5382 	control |= ib->length_dw | (vmid << 24);
5383 
5384 	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5385 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5386 
5387 		if (flags & AMDGPU_IB_PREEMPTED)
5388 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5389 
5390 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5391 			gfx_v9_0_ring_emit_de_meta(ring,
5392 						   (!amdgpu_sriov_vf(ring->adev) &&
5393 						   flags & AMDGPU_IB_PREEMPTED) ?
5394 						   true : false,
5395 						   job->gds_size > 0 && job->gds_base != 0);
5396 	}
5397 
5398 	amdgpu_ring_write(ring, header);
5399 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5400 	amdgpu_ring_write(ring,
5401 #ifdef __BIG_ENDIAN
5402 		(2 << 0) |
5403 #endif
5404 		lower_32_bits(ib->gpu_addr));
5405 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5406 	amdgpu_ring_ib_on_emit_cntl(ring);
5407 	amdgpu_ring_write(ring, control);
5408 }
5409 
5410 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5411 				     unsigned offset)
5412 {
5413 	u32 control = ring->ring[offset];
5414 
5415 	control |= INDIRECT_BUFFER_PRE_RESUME(1);
5416 	ring->ring[offset] = control;
5417 }
5418 
5419 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5420 					unsigned offset)
5421 {
5422 	struct amdgpu_device *adev = ring->adev;
5423 	void *ce_payload_cpu_addr;
5424 	uint64_t payload_offset, payload_size;
5425 
5426 	payload_size = sizeof(struct v9_ce_ib_state);
5427 
5428 	if (ring->is_mes_queue) {
5429 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5430 					  gfx[0].gfx_meta_data) +
5431 			offsetof(struct v9_gfx_meta_data, ce_payload);
5432 		ce_payload_cpu_addr =
5433 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5434 	} else {
5435 		payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5436 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5437 	}
5438 
5439 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5440 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5441 	} else {
5442 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5443 		       (ring->buf_mask + 1 - offset) << 2);
5444 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5445 		memcpy((void *)&ring->ring[0],
5446 		       ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5447 		       payload_size);
5448 	}
5449 }
5450 
5451 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5452 					unsigned offset)
5453 {
5454 	struct amdgpu_device *adev = ring->adev;
5455 	void *de_payload_cpu_addr;
5456 	uint64_t payload_offset, payload_size;
5457 
5458 	payload_size = sizeof(struct v9_de_ib_state);
5459 
5460 	if (ring->is_mes_queue) {
5461 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5462 					  gfx[0].gfx_meta_data) +
5463 			offsetof(struct v9_gfx_meta_data, de_payload);
5464 		de_payload_cpu_addr =
5465 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5466 	} else {
5467 		payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5468 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5469 	}
5470 
5471 	((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5472 		IB_COMPLETION_STATUS_PREEMPTED;
5473 
5474 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5475 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5476 	} else {
5477 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5478 		       (ring->buf_mask + 1 - offset) << 2);
5479 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5480 		memcpy((void *)&ring->ring[0],
5481 		       de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5482 		       payload_size);
5483 	}
5484 }
5485 
5486 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5487 					  struct amdgpu_job *job,
5488 					  struct amdgpu_ib *ib,
5489 					  uint32_t flags)
5490 {
5491 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5492 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5493 
5494 	/* Currently, there is a high possibility to get wave ID mismatch
5495 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5496 	 * different wave IDs than the GDS expects. This situation happens
5497 	 * randomly when at least 5 compute pipes use GDS ordered append.
5498 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5499 	 * Those are probably bugs somewhere else in the kernel driver.
5500 	 *
5501 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5502 	 * GDS to 0 for this ring (me/pipe).
5503 	 */
5504 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5505 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5506 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5507 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5508 	}
5509 
5510 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5511 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5512 	amdgpu_ring_write(ring,
5513 #ifdef __BIG_ENDIAN
5514 				(2 << 0) |
5515 #endif
5516 				lower_32_bits(ib->gpu_addr));
5517 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5518 	amdgpu_ring_write(ring, control);
5519 }
5520 
5521 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5522 				     u64 seq, unsigned flags)
5523 {
5524 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5525 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5526 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5527 	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5528 	uint32_t dw2 = 0;
5529 
5530 	/* RELEASE_MEM - flush caches, send int */
5531 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5532 
5533 	if (writeback) {
5534 		dw2 = EOP_TC_NC_ACTION_EN;
5535 	} else {
5536 		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5537 				EOP_TC_MD_ACTION_EN;
5538 	}
5539 	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5540 				EVENT_INDEX(5);
5541 	if (exec)
5542 		dw2 |= EOP_EXEC;
5543 
5544 	amdgpu_ring_write(ring, dw2);
5545 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5546 
5547 	/*
5548 	 * the address should be Qword aligned if 64bit write, Dword
5549 	 * aligned if only send 32bit data low (discard data high)
5550 	 */
5551 	if (write64bit)
5552 		BUG_ON(addr & 0x7);
5553 	else
5554 		BUG_ON(addr & 0x3);
5555 	amdgpu_ring_write(ring, lower_32_bits(addr));
5556 	amdgpu_ring_write(ring, upper_32_bits(addr));
5557 	amdgpu_ring_write(ring, lower_32_bits(seq));
5558 	amdgpu_ring_write(ring, upper_32_bits(seq));
5559 	amdgpu_ring_write(ring, 0);
5560 }
5561 
5562 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5563 {
5564 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5565 	uint32_t seq = ring->fence_drv.sync_seq;
5566 	uint64_t addr = ring->fence_drv.gpu_addr;
5567 
5568 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5569 			      lower_32_bits(addr), upper_32_bits(addr),
5570 			      seq, 0xffffffff, 4);
5571 }
5572 
5573 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5574 					unsigned vmid, uint64_t pd_addr)
5575 {
5576 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5577 
5578 	/* compute doesn't have PFP */
5579 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5580 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5581 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5582 		amdgpu_ring_write(ring, 0x0);
5583 	}
5584 }
5585 
5586 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5587 {
5588 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5589 }
5590 
5591 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5592 {
5593 	u64 wptr;
5594 
5595 	/* XXX check if swapping is necessary on BE */
5596 	if (ring->use_doorbell)
5597 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5598 	else
5599 		BUG();
5600 	return wptr;
5601 }
5602 
5603 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5604 {
5605 	struct amdgpu_device *adev = ring->adev;
5606 
5607 	/* XXX check if swapping is necessary on BE */
5608 	if (ring->use_doorbell) {
5609 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5610 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5611 	} else{
5612 		BUG(); /* only DOORBELL method supported on gfx9 now */
5613 	}
5614 }
5615 
5616 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5617 					 u64 seq, unsigned int flags)
5618 {
5619 	struct amdgpu_device *adev = ring->adev;
5620 
5621 	/* we only allocate 32bit for each seq wb address */
5622 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5623 
5624 	/* write fence seq to the "addr" */
5625 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5626 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5627 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5628 	amdgpu_ring_write(ring, lower_32_bits(addr));
5629 	amdgpu_ring_write(ring, upper_32_bits(addr));
5630 	amdgpu_ring_write(ring, lower_32_bits(seq));
5631 
5632 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5633 		/* set register to trigger INT */
5634 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5635 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5636 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5637 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5638 		amdgpu_ring_write(ring, 0);
5639 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5640 	}
5641 }
5642 
5643 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5644 {
5645 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5646 	amdgpu_ring_write(ring, 0);
5647 }
5648 
5649 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5650 {
5651 	struct amdgpu_device *adev = ring->adev;
5652 	struct v9_ce_ib_state ce_payload = {0};
5653 	uint64_t offset, ce_payload_gpu_addr;
5654 	void *ce_payload_cpu_addr;
5655 	int cnt;
5656 
5657 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5658 
5659 	if (ring->is_mes_queue) {
5660 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5661 				  gfx[0].gfx_meta_data) +
5662 			offsetof(struct v9_gfx_meta_data, ce_payload);
5663 		ce_payload_gpu_addr =
5664 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5665 		ce_payload_cpu_addr =
5666 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5667 	} else {
5668 		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5669 		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5670 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5671 	}
5672 
5673 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5674 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5675 				 WRITE_DATA_DST_SEL(8) |
5676 				 WR_CONFIRM) |
5677 				 WRITE_DATA_CACHE_POLICY(0));
5678 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5679 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5680 
5681 	amdgpu_ring_ib_on_emit_ce(ring);
5682 
5683 	if (resume)
5684 		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5685 					   sizeof(ce_payload) >> 2);
5686 	else
5687 		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5688 					   sizeof(ce_payload) >> 2);
5689 }
5690 
5691 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5692 {
5693 	int i, r = 0;
5694 	struct amdgpu_device *adev = ring->adev;
5695 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5696 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5697 	unsigned long flags;
5698 
5699 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5700 		return -EINVAL;
5701 
5702 	spin_lock_irqsave(&kiq->ring_lock, flags);
5703 
5704 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5705 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5706 		return -ENOMEM;
5707 	}
5708 
5709 	/* assert preemption condition */
5710 	amdgpu_ring_set_preempt_cond_exec(ring, false);
5711 
5712 	ring->trail_seq += 1;
5713 	amdgpu_ring_alloc(ring, 13);
5714 	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5715 				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5716 
5717 	/* assert IB preemption, emit the trailing fence */
5718 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5719 				   ring->trail_fence_gpu_addr,
5720 				   ring->trail_seq);
5721 
5722 	amdgpu_ring_commit(kiq_ring);
5723 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5724 
5725 	/* poll the trailing fence */
5726 	for (i = 0; i < adev->usec_timeout; i++) {
5727 		if (ring->trail_seq ==
5728 			le32_to_cpu(*ring->trail_fence_cpu_addr))
5729 			break;
5730 		udelay(1);
5731 	}
5732 
5733 	if (i >= adev->usec_timeout) {
5734 		r = -EINVAL;
5735 		DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
5736 	}
5737 
5738 	/*reset the CP_VMID_PREEMPT after trailing fence*/
5739 	amdgpu_ring_emit_wreg(ring,
5740 			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5741 			      0x0);
5742 	amdgpu_ring_commit(ring);
5743 
5744 	/* deassert preemption condition */
5745 	amdgpu_ring_set_preempt_cond_exec(ring, true);
5746 	return r;
5747 }
5748 
5749 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5750 {
5751 	struct amdgpu_device *adev = ring->adev;
5752 	struct v9_de_ib_state de_payload = {0};
5753 	uint64_t offset, gds_addr, de_payload_gpu_addr;
5754 	void *de_payload_cpu_addr;
5755 	int cnt;
5756 
5757 	if (ring->is_mes_queue) {
5758 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5759 				  gfx[0].gfx_meta_data) +
5760 			offsetof(struct v9_gfx_meta_data, de_payload);
5761 		de_payload_gpu_addr =
5762 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5763 		de_payload_cpu_addr =
5764 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5765 
5766 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5767 				  gfx[0].gds_backup) +
5768 			offsetof(struct v9_gfx_meta_data, de_payload);
5769 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5770 	} else {
5771 		offset = offsetof(struct v9_gfx_meta_data, de_payload);
5772 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5773 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5774 
5775 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5776 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5777 				 PAGE_SIZE);
5778 	}
5779 
5780 	if (usegds) {
5781 		de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5782 		de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5783 	}
5784 
5785 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5786 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5787 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5788 				 WRITE_DATA_DST_SEL(8) |
5789 				 WR_CONFIRM) |
5790 				 WRITE_DATA_CACHE_POLICY(0));
5791 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5792 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5793 
5794 	amdgpu_ring_ib_on_emit_de(ring);
5795 	if (resume)
5796 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5797 					   sizeof(de_payload) >> 2);
5798 	else
5799 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5800 					   sizeof(de_payload) >> 2);
5801 }
5802 
5803 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5804 				   bool secure)
5805 {
5806 	uint32_t v = secure ? FRAME_TMZ : 0;
5807 
5808 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5809 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5810 }
5811 
5812 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5813 {
5814 	uint32_t dw2 = 0;
5815 
5816 	gfx_v9_0_ring_emit_ce_meta(ring,
5817 				   (!amdgpu_sriov_vf(ring->adev) &&
5818 				   flags & AMDGPU_IB_PREEMPTED) ? true : false);
5819 
5820 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5821 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5822 		/* set load_global_config & load_global_uconfig */
5823 		dw2 |= 0x8001;
5824 		/* set load_cs_sh_regs */
5825 		dw2 |= 0x01000000;
5826 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5827 		dw2 |= 0x10002;
5828 
5829 		/* set load_ce_ram if preamble presented */
5830 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5831 			dw2 |= 0x10000000;
5832 	} else {
5833 		/* still load_ce_ram if this is the first time preamble presented
5834 		 * although there is no context switch happens.
5835 		 */
5836 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5837 			dw2 |= 0x10000000;
5838 	}
5839 
5840 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5841 	amdgpu_ring_write(ring, dw2);
5842 	amdgpu_ring_write(ring, 0);
5843 }
5844 
5845 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5846 						  uint64_t addr)
5847 {
5848 	unsigned ret;
5849 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5850 	amdgpu_ring_write(ring, lower_32_bits(addr));
5851 	amdgpu_ring_write(ring, upper_32_bits(addr));
5852 	/* discard following DWs if *cond_exec_gpu_addr==0 */
5853 	amdgpu_ring_write(ring, 0);
5854 	ret = ring->wptr & ring->buf_mask;
5855 	/* patch dummy value later */
5856 	amdgpu_ring_write(ring, 0);
5857 	return ret;
5858 }
5859 
5860 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5861 				    uint32_t reg_val_offs)
5862 {
5863 	struct amdgpu_device *adev = ring->adev;
5864 
5865 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5866 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5867 				(5 << 8) |	/* dst: memory */
5868 				(1 << 20));	/* write confirm */
5869 	amdgpu_ring_write(ring, reg);
5870 	amdgpu_ring_write(ring, 0);
5871 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5872 				reg_val_offs * 4));
5873 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5874 				reg_val_offs * 4));
5875 }
5876 
5877 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5878 				    uint32_t val)
5879 {
5880 	uint32_t cmd = 0;
5881 
5882 	switch (ring->funcs->type) {
5883 	case AMDGPU_RING_TYPE_GFX:
5884 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5885 		break;
5886 	case AMDGPU_RING_TYPE_KIQ:
5887 		cmd = (1 << 16); /* no inc addr */
5888 		break;
5889 	default:
5890 		cmd = WR_CONFIRM;
5891 		break;
5892 	}
5893 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5894 	amdgpu_ring_write(ring, cmd);
5895 	amdgpu_ring_write(ring, reg);
5896 	amdgpu_ring_write(ring, 0);
5897 	amdgpu_ring_write(ring, val);
5898 }
5899 
5900 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5901 					uint32_t val, uint32_t mask)
5902 {
5903 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5904 }
5905 
5906 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5907 						  uint32_t reg0, uint32_t reg1,
5908 						  uint32_t ref, uint32_t mask)
5909 {
5910 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5911 	struct amdgpu_device *adev = ring->adev;
5912 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5913 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5914 
5915 	if (fw_version_ok)
5916 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5917 				      ref, mask, 0x20);
5918 	else
5919 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5920 							   ref, mask);
5921 }
5922 
5923 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5924 {
5925 	struct amdgpu_device *adev = ring->adev;
5926 	uint32_t value = 0;
5927 
5928 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5929 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5930 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5931 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5932 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5933 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5934 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5935 }
5936 
5937 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5938 						 enum amdgpu_interrupt_state state)
5939 {
5940 	switch (state) {
5941 	case AMDGPU_IRQ_STATE_DISABLE:
5942 	case AMDGPU_IRQ_STATE_ENABLE:
5943 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5944 			       TIME_STAMP_INT_ENABLE,
5945 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5946 		break;
5947 	default:
5948 		break;
5949 	}
5950 }
5951 
5952 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5953 						     int me, int pipe,
5954 						     enum amdgpu_interrupt_state state)
5955 {
5956 	u32 mec_int_cntl, mec_int_cntl_reg;
5957 
5958 	/*
5959 	 * amdgpu controls only the first MEC. That's why this function only
5960 	 * handles the setting of interrupts for this specific MEC. All other
5961 	 * pipes' interrupts are set by amdkfd.
5962 	 */
5963 
5964 	if (me == 1) {
5965 		switch (pipe) {
5966 		case 0:
5967 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5968 			break;
5969 		case 1:
5970 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5971 			break;
5972 		case 2:
5973 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5974 			break;
5975 		case 3:
5976 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5977 			break;
5978 		default:
5979 			DRM_DEBUG("invalid pipe %d\n", pipe);
5980 			return;
5981 		}
5982 	} else {
5983 		DRM_DEBUG("invalid me %d\n", me);
5984 		return;
5985 	}
5986 
5987 	switch (state) {
5988 	case AMDGPU_IRQ_STATE_DISABLE:
5989 		mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg);
5990 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5991 					     TIME_STAMP_INT_ENABLE, 0);
5992 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5993 		break;
5994 	case AMDGPU_IRQ_STATE_ENABLE:
5995 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5996 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5997 					     TIME_STAMP_INT_ENABLE, 1);
5998 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5999 		break;
6000 	default:
6001 		break;
6002 	}
6003 }
6004 
6005 static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev,
6006 				     int me, int pipe)
6007 {
6008 	/*
6009 	 * amdgpu controls only the first MEC. That's why this function only
6010 	 * handles the setting of interrupts for this specific MEC. All other
6011 	 * pipes' interrupts are set by amdkfd.
6012 	 */
6013 	if (me != 1)
6014 		return 0;
6015 
6016 	switch (pipe) {
6017 	case 0:
6018 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6019 	case 1:
6020 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6021 	case 2:
6022 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6023 	case 3:
6024 		return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6025 	default:
6026 		return 0;
6027 	}
6028 }
6029 
6030 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6031 					     struct amdgpu_irq_src *source,
6032 					     unsigned type,
6033 					     enum amdgpu_interrupt_state state)
6034 {
6035 	u32 cp_int_cntl_reg, cp_int_cntl;
6036 	int i, j;
6037 
6038 	switch (state) {
6039 	case AMDGPU_IRQ_STATE_DISABLE:
6040 	case AMDGPU_IRQ_STATE_ENABLE:
6041 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6042 			       PRIV_REG_INT_ENABLE,
6043 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6044 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6045 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6046 				/* MECs start at 1 */
6047 				cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6048 
6049 				if (cp_int_cntl_reg) {
6050 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6051 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6052 								    PRIV_REG_INT_ENABLE,
6053 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6054 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6055 				}
6056 			}
6057 		}
6058 		break;
6059 	default:
6060 		break;
6061 	}
6062 
6063 	return 0;
6064 }
6065 
6066 static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev,
6067 					   struct amdgpu_irq_src *source,
6068 					   unsigned type,
6069 					   enum amdgpu_interrupt_state state)
6070 {
6071 	u32 cp_int_cntl_reg, cp_int_cntl;
6072 	int i, j;
6073 
6074 	switch (state) {
6075 	case AMDGPU_IRQ_STATE_DISABLE:
6076 	case AMDGPU_IRQ_STATE_ENABLE:
6077 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6078 			       OPCODE_ERROR_INT_ENABLE,
6079 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6080 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6081 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6082 				/* MECs start at 1 */
6083 				cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6084 
6085 				if (cp_int_cntl_reg) {
6086 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6087 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6088 								    OPCODE_ERROR_INT_ENABLE,
6089 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6090 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6091 				}
6092 			}
6093 		}
6094 		break;
6095 	default:
6096 		break;
6097 	}
6098 
6099 	return 0;
6100 }
6101 
6102 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6103 					      struct amdgpu_irq_src *source,
6104 					      unsigned type,
6105 					      enum amdgpu_interrupt_state state)
6106 {
6107 	switch (state) {
6108 	case AMDGPU_IRQ_STATE_DISABLE:
6109 	case AMDGPU_IRQ_STATE_ENABLE:
6110 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6111 			       PRIV_INSTR_INT_ENABLE,
6112 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6113 		break;
6114 	default:
6115 		break;
6116 	}
6117 
6118 	return 0;
6119 }
6120 
6121 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
6122 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6123 			CP_ECC_ERROR_INT_ENABLE, 1)
6124 
6125 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
6126 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6127 			CP_ECC_ERROR_INT_ENABLE, 0)
6128 
6129 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
6130 					      struct amdgpu_irq_src *source,
6131 					      unsigned type,
6132 					      enum amdgpu_interrupt_state state)
6133 {
6134 	switch (state) {
6135 	case AMDGPU_IRQ_STATE_DISABLE:
6136 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6137 				CP_ECC_ERROR_INT_ENABLE, 0);
6138 		DISABLE_ECC_ON_ME_PIPE(1, 0);
6139 		DISABLE_ECC_ON_ME_PIPE(1, 1);
6140 		DISABLE_ECC_ON_ME_PIPE(1, 2);
6141 		DISABLE_ECC_ON_ME_PIPE(1, 3);
6142 		break;
6143 
6144 	case AMDGPU_IRQ_STATE_ENABLE:
6145 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6146 				CP_ECC_ERROR_INT_ENABLE, 1);
6147 		ENABLE_ECC_ON_ME_PIPE(1, 0);
6148 		ENABLE_ECC_ON_ME_PIPE(1, 1);
6149 		ENABLE_ECC_ON_ME_PIPE(1, 2);
6150 		ENABLE_ECC_ON_ME_PIPE(1, 3);
6151 		break;
6152 	default:
6153 		break;
6154 	}
6155 
6156 	return 0;
6157 }
6158 
6159 
6160 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6161 					    struct amdgpu_irq_src *src,
6162 					    unsigned type,
6163 					    enum amdgpu_interrupt_state state)
6164 {
6165 	switch (type) {
6166 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6167 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
6168 		break;
6169 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6170 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6171 		break;
6172 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6173 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6174 		break;
6175 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6176 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6177 		break;
6178 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6179 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6180 		break;
6181 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6182 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6183 		break;
6184 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6185 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6186 		break;
6187 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6188 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6189 		break;
6190 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6191 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6192 		break;
6193 	default:
6194 		break;
6195 	}
6196 	return 0;
6197 }
6198 
6199 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
6200 			    struct amdgpu_irq_src *source,
6201 			    struct amdgpu_iv_entry *entry)
6202 {
6203 	int i;
6204 	u8 me_id, pipe_id, queue_id;
6205 	struct amdgpu_ring *ring;
6206 
6207 	DRM_DEBUG("IH: CP EOP\n");
6208 	me_id = (entry->ring_id & 0x0c) >> 2;
6209 	pipe_id = (entry->ring_id & 0x03) >> 0;
6210 	queue_id = (entry->ring_id & 0x70) >> 4;
6211 
6212 	switch (me_id) {
6213 	case 0:
6214 		if (adev->gfx.num_gfx_rings) {
6215 			if (!adev->gfx.mcbp) {
6216 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6217 			} else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
6218 				/* Fence signals are handled on the software rings*/
6219 				for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6220 					amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
6221 			}
6222 		}
6223 		break;
6224 	case 1:
6225 	case 2:
6226 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6227 			ring = &adev->gfx.compute_ring[i];
6228 			/* Per-queue interrupt is supported for MEC starting from VI.
6229 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
6230 			  */
6231 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6232 				amdgpu_fence_process(ring);
6233 		}
6234 		break;
6235 	}
6236 	return 0;
6237 }
6238 
6239 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6240 			   struct amdgpu_iv_entry *entry)
6241 {
6242 	u8 me_id, pipe_id, queue_id;
6243 	struct amdgpu_ring *ring;
6244 	int i;
6245 
6246 	me_id = (entry->ring_id & 0x0c) >> 2;
6247 	pipe_id = (entry->ring_id & 0x03) >> 0;
6248 	queue_id = (entry->ring_id & 0x70) >> 4;
6249 
6250 	switch (me_id) {
6251 	case 0:
6252 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6253 		break;
6254 	case 1:
6255 	case 2:
6256 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6257 			ring = &adev->gfx.compute_ring[i];
6258 			if (ring->me == me_id && ring->pipe == pipe_id &&
6259 			    ring->queue == queue_id)
6260 				drm_sched_fault(&ring->sched);
6261 		}
6262 		break;
6263 	}
6264 }
6265 
6266 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6267 				 struct amdgpu_irq_src *source,
6268 				 struct amdgpu_iv_entry *entry)
6269 {
6270 	DRM_ERROR("Illegal register access in command stream\n");
6271 	gfx_v9_0_fault(adev, entry);
6272 	return 0;
6273 }
6274 
6275 static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev,
6276 			       struct amdgpu_irq_src *source,
6277 			       struct amdgpu_iv_entry *entry)
6278 {
6279 	DRM_ERROR("Illegal opcode in command stream\n");
6280 	gfx_v9_0_fault(adev, entry);
6281 	return 0;
6282 }
6283 
6284 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6285 				  struct amdgpu_irq_src *source,
6286 				  struct amdgpu_iv_entry *entry)
6287 {
6288 	DRM_ERROR("Illegal instruction in command stream\n");
6289 	gfx_v9_0_fault(adev, entry);
6290 	return 0;
6291 }
6292 
6293 
6294 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6295 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6296 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6297 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6298 	},
6299 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6300 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6301 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6302 	},
6303 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6304 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6305 	  0, 0
6306 	},
6307 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6308 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6309 	  0, 0
6310 	},
6311 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6312 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6313 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6314 	},
6315 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6316 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6317 	  0, 0
6318 	},
6319 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6320 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6321 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6322 	},
6323 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6324 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6325 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6326 	},
6327 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6328 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6329 	  0, 0
6330 	},
6331 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6332 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6333 	  0, 0
6334 	},
6335 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6336 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6337 	  0, 0
6338 	},
6339 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6340 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6341 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6342 	},
6343 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6344 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6345 	  0, 0
6346 	},
6347 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6348 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6349 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6350 	},
6351 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6352 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6353 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6354 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6355 	},
6356 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6357 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6358 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6359 	  0, 0
6360 	},
6361 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6362 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6363 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6364 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6365 	},
6366 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6367 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6368 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6369 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6370 	},
6371 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6372 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6373 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6374 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6375 	},
6376 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6377 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6378 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6379 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6380 	},
6381 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6382 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6383 	  0, 0
6384 	},
6385 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6386 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6387 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6388 	},
6389 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6390 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6391 	  0, 0
6392 	},
6393 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6394 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6395 	  0, 0
6396 	},
6397 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6398 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6399 	  0, 0
6400 	},
6401 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6402 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6403 	  0, 0
6404 	},
6405 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6406 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6407 	  0, 0
6408 	},
6409 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6410 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6411 	  0, 0
6412 	},
6413 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6414 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6415 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6416 	},
6417 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6418 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6419 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6420 	},
6421 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6422 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6423 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6424 	},
6425 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6426 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6427 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6428 	},
6429 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6430 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6431 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6432 	},
6433 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6434 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6435 	  0, 0
6436 	},
6437 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6438 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6439 	  0, 0
6440 	},
6441 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6442 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6443 	  0, 0
6444 	},
6445 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6446 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6447 	  0, 0
6448 	},
6449 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6450 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6451 	  0, 0
6452 	},
6453 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6454 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6455 	  0, 0
6456 	},
6457 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6458 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6459 	  0, 0
6460 	},
6461 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6462 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6463 	  0, 0
6464 	},
6465 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6466 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6467 	  0, 0
6468 	},
6469 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6470 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6471 	  0, 0
6472 	},
6473 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6474 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6475 	  0, 0
6476 	},
6477 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6478 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6479 	  0, 0
6480 	},
6481 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6482 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6483 	  0, 0
6484 	},
6485 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6486 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6487 	  0, 0
6488 	},
6489 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6490 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6491 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6492 	},
6493 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6494 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6495 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6496 	},
6497 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6498 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6499 	  0, 0
6500 	},
6501 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6502 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6503 	  0, 0
6504 	},
6505 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6506 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6507 	  0, 0
6508 	},
6509 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6510 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6511 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6512 	},
6513 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6514 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6515 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6516 	},
6517 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6518 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6519 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6520 	},
6521 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6522 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6523 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6524 	},
6525 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6526 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6527 	  0, 0
6528 	},
6529 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6530 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6531 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6532 	},
6533 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6534 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6535 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6536 	},
6537 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6538 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6539 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6540 	},
6541 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6542 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6543 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6544 	},
6545 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6546 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6547 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6548 	},
6549 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6550 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6551 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6552 	},
6553 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6554 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6555 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6556 	},
6557 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6558 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6559 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6560 	},
6561 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6562 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6563 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6564 	},
6565 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6566 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6567 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6568 	},
6569 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6570 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6571 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6572 	},
6573 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6574 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6575 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6576 	},
6577 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6578 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6579 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6580 	},
6581 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6582 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6583 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6584 	},
6585 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6586 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6587 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6588 	},
6589 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6590 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6591 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6592 	},
6593 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6594 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6595 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6596 	},
6597 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6598 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6599 	  0, 0
6600 	},
6601 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6602 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6603 	  0, 0
6604 	},
6605 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6606 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6607 	  0, 0
6608 	},
6609 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6610 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6611 	  0, 0
6612 	},
6613 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6614 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6615 	  0, 0
6616 	},
6617 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6618 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6619 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6620 	},
6621 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6622 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6623 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6624 	},
6625 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6626 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6627 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6628 	},
6629 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6630 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6631 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6632 	},
6633 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6634 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6635 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6636 	},
6637 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6638 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6639 	  0, 0
6640 	},
6641 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6642 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6643 	  0, 0
6644 	},
6645 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6646 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6647 	  0, 0
6648 	},
6649 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6650 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6651 	  0, 0
6652 	},
6653 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6654 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6655 	  0, 0
6656 	},
6657 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6658 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6659 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6660 	},
6661 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6662 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6663 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6664 	},
6665 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6666 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6667 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6668 	},
6669 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6670 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6671 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6672 	},
6673 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6674 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6675 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6676 	},
6677 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6678 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6679 	  0, 0
6680 	},
6681 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6682 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6683 	  0, 0
6684 	},
6685 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6686 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6687 	  0, 0
6688 	},
6689 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6690 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6691 	  0, 0
6692 	},
6693 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6694 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6695 	  0, 0
6696 	},
6697 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6698 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6699 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6700 	},
6701 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6702 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6703 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6704 	},
6705 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6706 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6707 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6708 	},
6709 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6710 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6711 	  0, 0
6712 	},
6713 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6714 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6715 	  0, 0
6716 	},
6717 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6718 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6719 	  0, 0
6720 	},
6721 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6722 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6723 	  0, 0
6724 	},
6725 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6726 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6727 	  0, 0
6728 	},
6729 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6730 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6731 	  0, 0
6732 	}
6733 };
6734 
6735 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6736 				     void *inject_if, uint32_t instance_mask)
6737 {
6738 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6739 	int ret;
6740 	struct ta_ras_trigger_error_input block_info = { 0 };
6741 
6742 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6743 		return -EINVAL;
6744 
6745 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6746 		return -EINVAL;
6747 
6748 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6749 		return -EPERM;
6750 
6751 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6752 	      info->head.type)) {
6753 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6754 			ras_gfx_subblocks[info->head.sub_block_index].name,
6755 			info->head.type);
6756 		return -EPERM;
6757 	}
6758 
6759 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6760 	      info->head.type)) {
6761 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6762 			ras_gfx_subblocks[info->head.sub_block_index].name,
6763 			info->head.type);
6764 		return -EPERM;
6765 	}
6766 
6767 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6768 	block_info.sub_block_index =
6769 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6770 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6771 	block_info.address = info->address;
6772 	block_info.value = info->value;
6773 
6774 	mutex_lock(&adev->grbm_idx_mutex);
6775 	ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6776 	mutex_unlock(&adev->grbm_idx_mutex);
6777 
6778 	return ret;
6779 }
6780 
6781 static const char * const vml2_mems[] = {
6782 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6783 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6784 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6785 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6786 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6787 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6788 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6789 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6790 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6791 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6792 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6793 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6794 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6795 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6796 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6797 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6798 };
6799 
6800 static const char * const vml2_walker_mems[] = {
6801 	"UTC_VML2_CACHE_PDE0_MEM0",
6802 	"UTC_VML2_CACHE_PDE0_MEM1",
6803 	"UTC_VML2_CACHE_PDE1_MEM0",
6804 	"UTC_VML2_CACHE_PDE1_MEM1",
6805 	"UTC_VML2_CACHE_PDE2_MEM0",
6806 	"UTC_VML2_CACHE_PDE2_MEM1",
6807 	"UTC_VML2_RDIF_LOG_FIFO",
6808 };
6809 
6810 static const char * const atc_l2_cache_2m_mems[] = {
6811 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6812 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6813 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6814 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6815 };
6816 
6817 static const char *atc_l2_cache_4k_mems[] = {
6818 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6819 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6820 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6821 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6822 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6823 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6824 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6825 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6826 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6827 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6828 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6829 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6830 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6831 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6832 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6833 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6834 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6835 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6836 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6837 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6838 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6839 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6840 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6841 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6842 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6843 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6844 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6845 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6846 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6847 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6848 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6849 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6850 };
6851 
6852 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6853 					 struct ras_err_data *err_data)
6854 {
6855 	uint32_t i, data;
6856 	uint32_t sec_count, ded_count;
6857 
6858 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6859 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6860 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6861 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6862 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6863 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6864 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6865 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6866 
6867 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6868 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6869 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6870 
6871 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6872 		if (sec_count) {
6873 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6874 				"SEC %d\n", i, vml2_mems[i], sec_count);
6875 			err_data->ce_count += sec_count;
6876 		}
6877 
6878 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6879 		if (ded_count) {
6880 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6881 				"DED %d\n", i, vml2_mems[i], ded_count);
6882 			err_data->ue_count += ded_count;
6883 		}
6884 	}
6885 
6886 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6887 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6888 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6889 
6890 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6891 						SEC_COUNT);
6892 		if (sec_count) {
6893 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6894 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6895 			err_data->ce_count += sec_count;
6896 		}
6897 
6898 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6899 						DED_COUNT);
6900 		if (ded_count) {
6901 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6902 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6903 			err_data->ue_count += ded_count;
6904 		}
6905 	}
6906 
6907 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6908 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6909 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6910 
6911 		sec_count = (data & 0x00006000L) >> 0xd;
6912 		if (sec_count) {
6913 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6914 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6915 				sec_count);
6916 			err_data->ce_count += sec_count;
6917 		}
6918 	}
6919 
6920 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6921 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6922 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6923 
6924 		sec_count = (data & 0x00006000L) >> 0xd;
6925 		if (sec_count) {
6926 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6927 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6928 				sec_count);
6929 			err_data->ce_count += sec_count;
6930 		}
6931 
6932 		ded_count = (data & 0x00018000L) >> 0xf;
6933 		if (ded_count) {
6934 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6935 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6936 				ded_count);
6937 			err_data->ue_count += ded_count;
6938 		}
6939 	}
6940 
6941 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6942 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6943 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6944 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6945 
6946 	return 0;
6947 }
6948 
6949 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6950 	const struct soc15_reg_entry *reg,
6951 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6952 	uint32_t *sec_count, uint32_t *ded_count)
6953 {
6954 	uint32_t i;
6955 	uint32_t sec_cnt, ded_cnt;
6956 
6957 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6958 		if(gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6959 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6960 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6961 			continue;
6962 
6963 		sec_cnt = (value &
6964 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6965 				gfx_v9_0_ras_fields[i].sec_count_shift;
6966 		if (sec_cnt) {
6967 			dev_info(adev->dev, "GFX SubBlock %s, "
6968 				"Instance[%d][%d], SEC %d\n",
6969 				gfx_v9_0_ras_fields[i].name,
6970 				se_id, inst_id,
6971 				sec_cnt);
6972 			*sec_count += sec_cnt;
6973 		}
6974 
6975 		ded_cnt = (value &
6976 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6977 				gfx_v9_0_ras_fields[i].ded_count_shift;
6978 		if (ded_cnt) {
6979 			dev_info(adev->dev, "GFX SubBlock %s, "
6980 				"Instance[%d][%d], DED %d\n",
6981 				gfx_v9_0_ras_fields[i].name,
6982 				se_id, inst_id,
6983 				ded_cnt);
6984 			*ded_count += ded_cnt;
6985 		}
6986 	}
6987 
6988 	return 0;
6989 }
6990 
6991 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6992 {
6993 	int i, j, k;
6994 
6995 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6996 		return;
6997 
6998 	/* read back registers to clear the counters */
6999 	mutex_lock(&adev->grbm_idx_mutex);
7000 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7001 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7002 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7003 				amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
7004 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7005 			}
7006 		}
7007 	}
7008 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
7009 	mutex_unlock(&adev->grbm_idx_mutex);
7010 
7011 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7012 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
7013 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7014 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
7015 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7016 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
7017 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7018 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
7019 
7020 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
7021 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
7022 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
7023 	}
7024 
7025 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
7026 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
7027 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
7028 	}
7029 
7030 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
7031 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
7032 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
7033 	}
7034 
7035 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
7036 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
7037 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
7038 	}
7039 
7040 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7041 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7042 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7043 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7044 }
7045 
7046 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
7047 					  void *ras_error_status)
7048 {
7049 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
7050 	uint32_t sec_count = 0, ded_count = 0;
7051 	uint32_t i, j, k;
7052 	uint32_t reg_value;
7053 
7054 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7055 		return;
7056 
7057 	err_data->ue_count = 0;
7058 	err_data->ce_count = 0;
7059 
7060 	mutex_lock(&adev->grbm_idx_mutex);
7061 
7062 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7063 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7064 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7065 				amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
7066 				reg_value =
7067 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7068 				if (reg_value)
7069 					gfx_v9_0_ras_error_count(adev,
7070 						&gfx_v9_0_edc_counter_regs[i],
7071 						j, k, reg_value,
7072 						&sec_count, &ded_count);
7073 			}
7074 		}
7075 	}
7076 
7077 	err_data->ce_count += sec_count;
7078 	err_data->ue_count += ded_count;
7079 
7080 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7081 	mutex_unlock(&adev->grbm_idx_mutex);
7082 
7083 	gfx_v9_0_query_utc_edc_status(adev, err_data);
7084 }
7085 
7086 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
7087 {
7088 	const unsigned int cp_coher_cntl =
7089 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
7090 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
7091 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
7092 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
7093 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
7094 
7095 	/* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */
7096 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
7097 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
7098 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
7099 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
7100 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
7101 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
7102 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
7103 }
7104 
7105 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
7106 					uint32_t pipe, bool enable)
7107 {
7108 	struct amdgpu_device *adev = ring->adev;
7109 	uint32_t val;
7110 	uint32_t wcl_cs_reg;
7111 
7112 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
7113 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
7114 
7115 	switch (pipe) {
7116 	case 0:
7117 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
7118 		break;
7119 	case 1:
7120 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
7121 		break;
7122 	case 2:
7123 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
7124 		break;
7125 	case 3:
7126 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
7127 		break;
7128 	default:
7129 		DRM_DEBUG("invalid pipe %d\n", pipe);
7130 		return;
7131 	}
7132 
7133 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
7134 
7135 }
7136 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
7137 {
7138 	struct amdgpu_device *adev = ring->adev;
7139 	uint32_t val;
7140 	int i;
7141 
7142 
7143 	/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
7144 	 * number of gfx waves. Setting 5 bit will make sure gfx only gets
7145 	 * around 25% of gpu resources.
7146 	 */
7147 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
7148 	amdgpu_ring_emit_wreg(ring,
7149 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
7150 			      val);
7151 
7152 	/* Restrict waves for normal/low priority compute queues as well
7153 	 * to get best QoS for high priority compute jobs.
7154 	 *
7155 	 * amdgpu controls only 1st ME(0-3 CS pipes).
7156 	 */
7157 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
7158 		if (i != ring->pipe)
7159 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
7160 
7161 	}
7162 }
7163 
7164 static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
7165 {
7166 	int i;
7167 
7168 	/* Header itself is a NOP packet */
7169 	if (num_nop == 1) {
7170 		amdgpu_ring_write(ring, ring->funcs->nop);
7171 		return;
7172 	}
7173 
7174 	/* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
7175 	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
7176 
7177 	/* Header is at index 0, followed by num_nops - 1 NOP packet's */
7178 	for (i = 1; i < num_nop; i++)
7179 		amdgpu_ring_write(ring, ring->funcs->nop);
7180 }
7181 
7182 static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
7183 {
7184 	struct amdgpu_device *adev = ring->adev;
7185 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7186 	struct amdgpu_ring *kiq_ring = &kiq->ring;
7187 	unsigned long flags;
7188 	u32 tmp;
7189 	int r;
7190 
7191 	if (amdgpu_sriov_vf(adev))
7192 		return -EINVAL;
7193 
7194 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7195 		return -EINVAL;
7196 
7197 	spin_lock_irqsave(&kiq->ring_lock, flags);
7198 
7199 	if (amdgpu_ring_alloc(kiq_ring, 5)) {
7200 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
7201 		return -ENOMEM;
7202 	}
7203 
7204 	tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
7205 	gfx_v9_0_ring_emit_wreg(kiq_ring,
7206 				 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
7207 	amdgpu_ring_commit(kiq_ring);
7208 
7209 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
7210 
7211 	r = amdgpu_ring_test_ring(kiq_ring);
7212 	if (r)
7213 		return r;
7214 
7215 	if (amdgpu_ring_alloc(ring, 7 + 7 + 5))
7216 		return -ENOMEM;
7217 	gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
7218 				 ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
7219 	gfx_v9_0_ring_emit_reg_wait(ring,
7220 				    SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff);
7221 	gfx_v9_0_ring_emit_wreg(ring,
7222 				SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0);
7223 
7224 	return amdgpu_ring_test_ring(ring);
7225 }
7226 
7227 static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
7228 			      unsigned int vmid)
7229 {
7230 	struct amdgpu_device *adev = ring->adev;
7231 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7232 	struct amdgpu_ring *kiq_ring = &kiq->ring;
7233 	unsigned long flags;
7234 	int i, r;
7235 
7236 	if (amdgpu_sriov_vf(adev))
7237 		return -EINVAL;
7238 
7239 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7240 		return -EINVAL;
7241 
7242 	spin_lock_irqsave(&kiq->ring_lock, flags);
7243 
7244 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
7245 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
7246 		return -ENOMEM;
7247 	}
7248 
7249 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
7250 				   0, 0);
7251 	amdgpu_ring_commit(kiq_ring);
7252 
7253 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
7254 
7255 	r = amdgpu_ring_test_ring(kiq_ring);
7256 	if (r)
7257 		return r;
7258 
7259 	/* make sure dequeue is complete*/
7260 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
7261 	mutex_lock(&adev->srbm_mutex);
7262 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
7263 	for (i = 0; i < adev->usec_timeout; i++) {
7264 		if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
7265 			break;
7266 		udelay(1);
7267 	}
7268 	if (i >= adev->usec_timeout)
7269 		r = -ETIMEDOUT;
7270 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7271 	mutex_unlock(&adev->srbm_mutex);
7272 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
7273 	if (r) {
7274 		dev_err(adev->dev, "fail to wait on hqd deactive\n");
7275 		return r;
7276 	}
7277 
7278 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
7279 	if (unlikely(r != 0)){
7280 		dev_err(adev->dev, "fail to resv mqd_obj\n");
7281 		return r;
7282 	}
7283 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
7284 	if (!r) {
7285 		r = gfx_v9_0_kcq_init_queue(ring, true);
7286 		amdgpu_bo_kunmap(ring->mqd_obj);
7287 		ring->mqd_ptr = NULL;
7288 	}
7289 	amdgpu_bo_unreserve(ring->mqd_obj);
7290 	if (r) {
7291 		dev_err(adev->dev, "fail to unresv mqd_obj\n");
7292 		return r;
7293 	}
7294 	spin_lock_irqsave(&kiq->ring_lock, flags);
7295 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
7296 	if (r) {
7297 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
7298 		return -ENOMEM;
7299 	}
7300 	kiq->pmf->kiq_map_queues(kiq_ring, ring);
7301 	amdgpu_ring_commit(kiq_ring);
7302 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
7303 	r = amdgpu_ring_test_ring(kiq_ring);
7304 	if (r) {
7305 		DRM_ERROR("fail to remap queue\n");
7306 		return r;
7307 	}
7308 	return amdgpu_ring_test_ring(ring);
7309 }
7310 
7311 static void gfx_v9_ip_print(void *handle, struct drm_printer *p)
7312 {
7313 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7314 	uint32_t i, j, k, reg, index = 0;
7315 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7316 
7317 	if (!adev->gfx.ip_dump_core)
7318 		return;
7319 
7320 	for (i = 0; i < reg_count; i++)
7321 		drm_printf(p, "%-50s \t 0x%08x\n",
7322 			   gc_reg_list_9[i].reg_name,
7323 			   adev->gfx.ip_dump_core[i]);
7324 
7325 	/* print compute queue registers for all instances */
7326 	if (!adev->gfx.ip_dump_compute_queues)
7327 		return;
7328 
7329 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7330 	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7331 		   adev->gfx.mec.num_mec,
7332 		   adev->gfx.mec.num_pipe_per_mec,
7333 		   adev->gfx.mec.num_queue_per_pipe);
7334 
7335 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7336 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7337 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7338 				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7339 				for (reg = 0; reg < reg_count; reg++) {
7340 					drm_printf(p, "%-50s \t 0x%08x\n",
7341 						   gc_cp_reg_list_9[reg].reg_name,
7342 						   adev->gfx.ip_dump_compute_queues[index + reg]);
7343 				}
7344 				index += reg_count;
7345 			}
7346 		}
7347 	}
7348 
7349 }
7350 
7351 static void gfx_v9_ip_dump(void *handle)
7352 {
7353 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7354 	uint32_t i, j, k, reg, index = 0;
7355 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7356 
7357 	if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
7358 		return;
7359 
7360 	amdgpu_gfx_off_ctrl(adev, false);
7361 	for (i = 0; i < reg_count; i++)
7362 		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7363 	amdgpu_gfx_off_ctrl(adev, true);
7364 
7365 	/* dump compute queue registers for all instances */
7366 	if (!adev->gfx.ip_dump_compute_queues)
7367 		return;
7368 
7369 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7370 	amdgpu_gfx_off_ctrl(adev, false);
7371 	mutex_lock(&adev->srbm_mutex);
7372 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7373 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7374 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7375 				/* ME0 is for GFX so start from 1 for CP */
7376 				soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
7377 
7378 				for (reg = 0; reg < reg_count; reg++) {
7379 					adev->gfx.ip_dump_compute_queues[index + reg] =
7380 						RREG32(SOC15_REG_ENTRY_OFFSET(
7381 							gc_cp_reg_list_9[reg]));
7382 				}
7383 				index += reg_count;
7384 			}
7385 		}
7386 	}
7387 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7388 	mutex_unlock(&adev->srbm_mutex);
7389 	amdgpu_gfx_off_ctrl(adev, true);
7390 
7391 }
7392 
7393 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
7394 {
7395 	/* Emit the cleaner shader */
7396 	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
7397 	amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
7398 }
7399 
7400 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
7401 	.name = "gfx_v9_0",
7402 	.early_init = gfx_v9_0_early_init,
7403 	.late_init = gfx_v9_0_late_init,
7404 	.sw_init = gfx_v9_0_sw_init,
7405 	.sw_fini = gfx_v9_0_sw_fini,
7406 	.hw_init = gfx_v9_0_hw_init,
7407 	.hw_fini = gfx_v9_0_hw_fini,
7408 	.suspend = gfx_v9_0_suspend,
7409 	.resume = gfx_v9_0_resume,
7410 	.is_idle = gfx_v9_0_is_idle,
7411 	.wait_for_idle = gfx_v9_0_wait_for_idle,
7412 	.soft_reset = gfx_v9_0_soft_reset,
7413 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
7414 	.set_powergating_state = gfx_v9_0_set_powergating_state,
7415 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
7416 	.dump_ip_state = gfx_v9_ip_dump,
7417 	.print_ip_state = gfx_v9_ip_print,
7418 };
7419 
7420 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
7421 	.type = AMDGPU_RING_TYPE_GFX,
7422 	.align_mask = 0xff,
7423 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7424 	.support_64bit_ptrs = true,
7425 	.secure_submission_supported = true,
7426 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
7427 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
7428 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
7429 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
7430 		5 +  /* COND_EXEC */
7431 		7 +  /* PIPELINE_SYNC */
7432 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7433 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7434 		2 + /* VM_FLUSH */
7435 		8 +  /* FENCE for VM_FLUSH */
7436 		20 + /* GDS switch */
7437 		4 + /* double SWITCH_BUFFER,
7438 		       the first COND_EXEC jump to the place just
7439 			   prior to this double SWITCH_BUFFER  */
7440 		5 + /* COND_EXEC */
7441 		7 +	 /*	HDP_flush */
7442 		4 +	 /*	VGT_flush */
7443 		14 + /*	CE_META */
7444 		31 + /*	DE_META */
7445 		3 + /* CNTX_CTRL */
7446 		5 + /* HDP_INVL */
7447 		8 + 8 + /* FENCE x2 */
7448 		2 + /* SWITCH_BUFFER */
7449 		7 + /* gfx_v9_0_emit_mem_sync */
7450 		2, /* gfx_v9_0_ring_emit_cleaner_shader */
7451 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
7452 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7453 	.emit_fence = gfx_v9_0_ring_emit_fence,
7454 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7455 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7456 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7457 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7458 	.test_ring = gfx_v9_0_ring_test_ring,
7459 	.insert_nop = gfx_v9_ring_insert_nop,
7460 	.pad_ib = amdgpu_ring_generic_pad_ib,
7461 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
7462 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7463 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7464 	.preempt_ib = gfx_v9_0_ring_preempt_ib,
7465 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7466 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7467 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7468 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7469 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7470 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7471 	.reset = gfx_v9_0_reset_kgq,
7472 	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7473 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7474 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7475 };
7476 
7477 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
7478 	.type = AMDGPU_RING_TYPE_GFX,
7479 	.align_mask = 0xff,
7480 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7481 	.support_64bit_ptrs = true,
7482 	.secure_submission_supported = true,
7483 	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
7484 	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
7485 	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
7486 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
7487 		5 +  /* COND_EXEC */
7488 		7 +  /* PIPELINE_SYNC */
7489 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7490 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7491 		2 + /* VM_FLUSH */
7492 		8 +  /* FENCE for VM_FLUSH */
7493 		20 + /* GDS switch */
7494 		4 + /* double SWITCH_BUFFER,
7495 		     * the first COND_EXEC jump to the place just
7496 		     * prior to this double SWITCH_BUFFER
7497 		     */
7498 		5 + /* COND_EXEC */
7499 		7 +	 /*	HDP_flush */
7500 		4 +	 /*	VGT_flush */
7501 		14 + /*	CE_META */
7502 		31 + /*	DE_META */
7503 		3 + /* CNTX_CTRL */
7504 		5 + /* HDP_INVL */
7505 		8 + 8 + /* FENCE x2 */
7506 		2 + /* SWITCH_BUFFER */
7507 		7 + /* gfx_v9_0_emit_mem_sync */
7508 		2, /* gfx_v9_0_ring_emit_cleaner_shader */
7509 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
7510 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7511 	.emit_fence = gfx_v9_0_ring_emit_fence,
7512 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7513 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7514 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7515 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7516 	.test_ring = gfx_v9_0_ring_test_ring,
7517 	.test_ib = gfx_v9_0_ring_test_ib,
7518 	.insert_nop = gfx_v9_ring_insert_nop,
7519 	.pad_ib = amdgpu_ring_generic_pad_ib,
7520 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
7521 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7522 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7523 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7524 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7525 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7526 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7527 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7528 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7529 	.patch_cntl = gfx_v9_0_ring_patch_cntl,
7530 	.patch_de = gfx_v9_0_ring_patch_de_meta,
7531 	.patch_ce = gfx_v9_0_ring_patch_ce_meta,
7532 	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7533 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7534 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7535 };
7536 
7537 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7538 	.type = AMDGPU_RING_TYPE_COMPUTE,
7539 	.align_mask = 0xff,
7540 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7541 	.support_64bit_ptrs = true,
7542 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7543 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7544 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7545 	.emit_frame_size =
7546 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7547 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7548 		5 + /* hdp invalidate */
7549 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7550 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7551 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7552 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7553 		7 + /* gfx_v9_0_emit_mem_sync */
7554 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7555 		15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7556 		2, /* gfx_v9_0_ring_emit_cleaner_shader */
7557 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7558 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
7559 	.emit_fence = gfx_v9_0_ring_emit_fence,
7560 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7561 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7562 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7563 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7564 	.test_ring = gfx_v9_0_ring_test_ring,
7565 	.test_ib = gfx_v9_0_ring_test_ib,
7566 	.insert_nop = gfx_v9_ring_insert_nop,
7567 	.pad_ib = amdgpu_ring_generic_pad_ib,
7568 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7569 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7570 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7571 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7572 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7573 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
7574 	.reset = gfx_v9_0_reset_kcq,
7575 	.emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7576 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7577 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7578 };
7579 
7580 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7581 	.type = AMDGPU_RING_TYPE_KIQ,
7582 	.align_mask = 0xff,
7583 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7584 	.support_64bit_ptrs = true,
7585 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7586 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7587 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7588 	.emit_frame_size =
7589 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7590 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7591 		5 + /* hdp invalidate */
7592 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7593 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7594 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7595 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7596 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7597 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7598 	.test_ring = gfx_v9_0_ring_test_ring,
7599 	.insert_nop = amdgpu_ring_insert_nop,
7600 	.pad_ib = amdgpu_ring_generic_pad_ib,
7601 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
7602 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7603 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7604 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7605 };
7606 
7607 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7608 {
7609 	int i;
7610 
7611 	adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7612 
7613 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7614 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7615 
7616 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7617 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7618 			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7619 	}
7620 
7621 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7622 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7623 }
7624 
7625 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7626 	.set = gfx_v9_0_set_eop_interrupt_state,
7627 	.process = gfx_v9_0_eop_irq,
7628 };
7629 
7630 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7631 	.set = gfx_v9_0_set_priv_reg_fault_state,
7632 	.process = gfx_v9_0_priv_reg_irq,
7633 };
7634 
7635 static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = {
7636 	.set = gfx_v9_0_set_bad_op_fault_state,
7637 	.process = gfx_v9_0_bad_op_irq,
7638 };
7639 
7640 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7641 	.set = gfx_v9_0_set_priv_inst_fault_state,
7642 	.process = gfx_v9_0_priv_inst_irq,
7643 };
7644 
7645 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7646 	.set = gfx_v9_0_set_cp_ecc_error_state,
7647 	.process = amdgpu_gfx_cp_ecc_error_irq,
7648 };
7649 
7650 
7651 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7652 {
7653 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7654 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7655 
7656 	adev->gfx.priv_reg_irq.num_types = 1;
7657 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7658 
7659 	adev->gfx.bad_op_irq.num_types = 1;
7660 	adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;
7661 
7662 	adev->gfx.priv_inst_irq.num_types = 1;
7663 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7664 
7665 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7666 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7667 }
7668 
7669 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7670 {
7671 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7672 	case IP_VERSION(9, 0, 1):
7673 	case IP_VERSION(9, 2, 1):
7674 	case IP_VERSION(9, 4, 0):
7675 	case IP_VERSION(9, 2, 2):
7676 	case IP_VERSION(9, 1, 0):
7677 	case IP_VERSION(9, 4, 1):
7678 	case IP_VERSION(9, 3, 0):
7679 	case IP_VERSION(9, 4, 2):
7680 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7681 		break;
7682 	default:
7683 		break;
7684 	}
7685 }
7686 
7687 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7688 {
7689 	/* init asci gds info */
7690 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7691 	case IP_VERSION(9, 0, 1):
7692 	case IP_VERSION(9, 2, 1):
7693 	case IP_VERSION(9, 4, 0):
7694 		adev->gds.gds_size = 0x10000;
7695 		break;
7696 	case IP_VERSION(9, 2, 2):
7697 	case IP_VERSION(9, 1, 0):
7698 	case IP_VERSION(9, 4, 1):
7699 		adev->gds.gds_size = 0x1000;
7700 		break;
7701 	case IP_VERSION(9, 4, 2):
7702 		/* aldebaran removed all the GDS internal memory,
7703 		 * only support GWS opcode in kernel, like barrier
7704 		 * semaphore.etc */
7705 		adev->gds.gds_size = 0;
7706 		break;
7707 	default:
7708 		adev->gds.gds_size = 0x10000;
7709 		break;
7710 	}
7711 
7712 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7713 	case IP_VERSION(9, 0, 1):
7714 	case IP_VERSION(9, 4, 0):
7715 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7716 		break;
7717 	case IP_VERSION(9, 2, 1):
7718 		adev->gds.gds_compute_max_wave_id = 0x27f;
7719 		break;
7720 	case IP_VERSION(9, 2, 2):
7721 	case IP_VERSION(9, 1, 0):
7722 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7723 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7724 		else
7725 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7726 		break;
7727 	case IP_VERSION(9, 4, 1):
7728 		adev->gds.gds_compute_max_wave_id = 0xfff;
7729 		break;
7730 	case IP_VERSION(9, 4, 2):
7731 		/* deprecated for Aldebaran, no usage at all */
7732 		adev->gds.gds_compute_max_wave_id = 0;
7733 		break;
7734 	default:
7735 		/* this really depends on the chip */
7736 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7737 		break;
7738 	}
7739 
7740 	adev->gds.gws_size = 64;
7741 	adev->gds.oa_size = 16;
7742 }
7743 
7744 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7745 						 u32 bitmap)
7746 {
7747 	u32 data;
7748 
7749 	if (!bitmap)
7750 		return;
7751 
7752 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7753 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7754 
7755 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7756 }
7757 
7758 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7759 {
7760 	u32 data, mask;
7761 
7762 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7763 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7764 
7765 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7766 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7767 
7768 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7769 
7770 	return (~data) & mask;
7771 }
7772 
7773 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7774 				 struct amdgpu_cu_info *cu_info)
7775 {
7776 	int i, j, k, counter, active_cu_number = 0;
7777 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7778 	unsigned disable_masks[4 * 4];
7779 
7780 	if (!adev || !cu_info)
7781 		return -EINVAL;
7782 
7783 	/*
7784 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
7785 	 */
7786 	if (adev->gfx.config.max_shader_engines *
7787 		adev->gfx.config.max_sh_per_se > 16)
7788 		return -EINVAL;
7789 
7790 	amdgpu_gfx_parse_disable_cu(disable_masks,
7791 				    adev->gfx.config.max_shader_engines,
7792 				    adev->gfx.config.max_sh_per_se);
7793 
7794 	mutex_lock(&adev->grbm_idx_mutex);
7795 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7796 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7797 			mask = 1;
7798 			ao_bitmap = 0;
7799 			counter = 0;
7800 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7801 			gfx_v9_0_set_user_cu_inactive_bitmap(
7802 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7803 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7804 
7805 			/*
7806 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
7807 			 * 4x4 size array, and it's usually suitable for Vega
7808 			 * ASICs which has 4*2 SE/SH layout.
7809 			 * But for Arcturus, SE/SH layout is changed to 8*1.
7810 			 * To mostly reduce the impact, we make it compatible
7811 			 * with current bitmap array as below:
7812 			 *    SE4,SH0 --> bitmap[0][1]
7813 			 *    SE5,SH0 --> bitmap[1][1]
7814 			 *    SE6,SH0 --> bitmap[2][1]
7815 			 *    SE7,SH0 --> bitmap[3][1]
7816 			 */
7817 			cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7818 
7819 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7820 				if (bitmap & mask) {
7821 					if (counter < adev->gfx.config.max_cu_per_sh)
7822 						ao_bitmap |= mask;
7823 					counter ++;
7824 				}
7825 				mask <<= 1;
7826 			}
7827 			active_cu_number += counter;
7828 			if (i < 2 && j < 2)
7829 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7830 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7831 		}
7832 	}
7833 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7834 	mutex_unlock(&adev->grbm_idx_mutex);
7835 
7836 	cu_info->number = active_cu_number;
7837 	cu_info->ao_cu_mask = ao_cu_mask;
7838 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7839 
7840 	return 0;
7841 }
7842 
7843 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7844 {
7845 	.type = AMD_IP_BLOCK_TYPE_GFX,
7846 	.major = 9,
7847 	.minor = 0,
7848 	.rev = 0,
7849 	.funcs = &gfx_v9_0_ip_funcs,
7850 };
7851