xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 8e1bb4a41aa78d6105e59186af3dcd545fc66e70)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_4_2.h"
54 
55 #include "asic_reg/pwr/pwr_10_0_offset.h"
56 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
57 #include "asic_reg/gc/gc_9_0_default.h"
58 
59 #define GFX9_NUM_GFX_RINGS     1
60 #define GFX9_NUM_SW_GFX_RINGS  2
61 #define GFX9_MEC_HPD_SIZE 4096
62 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
63 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
64 
65 #define mmGCEA_PROBE_MAP                        0x070c
66 #define mmGCEA_PROBE_MAP_BASE_IDX               0
67 
68 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
74 
75 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
81 
82 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
88 
89 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
90 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/raven_me.bin");
92 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
95 
96 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
103 
104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
111 
112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
113 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
114 
115 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120 
121 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
127 
128 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
133 
134 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
135 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
137 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
139 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
141 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
142 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
143 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
144 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
145 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
146 
147 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
149 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
151 
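/*
 * GC registers captured when the GFX9 IP state is dumped for debugging
 * (e.g. for a devcoredump after a GPU hang).
 */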
152 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
153 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
154 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
155 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
156 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
157 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
158 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
159 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
160 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
161 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
162 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
163 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
164 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
165 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
166 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
167 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
168 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
169 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
170 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
171 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
172 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
173 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
174 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
175 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
176 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
177 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
178 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
179 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
180 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
181 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
182 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
183 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
184 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
185 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
186 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
187 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
188 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
189 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
190 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
191 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
192 	SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
193 	SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
194 	SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
195 	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
196 	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
197 	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
198 	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
199 	SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
200 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
201 	SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
202 	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
203 	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
204 	SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
205 	SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
206 	SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
207 	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
208 	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
209 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
210 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
211 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
212 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
213 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
214 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
215 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
216 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
217 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
218 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
219 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
220 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
221 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
222 	SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
223 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
224 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
225 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
226 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
227 	/* cp header registers */
228 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
229 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
230 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
231 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
232 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
233 	/* SE status registers */
234 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
235 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
236 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
237 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
238 };
239 
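/*
 * Per-queue compute HQD registers captured for each compute queue when
 * dumping GFX9 IP state.
 */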
240 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
241 	/* compute queue registers */
242 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
243 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
244 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
245 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
246 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
247 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
248 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
249 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
250 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
251 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
252 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
253 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
254 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
255 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
256 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
257 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
258 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
259 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
260 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
261 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
262 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
263 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
264 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
265 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
266 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
267 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
268 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
269 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
270 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
271 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
272 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
273 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
274 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
275 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
276 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
277 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
278 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
279 };
280 
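/*
 * GFX sub-block indices as understood by the RAS TA.  The *_INDEX_START /
 * *_INDEX_END markers delimit ranges of related sub-blocks so they can be
 * handled as a group.
 */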
281 enum ta_ras_gfx_subblock {
282 	/*CPC*/
283 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
284 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
285 	TA_RAS_BLOCK__GFX_CPC_UCODE,
286 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
287 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
288 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
289 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
290 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
291 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
292 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
293 	/* CPF*/
294 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
295 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
296 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
297 	TA_RAS_BLOCK__GFX_CPF_TAG,
298 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
299 	/* CPG*/
300 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
301 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
302 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
303 	TA_RAS_BLOCK__GFX_CPG_TAG,
304 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
305 	/* GDS*/
306 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
307 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
308 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
309 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
310 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
311 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
312 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
313 	/* SPI*/
314 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
315 	/* SQ*/
316 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
317 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
318 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
319 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
320 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
321 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
322 	/* SQC (3 ranges)*/
323 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
324 	/* SQC range 0*/
325 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
326 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
327 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
328 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
329 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
330 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
331 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
332 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
333 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
334 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
335 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
336 	/* SQC range 1*/
337 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
338 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
339 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
340 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
341 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
342 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
343 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
344 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
345 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
346 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
347 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
348 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
349 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
350 	/* SQC range 2*/
351 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
352 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
353 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
354 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
355 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
356 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
357 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
358 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
359 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
360 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
361 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
362 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
363 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
364 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
365 	/* TA*/
366 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
367 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
368 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
369 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
370 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
371 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
372 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
373 	/* TCA*/
374 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
375 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
376 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
377 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
378 	/* TCC (5 sub-ranges)*/
379 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
380 	/* TCC range 0*/
381 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
382 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
383 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
384 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
385 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
386 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
387 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
388 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
389 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
390 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
391 	/* TCC range 1*/
392 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
393 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
394 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
395 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
396 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
397 	/* TCC range 2*/
398 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
399 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
400 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
401 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
402 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
403 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
404 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
405 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
406 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
407 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
408 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
409 	/* TCC range 3*/
410 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
411 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
412 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
413 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
414 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
415 	/* TCC range 4*/
416 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
417 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
418 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
419 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
420 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
421 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
422 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
423 	/* TCI*/
424 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
425 	/* TCP*/
426 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
427 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
428 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
429 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
430 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
431 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
432 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
433 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
434 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
435 	/* TD*/
436 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
437 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
438 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
439 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
440 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
441 	/* EA (3 sub-ranges)*/
442 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
443 	/* EA range 0*/
444 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
445 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
446 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
447 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
448 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
449 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
450 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
451 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
452 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
453 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
454 	/* EA range 1*/
455 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
456 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
457 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
458 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
459 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
460 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
461 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
462 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
463 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
464 	/* EA range 2*/
465 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
466 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
467 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
468 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
469 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
470 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
471 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
472 	/* UTC VM L2 bank*/
473 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
474 	/* UTC VM walker*/
475 	TA_RAS_BLOCK__UTC_VML2_WALKER,
476 	/* UTC ATC L2 2MB cache*/
477 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
478 	/* UTC ATC L2 4KB cache*/
479 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
480 	TA_RAS_BLOCK__GFX_MAX
481 };
482 
483 struct ras_gfx_subblock {
484 	unsigned char *name;
485 	int ta_subblock;
486 	int hw_supported_error_type;
487 	int sw_supported_error_type;
488 };
489 
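/*
 * Build one ras_gfx_subblocks[] entry: map the driver's
 * AMDGPU_RAS_BLOCK__<subblock> index to the RAS TA enum and pack the
 * hardware (a-d) and software (e-h) supported-error-type flags into
 * bitmasks.
 */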
490 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
491 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
492 		#subblock,                                                     \
493 		TA_RAS_BLOCK__##subblock,                                      \
494 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
495 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
496 	}
497 
498 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
499 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
509 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
510 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
511 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
512 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
513 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
514 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
515 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
516 			     0),
517 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
518 			     0),
519 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
520 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
521 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
522 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
523 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
524 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
525 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
526 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
527 			     0, 0),
528 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
529 			     0),
530 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
531 			     0, 0),
532 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
533 			     0),
534 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
535 			     0, 0),
536 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
537 			     0),
538 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
539 			     1),
540 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
541 			     0, 0, 0),
542 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
543 			     0),
544 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
545 			     0),
546 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
547 			     0),
548 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
549 			     0),
550 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
551 			     0),
552 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
553 			     0, 0),
554 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
555 			     0),
556 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
557 			     0),
558 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
559 			     0, 0, 0),
560 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
561 			     0),
562 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
563 			     0),
564 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
565 			     0),
566 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
567 			     0),
568 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
569 			     0),
570 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
571 			     0, 0),
572 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
573 			     0),
574 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
575 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
576 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
577 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
578 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
579 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
580 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
581 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
582 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
583 			     1),
584 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
585 			     1),
586 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
587 			     1),
588 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
589 			     0),
590 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
591 			     0),
592 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
593 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
594 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
595 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
596 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
597 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
598 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
599 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
600 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
601 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
602 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
603 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
604 			     0),
605 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
606 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
607 			     0),
608 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
609 			     0, 0),
610 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
611 			     0),
612 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
613 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
614 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
615 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
616 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
617 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
618 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
619 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
620 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
621 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
622 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
623 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
624 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
625 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
626 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
627 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
628 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
629 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
630 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
631 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
632 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
633 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
634 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
635 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
636 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
637 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
638 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
639 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
640 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
641 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
642 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
643 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
644 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
645 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
646 };
647 
648 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
649 {
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
670 };
671 
672 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
673 {
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
692 };
693 
694 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
695 {
696 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
707 };
708 
709 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
710 {
711 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
712 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
713 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
714 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
715 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
716 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
717 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
718 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
719 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
720 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
721 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
722 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
723 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
724 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
725 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
726 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
727 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
728 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
729 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
730 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
731 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
732 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
733 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
734 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
735 };
736 
737 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
738 {
739 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
740 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
741 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
742 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
743 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
744 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
745 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
746 };
747 
748 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
749 {
750 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
751 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
752 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
753 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
754 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
755 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
756 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
757 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
758 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
759 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
760 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
761 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
762 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
763 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
764 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
765 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
766 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
767 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
768 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
769 };
770 
771 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
772 {
773 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
774 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
775 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
776 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
777 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
778 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
779 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
780 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
781 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
782 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
783 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
784 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
785 };
786 
787 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
788 {
789 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
790 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
791 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
792 };
793 
794 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
795 {
796 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
797 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
798 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
799 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
800 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
801 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
802 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
803 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
804 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
805 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
806 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
807 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
808 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
809 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
810 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
811 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
812 };
813 
814 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
815 {
816 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
817 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
818 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
819 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
820 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
821 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
822 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
823 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
824 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
825 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
826 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
827 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
828 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
829 };
830 
831 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
832 {
833 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
834 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
835 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
836 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
837 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
838 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
839 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
840 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
841 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
842 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
843 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
844 };
845 
846 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
847 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
848 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
849 };
850 
851 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
852 {
853 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
854 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
855 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
856 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
857 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
858 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
859 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
860 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
861 };
862 
863 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
864 {
865 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
866 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
867 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
868 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
869 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
870 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
871 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
872 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
873 };
874 
875 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
876 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
877 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
878 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
879 
880 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
881 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
882 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
883 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
884 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
885 				struct amdgpu_cu_info *cu_info);
886 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
887 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
888 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
889 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
890 					  void *ras_error_status);
891 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
892 				     void *inject_if, uint32_t instance_mask);
893 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
894 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
895 					      unsigned int vmid);
896 
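/*
 * KIQ PM4 helpers: each of the functions below emits one PM4 packet on the
 * KIQ ring.  SET_RESOURCES hands the KIQ the mask of compute queues it is
 * allowed to manage.
 */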
897 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
898 				uint64_t queue_mask)
899 {
900 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
901 	amdgpu_ring_write(kiq_ring,
902 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
903 		/* vmid_mask:0 queue_type:0 (KIQ) */
904 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
905 	amdgpu_ring_write(kiq_ring,
906 			lower_32_bits(queue_mask));	/* queue mask lo */
907 	amdgpu_ring_write(kiq_ring,
908 			upper_32_bits(queue_mask));	/* queue mask hi */
909 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
910 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
911 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
912 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
913 }
914 
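/*
 * Emit a MAP_QUEUES packet so the KIQ maps @ring's MQD and makes the queue
 * schedulable.
 */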
915 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
916 				 struct amdgpu_ring *ring)
917 {
918 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
919 	uint64_t wptr_addr = ring->wptr_gpu_addr;
920 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
921 
922 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
923 	/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
924 	amdgpu_ring_write(kiq_ring,
925 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
926 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
927 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
928 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
929 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
930 			 /*queue_type: normal compute queue */
931 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
932 			 /* alloc format: all_on_one_pipe */
933 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
934 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
935 			 /* num_queues: must be 1 */
936 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
937 	amdgpu_ring_write(kiq_ring,
938 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
939 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
940 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
941 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
942 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
943 }
944 
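/*
 * Emit an UNMAP_QUEUES packet; @action selects how the queue is taken off
 * the hardware (e.g. preempted without being unmapped).
 */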
945 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
946 				   struct amdgpu_ring *ring,
947 				   enum amdgpu_unmap_queues_action action,
948 				   u64 gpu_addr, u64 seq)
949 {
950 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
951 
952 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
953 	amdgpu_ring_write(kiq_ring, /* action, queue_sel, engine_sel, num_queues */
954 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
955 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
956 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
957 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
958 	amdgpu_ring_write(kiq_ring,
959 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
960 
961 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
962 		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
963 		amdgpu_ring_write(kiq_ring, 0);
964 		amdgpu_ring_write(kiq_ring, 0);
965 
966 	} else {
967 		amdgpu_ring_write(kiq_ring, 0);
968 		amdgpu_ring_write(kiq_ring, 0);
969 		amdgpu_ring_write(kiq_ring, 0);
970 	}
971 }
972 
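/*
 * Emit a QUERY_STATUS packet; the KIQ writes the fence value @seq to @addr
 * once the request has been processed, so the caller can poll for it.
 */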
973 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
974 				   struct amdgpu_ring *ring,
975 				   u64 addr,
976 				   u64 seq)
977 {
978 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
979 
980 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
981 	amdgpu_ring_write(kiq_ring,
982 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
983 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
984 			  PACKET3_QUERY_STATUS_COMMAND(2));
985 	/* doorbell_offset, eng_sel */
986 	amdgpu_ring_write(kiq_ring,
987 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
988 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
989 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
990 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
991 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
992 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
993 }
994 
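/*
 * Emit an INVALIDATE_TLBS packet to flush the TLB entries of @pasid,
 * optionally on all VM hubs.
 */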
995 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
996 				uint16_t pasid, uint32_t flush_type,
997 				bool all_hub)
998 {
999 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
1000 	amdgpu_ring_write(kiq_ring,
1001 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
1002 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
1003 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
1004 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
1005 }
1006 
1007 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1008 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
1009 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
1010 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1011 	.kiq_query_status = gfx_v9_0_kiq_query_status,
1012 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1013 	.set_resources_size = 8,
1014 	.map_queues_size = 7,
1015 	.unmap_queues_size = 6,
1016 	.query_status_size = 7,
1017 	.invalidate_tlbs_size = 2,
1018 };
1019 
1020 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1021 {
1022 	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
1023 }
1024 
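/* Program the per-ASIC golden register settings selected by the GC IP version. */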
1025 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1026 {
1027 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1028 	case IP_VERSION(9, 0, 1):
1029 		soc15_program_register_sequence(adev,
1030 						golden_settings_gc_9_0,
1031 						ARRAY_SIZE(golden_settings_gc_9_0));
1032 		soc15_program_register_sequence(adev,
1033 						golden_settings_gc_9_0_vg10,
1034 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1035 		break;
1036 	case IP_VERSION(9, 2, 1):
1037 		soc15_program_register_sequence(adev,
1038 						golden_settings_gc_9_2_1,
1039 						ARRAY_SIZE(golden_settings_gc_9_2_1));
1040 		soc15_program_register_sequence(adev,
1041 						golden_settings_gc_9_2_1_vg12,
1042 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1043 		break;
1044 	case IP_VERSION(9, 4, 0):
1045 		soc15_program_register_sequence(adev,
1046 						golden_settings_gc_9_0,
1047 						ARRAY_SIZE(golden_settings_gc_9_0));
1048 		soc15_program_register_sequence(adev,
1049 						golden_settings_gc_9_0_vg20,
1050 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1051 		break;
1052 	case IP_VERSION(9, 4, 1):
1053 		soc15_program_register_sequence(adev,
1054 						golden_settings_gc_9_4_1_arct,
1055 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1056 		break;
1057 	case IP_VERSION(9, 2, 2):
1058 	case IP_VERSION(9, 1, 0):
1059 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1060 						ARRAY_SIZE(golden_settings_gc_9_1));
1061 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1062 			soc15_program_register_sequence(adev,
1063 							golden_settings_gc_9_1_rv2,
1064 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1065 		else
1066 			soc15_program_register_sequence(adev,
1067 							golden_settings_gc_9_1_rv1,
1068 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1069 		break;
1070 	case IP_VERSION(9, 3, 0):
1071 		soc15_program_register_sequence(adev,
1072 						golden_settings_gc_9_1_rn,
1073 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
1074 		return; /* for renoir, the common golden settings are not needed */
1075 	case IP_VERSION(9, 4, 2):
1076 		gfx_v9_4_2_init_golden_registers(adev,
1077 						 adev->smuio.funcs->get_die_id(adev));
1078 		break;
1079 	default:
1080 		break;
1081 	}
1082 
1083 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1084 	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
1085 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1086 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1087 }
1088 
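/*
 * Emit a WRITE_DATA packet that writes @val to register @reg, optionally
 * waiting for write confirmation (@wc).
 */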
1089 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1090 				       bool wc, uint32_t reg, uint32_t val)
1091 {
1092 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1093 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1094 				WRITE_DATA_DST_SEL(0) |
1095 				(wc ? WR_CONFIRM : 0));
1096 	amdgpu_ring_write(ring, reg);
1097 	amdgpu_ring_write(ring, 0);
1098 	amdgpu_ring_write(ring, val);
1099 }
1100 
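/*
 * Emit a WAIT_REG_MEM packet that polls a register or memory location
 * (selected by @mem_space) until (value & @mask) == @ref.
 */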
1101 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1102 				  int mem_space, int opt, uint32_t addr0,
1103 				  uint32_t addr1, uint32_t ref, uint32_t mask,
1104 				  uint32_t inv)
1105 {
1106 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1107 	amdgpu_ring_write(ring,
1108 				 /* memory (1) or register (0) */
1109 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1110 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1111 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1112 				 WAIT_REG_MEM_ENGINE(eng_sel)));
1113 
1114 	if (mem_space)
1115 		BUG_ON(addr0 & 0x3); /* Dword align */
1116 	amdgpu_ring_write(ring, addr0);
1117 	amdgpu_ring_write(ring, addr1);
1118 	amdgpu_ring_write(ring, ref);
1119 	amdgpu_ring_write(ring, mask);
1120 	amdgpu_ring_write(ring, inv); /* poll interval */
1121 }
1122 
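/*
 * Basic ring test: write a magic value to SCRATCH_REG0 through the ring and
 * poll the register until the value lands or the timeout expires.
 */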
1123 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1124 {
1125 	struct amdgpu_device *adev = ring->adev;
1126 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1127 	uint32_t tmp = 0;
1128 	unsigned i;
1129 	int r;
1130 
1131 	WREG32(scratch, 0xCAFEDEAD);
1132 	r = amdgpu_ring_alloc(ring, 3);
1133 	if (r)
1134 		return r;
1135 
1136 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1137 	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1138 	amdgpu_ring_write(ring, 0xDEADBEEF);
1139 	amdgpu_ring_commit(ring);
1140 
1141 	for (i = 0; i < adev->usec_timeout; i++) {
1142 		tmp = RREG32(scratch);
1143 		if (tmp == 0xDEADBEEF)
1144 			break;
1145 		udelay(1);
1146 	}
1147 
1148 	if (i >= adev->usec_timeout)
1149 		r = -ETIMEDOUT;
1150 	return r;
1151 }
1152 
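/*
 * IB test: submit a small indirect buffer that writes a magic value to a
 * writeback slot, then wait on the fence and verify the value.
 */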
1153 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1154 {
1155 	struct amdgpu_device *adev = ring->adev;
1156 	struct amdgpu_ib ib;
1157 	struct dma_fence *f = NULL;
1158 
1159 	unsigned index;
1160 	uint64_t gpu_addr;
1161 	uint32_t tmp;
1162 	long r;
1163 
1164 	r = amdgpu_device_wb_get(adev, &index);
1165 	if (r)
1166 		return r;
1167 
1168 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1169 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1170 	memset(&ib, 0, sizeof(ib));
1171 
1172 	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1173 	if (r)
1174 		goto err1;
1175 
1176 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1177 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1178 	ib.ptr[2] = lower_32_bits(gpu_addr);
1179 	ib.ptr[3] = upper_32_bits(gpu_addr);
1180 	ib.ptr[4] = 0xDEADBEEF;
1181 	ib.length_dw = 5;
1182 
1183 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1184 	if (r)
1185 		goto err2;
1186 
1187 	r = dma_fence_wait_timeout(f, false, timeout);
1188 	if (r == 0) {
1189 		r = -ETIMEDOUT;
1190 		goto err2;
1191 	} else if (r < 0) {
1192 		goto err2;
1193 	}
1194 
1195 	tmp = adev->wb.wb[index];
1196 	if (tmp == 0xDEADBEEF)
1197 		r = 0;
1198 	else
1199 		r = -EINVAL;
1200 
1201 err2:
1202 	amdgpu_ib_free(adev, &ib, NULL);
1203 	dma_fence_put(f);
1204 err1:
1205 	amdgpu_device_wb_free(adev, index);
1206 	return r;
1207 }
1208 
1209 
1210 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1211 {
1212 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
1213 	amdgpu_ucode_release(&adev->gfx.me_fw);
1214 	amdgpu_ucode_release(&adev->gfx.ce_fw);
1215 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
1216 	amdgpu_ucode_release(&adev->gfx.mec_fw);
1217 	amdgpu_ucode_release(&adev->gfx.mec2_fw);
1218 
1219 	kfree(adev->gfx.rlc.register_list_format);
1220 }
1221 
1222 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1223 {
1224 	adev->gfx.me_fw_write_wait = false;
1225 	adev->gfx.mec_fw_write_wait = false;
1226 
1227 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1228 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1229 	     (adev->gfx.mec_feature_version < 46) ||
1230 	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
1231 	     (adev->gfx.pfp_feature_version < 46)))
1232 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1233 
1234 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1235 	case IP_VERSION(9, 0, 1):
1236 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1237 		    (adev->gfx.me_feature_version >= 42) &&
1238 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1239 		    (adev->gfx.pfp_feature_version >= 42))
1240 			adev->gfx.me_fw_write_wait = true;
1241 
1242 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1243 		    (adev->gfx.mec_feature_version >= 42))
1244 			adev->gfx.mec_fw_write_wait = true;
1245 		break;
1246 	case IP_VERSION(9, 2, 1):
1247 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1248 		    (adev->gfx.me_feature_version >= 44) &&
1249 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1250 		    (adev->gfx.pfp_feature_version >= 44))
1251 			adev->gfx.me_fw_write_wait = true;
1252 
1253 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1254 		    (adev->gfx.mec_feature_version >= 44))
1255 			adev->gfx.mec_fw_write_wait = true;
1256 		break;
1257 	case IP_VERSION(9, 4, 0):
1258 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1259 		    (adev->gfx.me_feature_version >= 44) &&
1260 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1261 		    (adev->gfx.pfp_feature_version >= 44))
1262 			adev->gfx.me_fw_write_wait = true;
1263 
1264 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1265 		    (adev->gfx.mec_feature_version >= 44))
1266 			adev->gfx.mec_fw_write_wait = true;
1267 		break;
1268 	case IP_VERSION(9, 1, 0):
1269 	case IP_VERSION(9, 2, 2):
1270 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1271 		    (adev->gfx.me_feature_version >= 42) &&
1272 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1273 		    (adev->gfx.pfp_feature_version >= 42))
1274 			adev->gfx.me_fw_write_wait = true;
1275 
1276 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1277 		    (adev->gfx.mec_feature_version >= 42))
1278 			adev->gfx.mec_fw_write_wait = true;
1279 		break;
1280 	default:
1281 		adev->gfx.me_fw_write_wait = true;
1282 		adev->gfx.mec_fw_write_wait = true;
1283 		break;
1284 	}
1285 }
1286 
1287 struct amdgpu_gfxoff_quirk {
1288 	u16 chip_vendor;
1289 	u16 chip_device;
1290 	u16 subsys_vendor;
1291 	u16 subsys_device;
1292 	u8 revision;
1293 };
1294 
1295 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1296 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1297 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1298 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1299 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1300 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1301 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1302 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1303 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1304 	{ 0, 0, 0, 0, 0 },
1305 };
1306 
1307 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1308 {
1309 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1310 
1311 	while (p && p->chip_device != 0) {
1312 		if (pdev->vendor == p->chip_vendor &&
1313 		    pdev->device == p->chip_device &&
1314 		    pdev->subsystem_vendor == p->subsys_vendor &&
1315 		    pdev->subsystem_device == p->subsys_device &&
1316 		    pdev->revision == p->revision) {
1317 			return true;
1318 		}
1319 		++p;
1320 	}
1321 	return false;
1322 }
1323 
1324 static bool is_raven_kicker(struct amdgpu_device *adev)
1325 {
1326 	if (adev->pm.fw_version >= 0x41e2b)
1327 		return true;
1328 	else
1329 		return false;
1330 }
1331 
1332 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1333 {
1334 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1335 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1336 	    (adev->gfx.me_feature_version >= 52))
1337 		return true;
1338 	else
1339 		return false;
1340 }
1341 
1342 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1343 {
1344 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1345 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1346 
1347 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1348 	case IP_VERSION(9, 0, 1):
1349 	case IP_VERSION(9, 2, 1):
1350 	case IP_VERSION(9, 4, 0):
1351 		break;
1352 	case IP_VERSION(9, 2, 2):
1353 	case IP_VERSION(9, 1, 0):
1354 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1355 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1356 		    ((!is_raven_kicker(adev) &&
1357 		      adev->gfx.rlc_fw_version < 531) ||
1358 		     (adev->gfx.rlc_feature_version < 1) ||
1359 		     !adev->gfx.rlc.is_rlc_v2_1))
1360 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1361 
1362 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1363 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1364 				AMD_PG_SUPPORT_CP |
1365 				AMD_PG_SUPPORT_RLC_SMU_HS;
1366 		break;
1367 	case IP_VERSION(9, 3, 0):
1368 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1369 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1370 				AMD_PG_SUPPORT_CP |
1371 				AMD_PG_SUPPORT_RLC_SMU_HS;
1372 		break;
1373 	default:
1374 		break;
1375 	}
1376 }
1377 
1378 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1379 					  char *chip_name)
1380 {
1381 	int err;
1382 
1383 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
1384 				   "amdgpu/%s_pfp.bin", chip_name);
1385 	if (err)
1386 		goto out;
1387 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1388 
1389 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1390 				   "amdgpu/%s_me.bin", chip_name);
1391 	if (err)
1392 		goto out;
1393 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1394 
1395 	err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1396 				   "amdgpu/%s_ce.bin", chip_name);
1397 	if (err)
1398 		goto out;
1399 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1400 
1401 out:
1402 	if (err) {
1403 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1404 		amdgpu_ucode_release(&adev->gfx.me_fw);
1405 		amdgpu_ucode_release(&adev->gfx.ce_fw);
1406 	}
1407 	return err;
1408 }
1409 
1410 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1411 				       char *chip_name)
1412 {
1413 	int err;
1414 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1415 	uint16_t version_major;
1416 	uint16_t version_minor;
1417 	uint32_t smu_version;
1418 
1419 	/*
1420 	 * For Picasso boards with an AM4 socket, use picasso_rlc_am4.bin
1421 	 * instead of picasso_rlc.bin.
1422 	 * They are identified by PCI revision:
1423 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1424 	 *          or revision >= 0xD8 && revision <= 0xDF
1425 	 * otherwise the part is PCO FP5.
1426 	 */
1427 	if (!strcmp(chip_name, "picasso") &&
1428 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1429 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1430 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1431 					   "amdgpu/%s_rlc_am4.bin", chip_name);
1432 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1433 		(smu_version >= 0x41e2b))
1434 		/*
1435 		 * The SMC is loaded by the SBIOS on APUs, so the SMU version can be read directly.
1436 		 */
1437 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1438 					   "amdgpu/%s_kicker_rlc.bin", chip_name);
1439 	else
1440 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1441 					   "amdgpu/%s_rlc.bin", chip_name);
1442 	if (err)
1443 		goto out;
1444 
1445 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1446 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1447 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1448 	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1449 out:
1450 	if (err)
1451 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1452 
1453 	return err;
1454 }
1455 
1456 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1457 {
1458 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1459 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1460 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1461 		return false;
1462 
1463 	return true;
1464 }
1465 
1466 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1467 					      char *chip_name)
1468 {
1469 	int err;
1470 
1471 	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1472 		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1473 					   "amdgpu/%s_sjt_mec.bin", chip_name);
1474 	else
1475 		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1476 					   "amdgpu/%s_mec.bin", chip_name);
1477 	if (err)
1478 		goto out;
1479 
1480 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1481 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1482 
1483 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1484 		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1485 			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1486 						   "amdgpu/%s_sjt_mec2.bin", chip_name);
1487 		else
1488 			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1489 						   "amdgpu/%s_mec2.bin", chip_name);
1490 		if (!err) {
1491 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1492 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1493 		} else {
1494 			err = 0;
1495 			amdgpu_ucode_release(&adev->gfx.mec2_fw);
1496 		}
1497 	} else {
1498 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1499 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1500 	}
1501 
1502 	gfx_v9_0_check_if_need_gfxoff(adev);
1503 	gfx_v9_0_check_fw_write_wait(adev);
1504 
1505 out:
1506 	if (err)
1507 		amdgpu_ucode_release(&adev->gfx.mec_fw);
1508 	return err;
1509 }
1510 
1511 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1512 {
1513 	char ucode_prefix[30];
1514 	int r;
1515 
1516 	DRM_DEBUG("\n");
1517 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1518 
1519 	/* No CPG in Arcturus */
1520 	if (adev->gfx.num_gfx_rings) {
1521 		r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1522 		if (r)
1523 			return r;
1524 	}
1525 
1526 	r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1527 	if (r)
1528 		return r;
1529 
1530 	r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1531 	if (r)
1532 		return r;
1533 
1534 	return r;
1535 }
1536 
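/*
 * Size of the clear state buffer in dwords: preamble begin, context
 * control, one SET_CONTEXT_REG packet per extent, preamble end and the
 * trailing CLEAR_STATE packet.
 */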
1537 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1538 {
1539 	u32 count = 0;
1540 	const struct cs_section_def *sect = NULL;
1541 	const struct cs_extent_def *ext = NULL;
1542 
1543 	/* begin clear state */
1544 	count += 2;
1545 	/* context control state */
1546 	count += 3;
1547 
1548 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1549 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1550 			if (sect->id == SECT_CONTEXT)
1551 				count += 2 + ext->reg_count;
1552 			else
1553 				return 0;
1554 		}
1555 	}
1556 
1557 	/* end clear state */
1558 	count += 2;
1559 	/* clear state */
1560 	count += 2;
1561 
1562 	return count;
1563 }
1564 
1565 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1566 				    volatile u32 *buffer)
1567 {
1568 	u32 count = 0, i;
1569 	const struct cs_section_def *sect = NULL;
1570 	const struct cs_extent_def *ext = NULL;
1571 
1572 	if (adev->gfx.rlc.cs_data == NULL)
1573 		return;
1574 	if (buffer == NULL)
1575 		return;
1576 
1577 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1578 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1579 
1580 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1581 	buffer[count++] = cpu_to_le32(0x80000000);
1582 	buffer[count++] = cpu_to_le32(0x80000000);
1583 
1584 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1585 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1586 			if (sect->id == SECT_CONTEXT) {
1587 				buffer[count++] =
1588 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1589 				buffer[count++] = cpu_to_le32(ext->reg_index -
1590 						PACKET3_SET_CONTEXT_REG_START);
1591 				for (i = 0; i < ext->reg_count; i++)
1592 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1593 			} else {
1594 				return;
1595 			}
1596 		}
1597 	}
1598 
1599 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1600 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1601 
1602 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1603 	buffer[count++] = cpu_to_le32(0);
1604 }
1605 
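/*
 * Build the per-SE/SH mask of CUs that stay active for RLC load balancing
 * and program it into RLC_LB_ALWAYS_ACTIVE_CU_MASK (and the first few CUs
 * into RLC_PG_ALWAYS_ON_CU_MASK).
 */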
1606 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1607 {
1608 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1609 	uint32_t pg_always_on_cu_num = 2;
1610 	uint32_t always_on_cu_num;
1611 	uint32_t i, j, k;
1612 	uint32_t mask, cu_bitmap, counter;
1613 
1614 	if (adev->flags & AMD_IS_APU)
1615 		always_on_cu_num = 4;
1616 	else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1617 		always_on_cu_num = 8;
1618 	else
1619 		always_on_cu_num = 12;
1620 
1621 	mutex_lock(&adev->grbm_idx_mutex);
1622 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1623 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1624 			mask = 1;
1625 			cu_bitmap = 0;
1626 			counter = 0;
1627 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1628 
1629 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1630 				if (cu_info->bitmap[0][i][j] & mask) {
1631 					if (counter == pg_always_on_cu_num)
1632 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1633 					if (counter < always_on_cu_num)
1634 						cu_bitmap |= mask;
1635 					else
1636 						break;
1637 					counter++;
1638 				}
1639 				mask <<= 1;
1640 			}
1641 
1642 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1643 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1644 		}
1645 	}
1646 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1647 	mutex_unlock(&adev->grbm_idx_mutex);
1648 }
1649 
1650 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1651 {
1652 	uint32_t data;
1653 
1654 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1655 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1656 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1657 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1658 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1659 
1660 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1661 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1662 
1663 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1664 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1665 
1666 	mutex_lock(&adev->grbm_idx_mutex);
1667 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1668 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1669 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1670 
1671 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1672 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1673 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1674 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1675 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1676 
1677 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1678 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1679 	data &= 0x0000FFFF;
1680 	data |= 0x00C00000;
1681 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1682 
1683 	/*
1684 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1685 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1686 	 */
1687 
1688 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1689 	 * but used for RLC_LB_CNTL configuration */
1690 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1691 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1692 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1693 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1694 	mutex_unlock(&adev->grbm_idx_mutex);
1695 
1696 	gfx_v9_0_init_always_on_cu_mask(adev);
1697 }
1698 
1699 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1700 {
1701 	uint32_t data;
1702 
1703 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1704 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1705 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1706 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1707 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1708 
1709 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1710 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1711 
1712 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1713 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1714 
1715 	mutex_lock(&adev->grbm_idx_mutex);
1716 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1717 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1718 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1719 
1720 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1721 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1722 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1723 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1724 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1725 
1726 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1727 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1728 	data &= 0x0000FFFF;
1729 	data |= 0x00C00000;
1730 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1731 
1732 	/*
1733 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1734 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1735 	 */
1736 
1737 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1738 	 * but used for RLC_LB_CNTL configuration */
1739 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1740 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1741 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1742 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1743 	mutex_unlock(&adev->grbm_idx_mutex);
1744 
1745 	gfx_v9_0_init_always_on_cu_mask(adev);
1746 }
1747 
1748 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1749 {
1750 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1751 }
1752 
1753 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1754 {
1755 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1756 		return 5;
1757 	else
1758 		return 4;
1759 }
1760 
1761 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1762 {
1763 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1764 
1765 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1766 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1767 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1768 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1769 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1770 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1771 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1772 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1773 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1774 }
1775 
1776 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1777 {
1778 	const struct cs_section_def *cs_data;
1779 	int r;
1780 
1781 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1782 
1783 	cs_data = adev->gfx.rlc.cs_data;
1784 
1785 	if (cs_data) {
1786 		/* init clear state block */
1787 		r = amdgpu_gfx_rlc_init_csb(adev);
1788 		if (r)
1789 			return r;
1790 	}
1791 
1792 	if (adev->flags & AMD_IS_APU) {
1793 		/* TODO: double check the cp_table_size for RV */
1794 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1795 		r = amdgpu_gfx_rlc_init_cpt(adev);
1796 		if (r)
1797 			return r;
1798 	}
1799 
1800 	return 0;
1801 }
1802 
1803 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1804 {
1805 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1806 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1807 }
1808 
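/*
 * Allocate the MEC resources: the HPD/EOP buffer covering all enabled
 * compute rings and a GTT bo holding a copy of the MEC microcode.
 */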
1809 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1810 {
1811 	int r;
1812 	u32 *hpd;
1813 	const __le32 *fw_data;
1814 	unsigned fw_size;
1815 	u32 *fw;
1816 	size_t mec_hpd_size;
1817 
1818 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1819 
1820 	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1821 
1822 	/* take ownership of the relevant compute queues */
1823 	amdgpu_gfx_compute_queue_acquire(adev);
1824 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1825 	if (mec_hpd_size) {
1826 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1827 					      AMDGPU_GEM_DOMAIN_VRAM |
1828 					      AMDGPU_GEM_DOMAIN_GTT,
1829 					      &adev->gfx.mec.hpd_eop_obj,
1830 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1831 					      (void **)&hpd);
1832 		if (r) {
1833 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1834 			gfx_v9_0_mec_fini(adev);
1835 			return r;
1836 		}
1837 
1838 		memset(hpd, 0, mec_hpd_size);
1839 
1840 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1841 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1842 	}
1843 
1844 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1845 
1846 	fw_data = (const __le32 *)
1847 		(adev->gfx.mec_fw->data +
1848 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1849 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1850 
1851 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1852 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1853 				      &adev->gfx.mec.mec_fw_obj,
1854 				      &adev->gfx.mec.mec_fw_gpu_addr,
1855 				      (void **)&fw);
1856 	if (r) {
1857 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1858 		gfx_v9_0_mec_fini(adev);
1859 		return r;
1860 	}
1861 
1862 	memcpy(fw, fw_data, fw_size);
1863 
1864 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1865 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1866 
1867 	return 0;
1868 }
1869 
1870 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1871 {
1872 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1873 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1874 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1875 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1876 		(SQ_IND_INDEX__FORCE_READ_MASK));
1877 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1878 }
1879 
1880 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1881 			   uint32_t wave, uint32_t thread,
1882 			   uint32_t regno, uint32_t num, uint32_t *out)
1883 {
1884 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1885 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1886 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1887 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1888 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1889 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1890 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1891 	while (num--)
1892 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1893 }
1894 
1895 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1896 {
1897 	/* type 1 wave data */
1898 	dst[(*no_fields)++] = 1;
1899 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1900 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1901 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1902 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1903 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1904 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1905 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1906 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1907 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1908 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1909 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1910 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1911 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1912 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1913 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1914 }
1915 
1916 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1917 				     uint32_t wave, uint32_t start,
1918 				     uint32_t size, uint32_t *dst)
1919 {
1920 	wave_read_regs(
1921 		adev, simd, wave, 0,
1922 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1923 }
1924 
1925 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1926 				     uint32_t wave, uint32_t thread,
1927 				     uint32_t start, uint32_t size,
1928 				     uint32_t *dst)
1929 {
1930 	wave_read_regs(
1931 		adev, simd, wave, thread,
1932 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1933 }
1934 
1935 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1936 				  u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1937 {
1938 	soc15_grbm_select(adev, me, pipe, q, vm, 0);
1939 }
1940 
1941 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1942 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1943 	.select_se_sh = &gfx_v9_0_select_se_sh,
1944 	.read_wave_data = &gfx_v9_0_read_wave_data,
1945 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1946 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1947 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1948 };
1949 
1950 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
1951 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1952 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1953 	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1954 };
1955 
1956 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
1957 	.ras_block = {
1958 		.hw_ops = &gfx_v9_0_ras_ops,
1959 	},
1960 };
1961 
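/*
 * Per-ASIC gfx configuration: FIFO sizes, GB_ADDR_CONFIG and the derived
 * pipe/bank/RB/SE counts used by the rest of the driver.
 */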
1962 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1963 {
1964 	u32 gb_addr_config;
1965 	int err;
1966 
1967 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1968 	case IP_VERSION(9, 0, 1):
1969 		adev->gfx.config.max_hw_contexts = 8;
1970 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1971 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1972 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1973 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1974 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1975 		break;
1976 	case IP_VERSION(9, 2, 1):
1977 		adev->gfx.config.max_hw_contexts = 8;
1978 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1979 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1980 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1981 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1982 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1983 		DRM_INFO("fix gfx.config for vega12\n");
1984 		break;
1985 	case IP_VERSION(9, 4, 0):
1986 		adev->gfx.ras = &gfx_v9_0_ras;
1987 		adev->gfx.config.max_hw_contexts = 8;
1988 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1989 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1990 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1991 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1992 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1993 		gb_addr_config &= ~0xf3e777ff;
1994 		gb_addr_config |= 0x22014042;
1995 		/* check vbios table if gpu info is not available */
1996 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1997 		if (err)
1998 			return err;
1999 		break;
2000 	case IP_VERSION(9, 2, 2):
2001 	case IP_VERSION(9, 1, 0):
2002 		adev->gfx.config.max_hw_contexts = 8;
2003 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2004 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2005 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2006 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2007 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2008 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2009 		else
2010 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2011 		break;
2012 	case IP_VERSION(9, 4, 1):
2013 		adev->gfx.ras = &gfx_v9_4_ras;
2014 		adev->gfx.config.max_hw_contexts = 8;
2015 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2016 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2017 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2018 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2019 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2020 		gb_addr_config &= ~0xf3e777ff;
2021 		gb_addr_config |= 0x22014042;
2022 		break;
2023 	case IP_VERSION(9, 3, 0):
2024 		adev->gfx.config.max_hw_contexts = 8;
2025 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2026 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2027 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2028 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2029 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2030 		gb_addr_config &= ~0xf3e777ff;
2031 		gb_addr_config |= 0x22010042;
2032 		break;
2033 	case IP_VERSION(9, 4, 2):
2034 		adev->gfx.ras = &gfx_v9_4_2_ras;
2035 		adev->gfx.config.max_hw_contexts = 8;
2036 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2037 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2038 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2039 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2040 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2041 		gb_addr_config &= ~0xf3e777ff;
2042 		gb_addr_config |= 0x22014042;
2043 		/* check vbios table if gpu info is not available */
2044 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2045 		if (err)
2046 			return err;
2047 		break;
2048 	default:
2049 		BUG();
2050 		break;
2051 	}
2052 
2053 	adev->gfx.config.gb_addr_config = gb_addr_config;
2054 
2055 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2056 			REG_GET_FIELD(
2057 					adev->gfx.config.gb_addr_config,
2058 					GB_ADDR_CONFIG,
2059 					NUM_PIPES);
2060 
2061 	adev->gfx.config.max_tile_pipes =
2062 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2063 
2064 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2065 			REG_GET_FIELD(
2066 					adev->gfx.config.gb_addr_config,
2067 					GB_ADDR_CONFIG,
2068 					NUM_BANKS);
2069 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2070 			REG_GET_FIELD(
2071 					adev->gfx.config.gb_addr_config,
2072 					GB_ADDR_CONFIG,
2073 					MAX_COMPRESSED_FRAGS);
2074 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2075 			REG_GET_FIELD(
2076 					adev->gfx.config.gb_addr_config,
2077 					GB_ADDR_CONFIG,
2078 					NUM_RB_PER_SE);
2079 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2080 			REG_GET_FIELD(
2081 					adev->gfx.config.gb_addr_config,
2082 					GB_ADDR_CONFIG,
2083 					NUM_SHADER_ENGINES);
2084 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2085 			REG_GET_FIELD(
2086 					adev->gfx.config.gb_addr_config,
2087 					GB_ADDR_CONFIG,
2088 					PIPE_INTERLEAVE_SIZE));
2089 
2090 	return 0;
2091 }
2092 
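/*
 * Initialize one compute ring: map it onto the given MEC/pipe/queue,
 * assign its doorbell and EOP buffer slice, and hook up the EOP interrupt.
 */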
2093 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2094 				      int mec, int pipe, int queue)
2095 {
2096 	unsigned irq_type;
2097 	struct amdgpu_ring *ring;
2098 	unsigned int hw_prio;
2099 
2100 	ring = &adev->gfx.compute_ring[ring_id];
2101 
2102 	/* mec0 is me1 */
2103 	ring->me = mec + 1;
2104 	ring->pipe = pipe;
2105 	ring->queue = queue;
2106 
2107 	ring->ring_obj = NULL;
2108 	ring->use_doorbell = true;
2109 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2110 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2111 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2112 	ring->vm_hub = AMDGPU_GFXHUB(0);
2113 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2114 
2115 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2116 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2117 		+ ring->pipe;
2118 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2119 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2120 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2121 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2122 				hw_prio, NULL);
2123 }
2124 
2125 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2126 {
2127 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2128 	uint32_t *ptr;
2129 	uint32_t inst;
2130 
2131 	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2132 	if (ptr == NULL) {
2133 		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2134 		adev->gfx.ip_dump_core = NULL;
2135 	} else {
2136 		adev->gfx.ip_dump_core = ptr;
2137 	}
2138 
2139 	/* Allocate memory for compute queue registers for all the instances */
2140 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2141 	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2142 		adev->gfx.mec.num_queue_per_pipe;
2143 
2144 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2145 	if (ptr == NULL) {
2146 		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2147 		adev->gfx.ip_dump_compute_queues = NULL;
2148 	} else {
2149 		adev->gfx.ip_dump_compute_queues = ptr;
2150 	}
2151 }
2152 
2153 static int gfx_v9_0_sw_init(void *handle)
2154 {
2155 	int i, j, k, r, ring_id;
2156 	int xcc_id = 0;
2157 	struct amdgpu_ring *ring;
2158 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2159 	unsigned int hw_prio;
2160 
2161 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2162 	case IP_VERSION(9, 0, 1):
2163 	case IP_VERSION(9, 2, 1):
2164 	case IP_VERSION(9, 4, 0):
2165 	case IP_VERSION(9, 2, 2):
2166 	case IP_VERSION(9, 1, 0):
2167 	case IP_VERSION(9, 4, 1):
2168 	case IP_VERSION(9, 3, 0):
2169 	case IP_VERSION(9, 4, 2):
2170 		adev->gfx.mec.num_mec = 2;
2171 		break;
2172 	default:
2173 		adev->gfx.mec.num_mec = 1;
2174 		break;
2175 	}
2176 
2177 	adev->gfx.mec.num_pipe_per_mec = 4;
2178 	adev->gfx.mec.num_queue_per_pipe = 8;
2179 
2180 	/* EOP Event */
2181 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2182 	if (r)
2183 		return r;
2184 
2185 	/* Privileged reg */
2186 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2187 			      &adev->gfx.priv_reg_irq);
2188 	if (r)
2189 		return r;
2190 
2191 	/* Privileged inst */
2192 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2193 			      &adev->gfx.priv_inst_irq);
2194 	if (r)
2195 		return r;
2196 
2197 	/* ECC error */
2198 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2199 			      &adev->gfx.cp_ecc_error_irq);
2200 	if (r)
2201 		return r;
2202 
2203 	/* FUE error */
2204 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2205 			      &adev->gfx.cp_ecc_error_irq);
2206 	if (r)
2207 		return r;
2208 
2209 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2210 
2211 	if (adev->gfx.rlc.funcs) {
2212 		if (adev->gfx.rlc.funcs->init) {
2213 			r = adev->gfx.rlc.funcs->init(adev);
2214 			if (r) {
2215 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2216 				return r;
2217 			}
2218 		}
2219 	}
2220 
2221 	r = gfx_v9_0_mec_init(adev);
2222 	if (r) {
2223 		DRM_ERROR("Failed to init MEC BOs!\n");
2224 		return r;
2225 	}
2226 
2227 	/* set up the gfx ring */
2228 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2229 		ring = &adev->gfx.gfx_ring[i];
2230 		ring->ring_obj = NULL;
2231 		if (!i)
2232 			sprintf(ring->name, "gfx");
2233 		else
2234 			sprintf(ring->name, "gfx_%d", i);
2235 		ring->use_doorbell = true;
2236 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2237 
2238 		/* disable scheduler on the real ring */
2239 		ring->no_scheduler = adev->gfx.mcbp;
2240 		ring->vm_hub = AMDGPU_GFXHUB(0);
2241 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2242 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2243 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2244 		if (r)
2245 			return r;
2246 	}
2247 
2248 	/* set up the software rings */
2249 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2250 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2251 			ring = &adev->gfx.sw_gfx_ring[i];
2252 			ring->ring_obj = NULL;
2253 			sprintf(ring->name, "%s", amdgpu_sw_ring_name(i));
2254 			ring->use_doorbell = true;
2255 			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2256 			ring->is_sw_ring = true;
2257 			hw_prio = amdgpu_sw_ring_priority(i);
2258 			ring->vm_hub = AMDGPU_GFXHUB(0);
2259 			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2260 					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2261 					     NULL);
2262 			if (r)
2263 				return r;
2264 			ring->wptr = 0;
2265 		}
2266 
2267 		/* init the muxer and add software rings */
2268 		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2269 					 GFX9_NUM_SW_GFX_RINGS);
2270 		if (r) {
2271 			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2272 			return r;
2273 		}
2274 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2275 			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2276 							&adev->gfx.sw_gfx_ring[i]);
2277 			if (r) {
2278 				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2279 				return r;
2280 			}
2281 		}
2282 	}
2283 
2284 	/* set up the compute queues - allocate horizontally across pipes */
2285 	ring_id = 0;
2286 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2287 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2288 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2289 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2290 								     k, j))
2291 					continue;
2292 
2293 				r = gfx_v9_0_compute_ring_init(adev,
2294 							       ring_id,
2295 							       i, k, j);
2296 				if (r)
2297 					return r;
2298 
2299 				ring_id++;
2300 			}
2301 		}
2302 	}
2303 
2304 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2305 	if (r) {
2306 		DRM_ERROR("Failed to init KIQ BOs!\n");
2307 		return r;
2308 	}
2309 
2310 	r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2311 	if (r)
2312 		return r;
2313 
2314 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2315 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2316 	if (r)
2317 		return r;
2318 
2319 	adev->gfx.ce_ram_size = 0x8000;
2320 
2321 	r = gfx_v9_0_gpu_early_init(adev);
2322 	if (r)
2323 		return r;
2324 
2325 	if (amdgpu_gfx_ras_sw_init(adev)) {
2326 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2327 		return -EINVAL;
2328 	}
2329 
2330 	gfx_v9_0_alloc_ip_dump(adev);
2331 
2332 	return 0;
2333 }
2334 
2335 
2336 static int gfx_v9_0_sw_fini(void *handle)
2337 {
2338 	int i;
2339 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2340 
2341 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2342 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2343 			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2344 		amdgpu_ring_mux_fini(&adev->gfx.muxer);
2345 	}
2346 
2347 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2348 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2349 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2350 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2351 
2352 	amdgpu_gfx_mqd_sw_fini(adev, 0);
2353 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2354 	amdgpu_gfx_kiq_fini(adev, 0);
2355 
2356 	gfx_v9_0_mec_fini(adev);
2357 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2358 				&adev->gfx.rlc.clear_state_gpu_addr,
2359 				(void **)&adev->gfx.rlc.cs_ptr);
2360 	if (adev->flags & AMD_IS_APU) {
2361 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2362 				&adev->gfx.rlc.cp_table_gpu_addr,
2363 				(void **)&adev->gfx.rlc.cp_table_ptr);
2364 	}
2365 	gfx_v9_0_free_microcode(adev);
2366 
2367 	kfree(adev->gfx.ip_dump_core);
2368 	kfree(adev->gfx.ip_dump_compute_queues);
2369 
2370 	return 0;
2371 }
2372 
2373 
2374 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2375 {
2376 	/* TODO */
2377 }
2378 
2379 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2380 			   u32 instance, int xcc_id)
2381 {
2382 	u32 data;
2383 
2384 	if (instance == 0xffffffff)
2385 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2386 	else
2387 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2388 
2389 	if (se_num == 0xffffffff)
2390 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2391 	else
2392 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2393 
2394 	if (sh_num == 0xffffffff)
2395 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2396 	else
2397 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2398 
2399 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2400 }
2401 
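/*
 * Return the bitmap of render backends of the currently selected SE/SH
 * that are not disabled in CC_RB_BACKEND_DISABLE or
 * GC_USER_RB_BACKEND_DISABLE.
 */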
2402 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2403 {
2404 	u32 data, mask;
2405 
2406 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2407 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2408 
2409 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2410 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2411 
2412 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2413 					 adev->gfx.config.max_sh_per_se);
2414 
2415 	return (~data) & mask;
2416 }
2417 
2418 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2419 {
2420 	int i, j;
2421 	u32 data;
2422 	u32 active_rbs = 0;
2423 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2424 					adev->gfx.config.max_sh_per_se;
2425 
2426 	mutex_lock(&adev->grbm_idx_mutex);
2427 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2428 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2429 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2430 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2431 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2432 					       rb_bitmap_width_per_sh);
2433 		}
2434 	}
2435 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2436 	mutex_unlock(&adev->grbm_idx_mutex);
2437 
2438 	adev->gfx.config.backend_enable_mask = active_rbs;
2439 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2440 }
2441 
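/*
 * Enable the SPI debug trap for the given VMID range and clear the trap
 * mask and data registers.
 */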
2442 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2443 				uint32_t first_vmid,
2444 				uint32_t last_vmid)
2445 {
2446 	uint32_t data;
2447 	uint32_t trap_config_vmid_mask = 0;
2448 	int i;
2449 
2450 	/* Calculate trap config vmid mask */
2451 	for (i = first_vmid; i < last_vmid; i++)
2452 		trap_config_vmid_mask |= (1 << i);
2453 
2454 	data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2455 			VMID_SEL, trap_config_vmid_mask);
2456 	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2457 			TRAP_EN, 1);
2458 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2459 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2460 
2461 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2462 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2463 }
2464 
2465 #define DEFAULT_SH_MEM_BASES	(0x6000)
2466 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2467 {
2468 	int i;
2469 	uint32_t sh_mem_config;
2470 	uint32_t sh_mem_bases;
2471 
2472 	/*
2473 	 * Configure apertures:
2474 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2475 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2476 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2477 	 */
2478 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2479 
2480 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2481 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2482 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2483 
2484 	mutex_lock(&adev->srbm_mutex);
2485 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2486 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2487 		/* CP and shaders */
2488 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2489 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2490 	}
2491 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2492 	mutex_unlock(&adev->srbm_mutex);
2493 
2494 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2495 	 * access. These should be enabled by FW for target VMIDs. */
2496 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2497 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2498 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2499 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2500 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2501 	}
2502 }
2503 
2504 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2505 {
2506 	int vmid;
2507 
2508 	/*
2509 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2510 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2511 	 * the driver can enable them for graphics. VMID0 should maintain
2512 	 * access so that HWS firmware can save/restore entries.
2513 	 */
2514 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2515 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2516 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2517 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2518 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2519 	}
2520 }
2521 
2522 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2523 {
2524 	uint32_t tmp;
2525 
2526 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2527 	case IP_VERSION(9, 4, 1):
2528 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2529 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2530 				!READ_ONCE(adev->barrier_has_auto_waitcnt));
2531 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2532 		break;
2533 	default:
2534 		break;
2535 	}
2536 }
2537 
2538 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2539 {
2540 	u32 tmp;
2541 	int i;
2542 
2543 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2544 
2545 	gfx_v9_0_tiling_mode_table_init(adev);
2546 
2547 	if (adev->gfx.num_gfx_rings)
2548 		gfx_v9_0_setup_rb(adev);
2549 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2550 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2551 
2552 	/* XXX SH_MEM regs */
2553 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2554 	mutex_lock(&adev->srbm_mutex);
2555 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2556 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2557 		/* CP and shaders */
2558 		if (i == 0) {
2559 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2560 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2561 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2562 					    !!adev->gmc.noretry);
2563 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2564 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2565 		} else {
2566 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2567 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2568 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2569 					    !!adev->gmc.noretry);
2570 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2571 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2572 				(adev->gmc.private_aperture_start >> 48));
2573 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2574 				(adev->gmc.shared_aperture_start >> 48));
2575 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2576 		}
2577 	}
2578 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2579 
2580 	mutex_unlock(&adev->srbm_mutex);
2581 
2582 	gfx_v9_0_init_compute_vmid(adev);
2583 	gfx_v9_0_init_gds_vmid(adev);
2584 	gfx_v9_0_init_sq_config(adev);
2585 }
2586 
2587 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2588 {
2589 	u32 i, j, k;
2590 	u32 mask;
2591 
2592 	mutex_lock(&adev->grbm_idx_mutex);
2593 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2594 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2595 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2596 			for (k = 0; k < adev->usec_timeout; k++) {
2597 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2598 					break;
2599 				udelay(1);
2600 			}
2601 			if (k == adev->usec_timeout) {
2602 				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2603 						      0xffffffff, 0xffffffff, 0);
2604 				mutex_unlock(&adev->grbm_idx_mutex);
2605 				DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2606 					 i, j);
2607 				return;
2608 			}
2609 		}
2610 	}
2611 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2612 	mutex_unlock(&adev->grbm_idx_mutex);
2613 
2614 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2615 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2616 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2617 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2618 	for (k = 0; k < adev->usec_timeout; k++) {
2619 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2620 			break;
2621 		udelay(1);
2622 	}
2623 }
2624 
2625 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2626 					       bool enable)
2627 {
2628 	u32 tmp;
2629 
2630 	/* These interrupts should be enabled to drive DS clock */
2631 
2632 	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2633 
2634 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2635 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2636 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2637 	if (adev->gfx.num_gfx_rings)
2638 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2639 
2640 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2641 }
2642 
2643 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2644 {
2645 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2646 	/* csib */
2647 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2648 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2649 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2650 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2651 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2652 			adev->gfx.rlc.clear_state_size);
2653 }
2654 
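/*
 * Walk the RLC register list format and record the start offset of each
 * indirect block as well as the unique indirect register offsets it
 * references.
 */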
2655 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2656 				int indirect_offset,
2657 				int list_size,
2658 				int *unique_indirect_regs,
2659 				int unique_indirect_reg_count,
2660 				int *indirect_start_offsets,
2661 				int *indirect_start_offsets_count,
2662 				int max_start_offsets_count)
2663 {
2664 	int idx;
2665 
2666 	for (; indirect_offset < list_size; indirect_offset++) {
2667 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2668 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2669 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2670 
2671 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2672 			indirect_offset += 2;
2673 
2674 			/* look for the matching index */
2675 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2676 				if (unique_indirect_regs[idx] ==
2677 					register_list_format[indirect_offset] ||
2678 					!unique_indirect_regs[idx])
2679 					break;
2680 			}
2681 
2682 			BUG_ON(idx >= unique_indirect_reg_count);
2683 
2684 			if (!unique_indirect_regs[idx])
2685 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2686 
2687 			indirect_offset++;
2688 		}
2689 	}
2690 }
2691 
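/*
 * Program the RLC save/restore list: upload the register restore table via
 * RLC_SRM_ARAM, the register list format, list size and starting offsets
 * via RLC_GPM_SCRATCH, and the unique indirect registers via the SRM index
 * control registers.
 */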
2692 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2693 {
2694 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2695 	int unique_indirect_reg_count = 0;
2696 
2697 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2698 	int indirect_start_offsets_count = 0;
2699 
2700 	int list_size = 0;
2701 	int i = 0, j = 0;
2702 	u32 tmp = 0;
2703 
2704 	u32 *register_list_format =
2705 		kmemdup(adev->gfx.rlc.register_list_format,
2706 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2707 	if (!register_list_format)
2708 		return -ENOMEM;
2709 
2710 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2711 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2712 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2713 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2714 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2715 				    unique_indirect_regs,
2716 				    unique_indirect_reg_count,
2717 				    indirect_start_offsets,
2718 				    &indirect_start_offsets_count,
2719 				    ARRAY_SIZE(indirect_start_offsets));
2720 
2721 	/* enable auto inc in case it is disabled */
2722 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2723 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2724 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2725 
2726 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2727 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2728 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2729 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2730 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2731 			adev->gfx.rlc.register_restore[i]);
2732 
2733 	/* load indirect register */
2734 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2735 		adev->gfx.rlc.reg_list_format_start);
2736 
2737 	/* direct register portion */
2738 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2739 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2740 			register_list_format[i]);
2741 
2742 	/* indirect register portion */
2743 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2744 		if (register_list_format[i] == 0xFFFFFFFF) {
2745 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2746 			continue;
2747 		}
2748 
2749 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2750 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2751 
2752 		for (j = 0; j < unique_indirect_reg_count; j++) {
2753 			if (register_list_format[i] == unique_indirect_regs[j]) {
2754 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2755 				break;
2756 			}
2757 		}
2758 
2759 		BUG_ON(j >= unique_indirect_reg_count);
2760 
2761 		i++;
2762 	}
2763 
2764 	/* set save/restore list size */
2765 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
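	/* the restore list is stored as (offset, value) dword pairs, so the
	 * size in entries is presumably half the dword count
	 */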
2766 	list_size = list_size >> 1;
2767 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2768 		adev->gfx.rlc.reg_restore_list_size);
2769 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2770 
2771 	/* write the starting offsets to RLC scratch ram */
2772 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2773 		adev->gfx.rlc.starting_offsets_start);
2774 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2775 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2776 		       indirect_start_offsets[i]);
2777 
2778 	/* load unique indirect regs*/
2779 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2780 		if (unique_indirect_regs[i] != 0) {
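			/* the low 18 bits of the offset are written to the
			 * INDEX_CNTL_ADDR register and bits 20 and up to the
			 * matching INDEX_CNTL_DATA register
			 */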
2781 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2782 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2783 			       unique_indirect_regs[i] & 0x3FFFF);
2784 
2785 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2786 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2787 			       unique_indirect_regs[i] >> 20);
2788 		}
2789 	}
2790 
2791 	kfree(register_list_format);
2792 	return 0;
2793 }
2794 
2795 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2796 {
2797 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2798 }
2799 
2800 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2801 					     bool enable)
2802 {
2803 	uint32_t data = 0;
2804 	uint32_t default_data = 0;
2805 
2806 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2807 	if (enable) {
2808 		/* enable GFXIP control over CGPG */
2809 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2810 		if (default_data != data)
2811 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2812 
2813 		/* update status */
2814 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2815 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2816 		if (default_data != data)
2817 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2818 	} else {
2819 		/* restore GFXIP control over CGPG */
2820 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2821 		if (default_data != data)
2822 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2823 	}
2824 }
2825 
2826 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2827 {
2828 	uint32_t data = 0;
2829 
2830 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2831 			      AMD_PG_SUPPORT_GFX_SMG |
2832 			      AMD_PG_SUPPORT_GFX_DMG)) {
2833 		/* init IDLE_POLL_COUNT = 0x60 */
2834 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2835 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2836 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2837 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2838 
2839 		/* init RLC PG Delay */
2840 		data = 0;
2841 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2842 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2843 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2844 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2845 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2846 
2847 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2848 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2849 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2850 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2851 
2852 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2853 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2854 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2855 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2856 
2857 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2858 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2859 
2860 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2861 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2862 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2863 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2864 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2865 	}
2866 }
2867 
2868 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2869 						bool enable)
2870 {
2871 	uint32_t data = 0;
2872 	uint32_t default_data = 0;
2873 
2874 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2875 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2876 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2877 			     enable ? 1 : 0);
2878 	if (default_data != data)
2879 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2880 }
2881 
2882 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2883 						bool enable)
2884 {
2885 	uint32_t data = 0;
2886 	uint32_t default_data = 0;
2887 
2888 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2889 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2890 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2891 			     enable ? 1 : 0);
2892 	if (default_data != data)
2893 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2894 }
2895 
2896 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2897 					bool enable)
2898 {
2899 	uint32_t data = 0;
2900 	uint32_t default_data = 0;
2901 
2902 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2903 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2904 			     CP_PG_DISABLE,
2905 			     enable ? 0 : 1);
2906 	if (default_data != data)
2907 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2908 }
2909 
2910 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2911 						bool enable)
2912 {
2913 	uint32_t data, default_data;
2914 
2915 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2916 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2917 			     GFX_POWER_GATING_ENABLE,
2918 			     enable ? 1 : 0);
2919 	if (default_data != data)
2920 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2921 }
2922 
2923 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2924 						bool enable)
2925 {
2926 	uint32_t data, default_data;
2927 
2928 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2929 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2930 			     GFX_PIPELINE_PG_ENABLE,
2931 			     enable ? 1 : 0);
2932 	if (default_data != data)
2933 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2934 
2935 	if (!enable)
2936 		/* read any GFX register to wake up GFX */
2937 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2938 }
2939 
2940 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2941 						       bool enable)
2942 {
2943 	uint32_t data, default_data;
2944 
2945 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2946 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2947 			     STATIC_PER_CU_PG_ENABLE,
2948 			     enable ? 1 : 0);
2949 	if (default_data != data)
2950 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2951 }
2952 
2953 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2954 						bool enable)
2955 {
2956 	uint32_t data, default_data;
2957 
2958 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2959 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2960 			     DYN_PER_CU_PG_ENABLE,
2961 			     enable ? 1 : 0);
2962 	if (default_data != data)
2963 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2964 }
2965 
2966 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2967 {
2968 	gfx_v9_0_init_csb(adev);
2969 
2970 	/*
2971 	 * The RLC save/restore list is only available since RLC v2_1,
2972 	 * and it is required by the gfxoff feature.
2973 	 */
2974 	if (adev->gfx.rlc.is_rlc_v2_1) {
2975 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
2976 			    IP_VERSION(9, 2, 1) ||
2977 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
2978 			gfx_v9_1_init_rlc_save_restore_list(adev);
2979 		gfx_v9_0_enable_save_restore_machine(adev);
2980 	}
2981 
2982 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2983 			      AMD_PG_SUPPORT_GFX_SMG |
2984 			      AMD_PG_SUPPORT_GFX_DMG |
2985 			      AMD_PG_SUPPORT_CP |
2986 			      AMD_PG_SUPPORT_GDS |
2987 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2988 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
2989 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
2990 		gfx_v9_0_init_gfx_power_gating(adev);
2991 	}
2992 }
2993 
2994 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2995 {
2996 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2997 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2998 	gfx_v9_0_wait_for_rlc_serdes(adev);
2999 }
3000 
3001 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3002 {
3003 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3004 	udelay(50);
3005 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3006 	udelay(50);
3007 }
3008 
3009 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3010 {
3011 #ifdef AMDGPU_RLC_DEBUG_RETRY
3012 	u32 rlc_ucode_ver;
3013 #endif
3014 
3015 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3016 	udelay(50);
3017 
3018 	/* APUs (e.g. carrizo) enable the CP interrupt only after the CP is initialized */
3019 	if (!(adev->flags & AMD_IS_APU)) {
3020 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3021 		udelay(50);
3022 	}
3023 
3024 #ifdef AMDGPU_RLC_DEBUG_RETRY
3025 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3026 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3027 	if (rlc_ucode_ver == 0x108) {
3028 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3029 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3030 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3031 		 * default is 0x9C4 to create a 100us interval */
3032 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3033 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3034 		 * to disable the page fault retry interrupts, default is
3035 		 * 0x100 (256) */
3036 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3037 	}
3038 #endif
3039 }
3040 
3041 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3042 {
3043 	const struct rlc_firmware_header_v2_0 *hdr;
3044 	const __le32 *fw_data;
3045 	unsigned i, fw_size;
3046 
3047 	if (!adev->gfx.rlc_fw)
3048 		return -EINVAL;
3049 
3050 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3051 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3052 
3053 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3054 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3055 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3056 
3057 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3058 			RLCG_UCODE_LOADING_START_ADDRESS);
3059 	for (i = 0; i < fw_size; i++)
3060 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
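	/* writing the firmware version to the ADDR register afterwards appears
	 * to be the convention for marking the upload as complete
	 */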
3061 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3062 
3063 	return 0;
3064 }
3065 
3066 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3067 {
3068 	int r;
3069 
3070 	if (amdgpu_sriov_vf(adev)) {
3071 		gfx_v9_0_init_csb(adev);
3072 		return 0;
3073 	}
3074 
3075 	adev->gfx.rlc.funcs->stop(adev);
3076 
3077 	/* disable CG */
3078 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3079 
3080 	gfx_v9_0_init_pg(adev);
3081 
3082 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3083 		/* legacy rlc firmware loading */
3084 		r = gfx_v9_0_rlc_load_microcode(adev);
3085 		if (r)
3086 			return r;
3087 	}
3088 
3089 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3090 	case IP_VERSION(9, 2, 2):
3091 	case IP_VERSION(9, 1, 0):
3092 		gfx_v9_0_init_lbpw(adev);
3093 		if (amdgpu_lbpw == 0)
3094 			gfx_v9_0_enable_lbpw(adev, false);
3095 		else
3096 			gfx_v9_0_enable_lbpw(adev, true);
3097 		break;
3098 	case IP_VERSION(9, 4, 0):
3099 		gfx_v9_4_init_lbpw(adev);
3100 		if (amdgpu_lbpw > 0)
3101 			gfx_v9_0_enable_lbpw(adev, true);
3102 		else
3103 			gfx_v9_0_enable_lbpw(adev, false);
3104 		break;
3105 	default:
3106 		break;
3107 	}
3108 
3109 	gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
3110 
3111 	adev->gfx.rlc.funcs->start(adev);
3112 
3113 	return 0;
3114 }
3115 
3116 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3117 {
3118 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3119 
3120 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3121 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3122 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3123 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3124 	udelay(50);
3125 }
3126 
3127 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3128 {
3129 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3130 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3131 	const struct gfx_firmware_header_v1_0 *me_hdr;
3132 	const __le32 *fw_data;
3133 	unsigned i, fw_size;
3134 
3135 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3136 		return -EINVAL;
3137 
3138 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3139 		adev->gfx.pfp_fw->data;
3140 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3141 		adev->gfx.ce_fw->data;
3142 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3143 		adev->gfx.me_fw->data;
3144 
3145 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3146 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3147 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3148 
3149 	gfx_v9_0_cp_gfx_enable(adev, false);
3150 
3151 	/* PFP */
3152 	fw_data = (const __le32 *)
3153 		(adev->gfx.pfp_fw->data +
3154 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3155 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3156 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3157 	for (i = 0; i < fw_size; i++)
3158 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3159 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3160 
3161 	/* CE */
3162 	fw_data = (const __le32 *)
3163 		(adev->gfx.ce_fw->data +
3164 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3165 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3166 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3167 	for (i = 0; i < fw_size; i++)
3168 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3169 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3170 
3171 	/* ME */
3172 	fw_data = (const __le32 *)
3173 		(adev->gfx.me_fw->data +
3174 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3175 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3176 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3177 	for (i = 0; i < fw_size; i++)
3178 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3179 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3180 
3181 	return 0;
3182 }
3183 
3184 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3185 {
3186 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3187 	const struct cs_section_def *sect = NULL;
3188 	const struct cs_extent_def *ext = NULL;
3189 	int r, i, tmp;
3190 
3191 	/* init the CP */
3192 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3193 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3194 
3195 	gfx_v9_0_cp_gfx_enable(adev, true);
3196 
3197 	/* Limit this quirk to the gfx9 APUs; the gfx10/gfx11 APUs have
3198 	 * already been confirmed not to need this update.
3199 	 */
3200 	if (adev->flags & AMD_IS_APU &&
3201 			adev->in_s3 && !adev->suspend_complete) {
3202 		DRM_INFO("Will skip the CSB packet resubmit\n");
3203 		return 0;
3204 	}
3205 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3206 	if (r) {
3207 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3208 		return r;
3209 	}
3210 
3211 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3212 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3213 
3214 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3215 	amdgpu_ring_write(ring, 0x80000000);
3216 	amdgpu_ring_write(ring, 0x80000000);
3217 
3218 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3219 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3220 			if (sect->id == SECT_CONTEXT) {
3221 				amdgpu_ring_write(ring,
3222 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3223 					       ext->reg_count));
3224 				amdgpu_ring_write(ring,
3225 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3226 				for (i = 0; i < ext->reg_count; i++)
3227 					amdgpu_ring_write(ring, ext->extent[i]);
3228 			}
3229 		}
3230 	}
3231 
3232 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3233 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3234 
3235 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3236 	amdgpu_ring_write(ring, 0);
3237 
3238 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3239 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3240 	amdgpu_ring_write(ring, 0x8000);
3241 	amdgpu_ring_write(ring, 0x8000);
3242 
3243 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3244 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3245 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3246 	amdgpu_ring_write(ring, tmp);
3247 	amdgpu_ring_write(ring, 0);
3248 
3249 	amdgpu_ring_commit(ring);
3250 
3251 	return 0;
3252 }
3253 
3254 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3255 {
3256 	struct amdgpu_ring *ring;
3257 	u32 tmp;
3258 	u32 rb_bufsz;
3259 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3260 
3261 	/* Set the write pointer delay */
3262 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3263 
3264 	/* set the RB to use vmid 0 */
3265 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3266 
3267 	/* Set ring buffer size */
3268 	ring = &adev->gfx.gfx_ring[0];
3269 	rb_bufsz = order_base_2(ring->ring_size / 8);
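	/* ring->ring_size is in bytes; RB_BUFSZ is a log2 encoding, presumably
	 * 2^(RB_BUFSZ + 1) dwords as with the other ring-size fields
	 */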
3270 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3271 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3272 #ifdef __BIG_ENDIAN
3273 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3274 #endif
3275 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3276 
3277 	/* Initialize the ring buffer's write pointers */
3278 	ring->wptr = 0;
3279 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3280 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3281 
3282 	/* set the wb address whether it's enabled or not */
3283 	rptr_addr = ring->rptr_gpu_addr;
3284 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3285 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3286 
3287 	wptr_gpu_addr = ring->wptr_gpu_addr;
3288 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3289 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3290 
3291 	mdelay(1);
3292 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3293 
3294 	rb_addr = ring->gpu_addr >> 8;
3295 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3296 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3297 
3298 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3299 	if (ring->use_doorbell) {
3300 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3301 				    DOORBELL_OFFSET, ring->doorbell_index);
3302 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3303 				    DOORBELL_EN, 1);
3304 	} else {
3305 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3306 	}
3307 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3308 
3309 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3310 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3311 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3312 
3313 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3314 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3315 
3316 
3317 	/* start the ring */
3318 	gfx_v9_0_cp_gfx_start(adev);
3319 
3320 	return 0;
3321 }
3322 
3323 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3324 {
3325 	if (enable) {
3326 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3327 	} else {
3328 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3329 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3330 		adev->gfx.kiq[0].ring.sched.ready = false;
3331 	}
3332 	udelay(50);
3333 }
3334 
3335 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3336 {
3337 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3338 	const __le32 *fw_data;
3339 	unsigned i;
3340 	u32 tmp;
3341 
3342 	if (!adev->gfx.mec_fw)
3343 		return -EINVAL;
3344 
3345 	gfx_v9_0_cp_compute_enable(adev, false);
3346 
3347 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3348 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3349 
3350 	fw_data = (const __le32 *)
3351 		(adev->gfx.mec_fw->data +
3352 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3353 	tmp = 0;
3354 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3355 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3356 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3357 
3358 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3359 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3360 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3361 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3362 
3363 	/* MEC1 */
3364 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3365 			 mec_hdr->jt_offset);
3366 	for (i = 0; i < mec_hdr->jt_size; i++)
3367 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3368 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3369 
3370 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3371 			adev->gfx.mec_fw_version);
3372 	/* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3373 
3374 	return 0;
3375 }
3376 
3377 /* KIQ functions */
3378 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3379 {
3380 	uint32_t tmp;
3381 	struct amdgpu_device *adev = ring->adev;
3382 
3383 	/* tell the RLC which queue is the KIQ */
3384 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3385 	tmp &= 0xffffff00;
3386 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3387 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
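	/* bit 7 presumably acts as a valid/enable flag; it is set in a second
	 * write so the RLC latches the queue selection first
	 */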
3388 	tmp |= 0x80;
3389 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3390 }
3391 
3392 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3393 {
3394 	struct amdgpu_device *adev = ring->adev;
3395 
3396 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3397 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3398 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3399 			mqd->cp_hqd_queue_priority =
3400 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3401 		}
3402 	}
3403 }
3404 
3405 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3406 {
3407 	struct amdgpu_device *adev = ring->adev;
3408 	struct v9_mqd *mqd = ring->mqd_ptr;
3409 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3410 	uint32_t tmp;
3411 
3412 	mqd->header = 0xC0310800;
3413 	mqd->compute_pipelinestat_enable = 0x00000001;
3414 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3415 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3416 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3417 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3418 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3419 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3420 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3421 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3422 	mqd->compute_misc_reserved = 0x00000003;
3423 
3424 	mqd->dynamic_cu_mask_addr_lo =
3425 		lower_32_bits(ring->mqd_gpu_addr
3426 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3427 	mqd->dynamic_cu_mask_addr_hi =
3428 		upper_32_bits(ring->mqd_gpu_addr
3429 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3430 
3431 	eop_base_addr = ring->eop_gpu_addr >> 8;
3432 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3433 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3434 
3435 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3436 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3437 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3438 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
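	/* with the 4096-byte HPD used here this evaluates to
	 * order_base_2(1024) - 1 = 9, i.e. 1024 dwords
	 */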
3439 
3440 	mqd->cp_hqd_eop_control = tmp;
3441 
3442 	/* enable doorbell? */
3443 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3444 
3445 	if (ring->use_doorbell) {
3446 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3447 				    DOORBELL_OFFSET, ring->doorbell_index);
3448 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3449 				    DOORBELL_EN, 1);
3450 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3451 				    DOORBELL_SOURCE, 0);
3452 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3453 				    DOORBELL_HIT, 0);
3454 	} else {
3455 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3456 					 DOORBELL_EN, 0);
3457 	}
3458 
3459 	mqd->cp_hqd_pq_doorbell_control = tmp;
3460 
3461 	/* disable the queue if it's active */
3462 	ring->wptr = 0;
3463 	mqd->cp_hqd_dequeue_request = 0;
3464 	mqd->cp_hqd_pq_rptr = 0;
3465 	mqd->cp_hqd_pq_wptr_lo = 0;
3466 	mqd->cp_hqd_pq_wptr_hi = 0;
3467 
3468 	/* set the pointer to the MQD */
3469 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3470 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3471 
3472 	/* set MQD vmid to 0 */
3473 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3474 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3475 	mqd->cp_mqd_control = tmp;
3476 
3477 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3478 	hqd_gpu_addr = ring->gpu_addr >> 8;
3479 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3480 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3481 
3482 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3483 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3484 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3485 			    (order_base_2(ring->ring_size / 4) - 1));
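	/* QUEUE_SIZE appears to use the same 2^(n + 1)-dword encoding as
	 * EOP_SIZE above
	 */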
3486 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3487 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3488 #ifdef __BIG_ENDIAN
3489 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3490 #endif
3491 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3492 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3493 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3494 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3495 	mqd->cp_hqd_pq_control = tmp;
3496 
3497 	/* set the wb address whether it's enabled or not */
3498 	wb_gpu_addr = ring->rptr_gpu_addr;
3499 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3500 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3501 		upper_32_bits(wb_gpu_addr) & 0xffff;
3502 
3503 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3504 	wb_gpu_addr = ring->wptr_gpu_addr;
3505 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3506 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3507 
3508 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3509 	ring->wptr = 0;
3510 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3511 
3512 	/* set the vmid for the queue */
3513 	mqd->cp_hqd_vmid = 0;
3514 
3515 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3516 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3517 	mqd->cp_hqd_persistent_state = tmp;
3518 
3519 	/* set MIN_IB_AVAIL_SIZE */
3520 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3521 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3522 	mqd->cp_hqd_ib_control = tmp;
3523 
3524 	/* set static priority for a queue/ring */
3525 	gfx_v9_0_mqd_set_priority(ring, mqd);
3526 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3527 
3528 	/* the map_queues packet doesn't need to activate the queue,
3529 	 * so only the KIQ needs to set this field.
3530 	 */
3531 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3532 		mqd->cp_hqd_active = 1;
3533 
3534 	return 0;
3535 }
3536 
3537 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3538 {
3539 	struct amdgpu_device *adev = ring->adev;
3540 	struct v9_mqd *mqd = ring->mqd_ptr;
3541 	int j;
3542 
3543 	/* disable wptr polling */
3544 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3545 
3546 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3547 	       mqd->cp_hqd_eop_base_addr_lo);
3548 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3549 	       mqd->cp_hqd_eop_base_addr_hi);
3550 
3551 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3552 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3553 	       mqd->cp_hqd_eop_control);
3554 
3555 	/* enable doorbell? */
3556 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3557 	       mqd->cp_hqd_pq_doorbell_control);
3558 
3559 	/* disable the queue if it's active */
3560 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3561 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3562 		for (j = 0; j < adev->usec_timeout; j++) {
3563 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3564 				break;
3565 			udelay(1);
3566 		}
3567 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3568 		       mqd->cp_hqd_dequeue_request);
3569 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3570 		       mqd->cp_hqd_pq_rptr);
3571 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3572 		       mqd->cp_hqd_pq_wptr_lo);
3573 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3574 		       mqd->cp_hqd_pq_wptr_hi);
3575 	}
3576 
3577 	/* set the pointer to the MQD */
3578 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3579 	       mqd->cp_mqd_base_addr_lo);
3580 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3581 	       mqd->cp_mqd_base_addr_hi);
3582 
3583 	/* set MQD vmid to 0 */
3584 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3585 	       mqd->cp_mqd_control);
3586 
3587 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3588 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3589 	       mqd->cp_hqd_pq_base_lo);
3590 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3591 	       mqd->cp_hqd_pq_base_hi);
3592 
3593 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3594 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3595 	       mqd->cp_hqd_pq_control);
3596 
3597 	/* set the wb address whether it's enabled or not */
3598 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3599 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3600 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3601 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3602 
3603 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3604 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3605 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3606 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3607 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3608 
3609 	/* enable the doorbell if requested */
3610 	if (ring->use_doorbell) {
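		/* doorbell_index values are 64-bit doorbell slots; the "* 2"
		 * presumably converts to 32-bit units and the "<< 2" to a
		 * byte offset
		 */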
3611 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3612 					(adev->doorbell_index.kiq * 2) << 2);
3613 		/* If GC has entered CGPG, ringing a doorbell beyond the first page
3614 		 * does not wake GC up. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3615 		 * work around this issue; the change has to be aligned with a
3616 		 * firmware update.
3617 		 */
3618 		if (check_if_enlarge_doorbell_range(adev))
3619 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3620 					(adev->doorbell.size - 4));
3621 		else
3622 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3623 					(adev->doorbell_index.userqueue_end * 2) << 2);
3624 	}
3625 
3626 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3627 	       mqd->cp_hqd_pq_doorbell_control);
3628 
3629 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3630 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3631 	       mqd->cp_hqd_pq_wptr_lo);
3632 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3633 	       mqd->cp_hqd_pq_wptr_hi);
3634 
3635 	/* set the vmid for the queue */
3636 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3637 
3638 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3639 	       mqd->cp_hqd_persistent_state);
3640 
3641 	/* activate the queue */
3642 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3643 	       mqd->cp_hqd_active);
3644 
3645 	if (ring->use_doorbell)
3646 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3647 
3648 	return 0;
3649 }
3650 
3651 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3652 {
3653 	struct amdgpu_device *adev = ring->adev;
3654 	int j;
3655 
3656 	/* disable the queue if it's active */
3657 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3658 
3659 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3660 
3661 		for (j = 0; j < adev->usec_timeout; j++) {
3662 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3663 				break;
3664 			udelay(1);
3665 		}
3666 
3667 		if (j == adev->usec_timeout) {
3668 			DRM_DEBUG("KIQ dequeue request failed.\n");
3669 
3670 			/* Manual disable if dequeue request times out */
3671 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3672 		}
3673 
3674 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3675 		      0);
3676 	}
3677 
3678 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3679 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3680 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3681 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3682 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3683 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3684 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3685 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3686 
3687 	return 0;
3688 }
3689 
3690 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3691 {
3692 	struct amdgpu_device *adev = ring->adev;
3693 	struct v9_mqd *mqd = ring->mqd_ptr;
3694 	struct v9_mqd *tmp_mqd;
3695 
3696 	gfx_v9_0_kiq_setting(ring);
3697 
3698 	/* The GPU could be in a bad state during probe and the driver may
3699 	 * trigger a reset after loading the SMU; in that case the MQD has not
3700 	 * been initialized and the driver needs to re-init it.
3701 	 * Check mqd->cp_hqd_pq_control, since this value should not be 0.
3702 	 */
3703 	tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3704 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3705 		/* for the GPU_RESET case, reset the MQD to a clean state */
3706 		if (adev->gfx.kiq[0].mqd_backup)
3707 			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3708 
3709 		/* reset ring buffer */
3710 		ring->wptr = 0;
3711 		amdgpu_ring_clear_ring(ring);
3712 
3713 		mutex_lock(&adev->srbm_mutex);
3714 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3715 		gfx_v9_0_kiq_init_register(ring);
3716 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3717 		mutex_unlock(&adev->srbm_mutex);
3718 	} else {
3719 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3720 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3721 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3722 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3723 			amdgpu_ring_clear_ring(ring);
3724 		mutex_lock(&adev->srbm_mutex);
3725 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3726 		gfx_v9_0_mqd_init(ring);
3727 		gfx_v9_0_kiq_init_register(ring);
3728 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3729 		mutex_unlock(&adev->srbm_mutex);
3730 
3731 		if (adev->gfx.kiq[0].mqd_backup)
3732 			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3733 	}
3734 
3735 	return 0;
3736 }
3737 
3738 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3739 {
3740 	struct amdgpu_device *adev = ring->adev;
3741 	struct v9_mqd *mqd = ring->mqd_ptr;
3742 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3743 	struct v9_mqd *tmp_mqd;
3744 
3745 	/* Same as the KIQ init above: the driver needs to re-init the MQD
3746 	 * if mqd->cp_hqd_pq_control has never been initialized.
3747 	 */
3748 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3749 
3750 	if (!tmp_mqd->cp_hqd_pq_control ||
3751 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3752 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3753 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3754 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3755 		mutex_lock(&adev->srbm_mutex);
3756 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3757 		gfx_v9_0_mqd_init(ring);
3758 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3759 		mutex_unlock(&adev->srbm_mutex);
3760 
3761 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3762 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3763 	} else {
3764 		/* restore MQD to a clean status */
3765 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3766 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3767 		/* reset ring buffer */
3768 		ring->wptr = 0;
3769 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3770 		amdgpu_ring_clear_ring(ring);
3771 	}
3772 
3773 	return 0;
3774 }
3775 
3776 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3777 {
3778 	struct amdgpu_ring *ring;
3779 	int r;
3780 
3781 	ring = &adev->gfx.kiq[0].ring;
3782 
3783 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3784 	if (unlikely(r != 0))
3785 		return r;
3786 
3787 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3788 	if (unlikely(r != 0)) {
3789 		amdgpu_bo_unreserve(ring->mqd_obj);
3790 		return r;
3791 	}
3792 
3793 	gfx_v9_0_kiq_init_queue(ring);
3794 	amdgpu_bo_kunmap(ring->mqd_obj);
3795 	ring->mqd_ptr = NULL;
3796 	amdgpu_bo_unreserve(ring->mqd_obj);
3797 	return 0;
3798 }
3799 
3800 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3801 {
3802 	struct amdgpu_ring *ring = NULL;
3803 	int r = 0, i;
3804 
3805 	gfx_v9_0_cp_compute_enable(adev, true);
3806 
3807 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3808 		ring = &adev->gfx.compute_ring[i];
3809 
3810 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3811 		if (unlikely(r != 0))
3812 			goto done;
3813 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3814 		if (!r) {
3815 			r = gfx_v9_0_kcq_init_queue(ring);
3816 			amdgpu_bo_kunmap(ring->mqd_obj);
3817 			ring->mqd_ptr = NULL;
3818 		}
3819 		amdgpu_bo_unreserve(ring->mqd_obj);
3820 		if (r)
3821 			goto done;
3822 	}
3823 
3824 	r = amdgpu_gfx_enable_kcq(adev, 0);
3825 done:
3826 	return r;
3827 }
3828 
3829 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3830 {
3831 	int r, i;
3832 	struct amdgpu_ring *ring;
3833 
3834 	if (!(adev->flags & AMD_IS_APU))
3835 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3836 
3837 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3838 		if (adev->gfx.num_gfx_rings) {
3839 			/* legacy firmware loading */
3840 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3841 			if (r)
3842 				return r;
3843 		}
3844 
3845 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3846 		if (r)
3847 			return r;
3848 	}
3849 
3850 	r = gfx_v9_0_kiq_resume(adev);
3851 	if (r)
3852 		return r;
3853 
3854 	if (adev->gfx.num_gfx_rings) {
3855 		r = gfx_v9_0_cp_gfx_resume(adev);
3856 		if (r)
3857 			return r;
3858 	}
3859 
3860 	r = gfx_v9_0_kcq_resume(adev);
3861 	if (r)
3862 		return r;
3863 
3864 	if (adev->gfx.num_gfx_rings) {
3865 		ring = &adev->gfx.gfx_ring[0];
3866 		r = amdgpu_ring_test_helper(ring);
3867 		if (r)
3868 			return r;
3869 	}
3870 
3871 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3872 		ring = &adev->gfx.compute_ring[i];
3873 		amdgpu_ring_test_helper(ring);
3874 	}
3875 
3876 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3877 
3878 	return 0;
3879 }
3880 
3881 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3882 {
3883 	u32 tmp;
3884 
3885 	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
3886 	    amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
3887 		return;
3888 
3889 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3890 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3891 				adev->df.hash_status.hash_64k);
3892 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3893 				adev->df.hash_status.hash_2m);
3894 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3895 				adev->df.hash_status.hash_1g);
3896 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3897 }
3898 
3899 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3900 {
3901 	if (adev->gfx.num_gfx_rings)
3902 		gfx_v9_0_cp_gfx_enable(adev, enable);
3903 	gfx_v9_0_cp_compute_enable(adev, enable);
3904 }
3905 
3906 static int gfx_v9_0_hw_init(void *handle)
3907 {
3908 	int r;
3909 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3910 
3911 	if (!amdgpu_sriov_vf(adev))
3912 		gfx_v9_0_init_golden_registers(adev);
3913 
3914 	gfx_v9_0_constants_init(adev);
3915 
3916 	gfx_v9_0_init_tcp_config(adev);
3917 
3918 	r = adev->gfx.rlc.funcs->resume(adev);
3919 	if (r)
3920 		return r;
3921 
3922 	r = gfx_v9_0_cp_resume(adev);
3923 	if (r)
3924 		return r;
3925 
3926 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
3927 		gfx_v9_4_2_set_power_brake_sequence(adev);
3928 
3929 	return r;
3930 }
3931 
3932 static int gfx_v9_0_hw_fini(void *handle)
3933 {
3934 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3935 
3936 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3937 		amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3938 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3939 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3940 
3941 	/* DF freeze and KCQ disable would fail after a fatal RAS interrupt */
3942 	if (!amdgpu_ras_intr_triggered())
3943 		/* disable KCQ so the CPC stops touching memory that is no longer valid */
3944 		amdgpu_gfx_disable_kcq(adev, 0);
3945 
3946 	if (amdgpu_sriov_vf(adev)) {
3947 		gfx_v9_0_cp_gfx_enable(adev, false);
3948 		/* must disable wptr polling for SRIOV when the hw is finished,
3949 		 * otherwise the CPC engine may keep fetching a WB address that is
3950 		 * already invalid after the sw teardown, triggering DMAR read
3951 		 * errors on the hypervisor side.
3952 		 */
3953 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3954 		return 0;
3955 	}
3956 
3957 	/* Use the deinitialize sequence from CAIL when unbinding the device
3958 	 * from the driver, otherwise the KIQ hangs when binding it back.
3959 	 */
3960 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3961 		mutex_lock(&adev->srbm_mutex);
3962 		soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
3963 				adev->gfx.kiq[0].ring.pipe,
3964 				adev->gfx.kiq[0].ring.queue, 0, 0);
3965 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
3966 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3967 		mutex_unlock(&adev->srbm_mutex);
3968 	}
3969 
3970 	gfx_v9_0_cp_enable(adev, false);
3971 
3972 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
3973 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
3974 	    (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
3975 		dev_dbg(adev->dev, "Skipping RLC halt\n");
3976 		return 0;
3977 	}
3978 
3979 	adev->gfx.rlc.funcs->stop(adev);
3980 	return 0;
3981 }
3982 
3983 static int gfx_v9_0_suspend(void *handle)
3984 {
3985 	return gfx_v9_0_hw_fini(handle);
3986 }
3987 
3988 static int gfx_v9_0_resume(void *handle)
3989 {
3990 	return gfx_v9_0_hw_init(handle);
3991 }
3992 
3993 static bool gfx_v9_0_is_idle(void *handle)
3994 {
3995 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3996 
3997 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3998 				GRBM_STATUS, GUI_ACTIVE))
3999 		return false;
4000 	else
4001 		return true;
4002 }
4003 
4004 static int gfx_v9_0_wait_for_idle(void *handle)
4005 {
4006 	unsigned i;
4007 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4008 
4009 	for (i = 0; i < adev->usec_timeout; i++) {
4010 		if (gfx_v9_0_is_idle(handle))
4011 			return 0;
4012 		udelay(1);
4013 	}
4014 	return -ETIMEDOUT;
4015 }
4016 
4017 static int gfx_v9_0_soft_reset(void *handle)
4018 {
4019 	u32 grbm_soft_reset = 0;
4020 	u32 tmp;
4021 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4022 
4023 	/* GRBM_STATUS */
4024 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4025 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4026 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4027 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4028 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4029 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4030 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4031 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4032 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4033 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4034 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4035 	}
4036 
4037 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4038 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4039 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4040 	}
4041 
4042 	/* GRBM_STATUS2 */
4043 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4044 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4045 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4046 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4047 
4048 
4049 	if (grbm_soft_reset) {
4050 		/* stop the rlc */
4051 		adev->gfx.rlc.funcs->stop(adev);
4052 
4053 		if (adev->gfx.num_gfx_rings)
4054 			/* Disable GFX parsing/prefetching */
4055 			gfx_v9_0_cp_gfx_enable(adev, false);
4056 
4057 		/* Disable MEC parsing/prefetching */
4058 		gfx_v9_0_cp_compute_enable(adev, false);
4059 
4060 		if (grbm_soft_reset) {
4061 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4062 			tmp |= grbm_soft_reset;
4063 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4064 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4065 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4066 
4067 			udelay(50);
4068 
4069 			tmp &= ~grbm_soft_reset;
4070 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4071 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4072 		}
4073 
4074 		/* Wait a little for things to settle down */
4075 		udelay(50);
4076 	}
4077 	return 0;
4078 }
4079 
4080 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4081 {
4082 	signed long r, cnt = 0;
4083 	unsigned long flags;
4084 	uint32_t seq, reg_val_offs = 0;
4085 	uint64_t value = 0;
4086 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4087 	struct amdgpu_ring *ring = &kiq->ring;
4088 
4089 	BUG_ON(!ring->funcs->emit_rreg);
4090 
4091 	spin_lock_irqsave(&kiq->ring_lock, flags);
4092 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4093 		pr_err("critical bug! too many kiq readers\n");
4094 		goto failed_unlock;
4095 	}
4096 	amdgpu_ring_alloc(ring, 32);
4097 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4098 	amdgpu_ring_write(ring, 9 |	/* src: register */
4099 				(5 << 8) |	/* dst: memory */
4100 				(1 << 16) |	/* count sel */
4101 				(1 << 20));	/* write confirm */
4102 	amdgpu_ring_write(ring, 0);
4103 	amdgpu_ring_write(ring, 0);
4104 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4105 				reg_val_offs * 4));
4106 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4107 				reg_val_offs * 4));
4108 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4109 	if (r)
4110 		goto failed_undo;
4111 
4112 	amdgpu_ring_commit(ring);
4113 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4114 
4115 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4116 
4117 	/* Don't keep waiting in the GPU reset case, because doing so can
4118 	 * block the gpu_recover() routine forever: e.g. this virt_kiq_rreg
4119 	 * may be triggered from TTM, and ttm_bo_lock_delayed_workqueue()
4120 	 * will never return if we keep waiting here, which hangs
4121 	 * gpu_recover().
4122 	 *
4123 	 * Also don't keep waiting when called from IRQ context.
4124 	 */
4125 	if (r < 1 && (amdgpu_in_reset(adev)))
4126 		goto failed_kiq_read;
4127 
4128 	might_sleep();
4129 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4130 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4131 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4132 	}
4133 
4134 	if (cnt > MAX_KIQ_REG_TRY)
4135 		goto failed_kiq_read;
4136 
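	/* barrier so the writeback slot written by the CP is read only after
	 * the fence has been observed as signalled
	 */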
4137 	mb();
4138 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4139 		(uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4140 	amdgpu_device_wb_free(adev, reg_val_offs);
4141 	return value;
4142 
4143 failed_undo:
4144 	amdgpu_ring_undo(ring);
4145 failed_unlock:
4146 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4147 failed_kiq_read:
4148 	if (reg_val_offs)
4149 		amdgpu_device_wb_free(adev, reg_val_offs);
4150 	pr_err("failed to read gpu clock\n");
4151 	return ~0;
4152 }
4153 
4154 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4155 {
4156 	uint64_t clock, clock_lo, clock_hi, hi_check;
4157 
4158 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4159 	case IP_VERSION(9, 3, 0):
4160 		preempt_disable();
4161 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4162 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4163 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4164 		/* The SMUIO TSC runs at 100 MHz, so the 32-bit low word carries
4165 		 * over roughly every 42 seconds.
4166 		 */
4167 		if (hi_check != clock_hi) {
4168 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4169 			clock_hi = hi_check;
4170 		}
4171 		preempt_enable();
4172 		clock = clock_lo | (clock_hi << 32ULL);
4173 		break;
4174 	default:
4175 		amdgpu_gfx_off_ctrl(adev, false);
4176 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4177 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4178 			    IP_VERSION(9, 0, 1) &&
4179 		    amdgpu_sriov_runtime(adev)) {
4180 			clock = gfx_v9_0_kiq_read_clock(adev);
4181 		} else {
4182 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4183 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4184 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4185 		}
4186 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4187 		amdgpu_gfx_off_ctrl(adev, true);
4188 		break;
4189 	}
4190 	return clock;
4191 }
4192 
4193 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4194 					  uint32_t vmid,
4195 					  uint32_t gds_base, uint32_t gds_size,
4196 					  uint32_t gws_base, uint32_t gws_size,
4197 					  uint32_t oa_base, uint32_t oa_size)
4198 {
4199 	struct amdgpu_device *adev = ring->adev;
4200 
4201 	/* GDS Base */
4202 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4203 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4204 				   gds_base);
4205 
4206 	/* GDS Size */
4207 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4208 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4209 				   gds_size);
4210 
4211 	/* GWS */
4212 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4213 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4214 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4215 
4216 	/* OA */
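	/* (1 << (oa_size + oa_base)) - (1 << oa_base) builds a mask of oa_size
	 * consecutive bits starting at bit oa_base
	 */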
4217 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4218 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4219 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4220 }
4221 
4222 static const u32 vgpr_init_compute_shader[] =
4223 {
4224 	0xb07c0000, 0xbe8000ff,
4225 	0x000000f8, 0xbf110800,
4226 	0x7e000280, 0x7e020280,
4227 	0x7e040280, 0x7e060280,
4228 	0x7e080280, 0x7e0a0280,
4229 	0x7e0c0280, 0x7e0e0280,
4230 	0x80808800, 0xbe803200,
4231 	0xbf84fff5, 0xbf9c0000,
4232 	0xd28c0001, 0x0001007f,
4233 	0xd28d0001, 0x0002027e,
4234 	0x10020288, 0xb8810904,
4235 	0xb7814000, 0xd1196a01,
4236 	0x00000301, 0xbe800087,
4237 	0xbefc00c1, 0xd89c4000,
4238 	0x00020201, 0xd89cc080,
4239 	0x00040401, 0x320202ff,
4240 	0x00000800, 0x80808100,
4241 	0xbf84fff8, 0x7e020280,
4242 	0xbf810000, 0x00000000,
4243 };
4244 
4245 static const u32 sgpr_init_compute_shader[] =
4246 {
4247 	0xb07c0000, 0xbe8000ff,
4248 	0x0000005f, 0xbee50080,
4249 	0xbe812c65, 0xbe822c65,
4250 	0xbe832c65, 0xbe842c65,
4251 	0xbe852c65, 0xb77c0005,
4252 	0x80808500, 0xbf84fff8,
4253 	0xbe800080, 0xbf810000,
4254 };
4255 
4256 static const u32 vgpr_init_compute_shader_arcturus[] = {
4257 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4258 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4259 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4260 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4261 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4262 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4263 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4264 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4265 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4266 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4267 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4268 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4269 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4270 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4271 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4272 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4273 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4274 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4275 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4276 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4277 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4278 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4279 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4280 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4281 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4282 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4283 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4284 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4285 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4286 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4287 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4288 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4289 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4290 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4291 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4292 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4293 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4294 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4295 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4296 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4297 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4298 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4299 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4300 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4301 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4302 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4303 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4304 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4305 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4306 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4307 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4308 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4309 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4310 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4311 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4312 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4313 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4314 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4315 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4316 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4317 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4318 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4319 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4320 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4321 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4322 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4323 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4324 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4325 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4326 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4327 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4328 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4329 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4330 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4331 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4332 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4333 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4334 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4335 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4336 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4337 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4338 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4339 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4340 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4341 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4342 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4343 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4344 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4345 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4346 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4347 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4348 	0xbf84fff8, 0xbf810000,
4349 };
4350 
4351 /* When the register arrays below are changed, please update gpr_reg_size
4352    and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds
4353    to cover all gfx9 ASICs. */
4354 static const struct soc15_reg_entry vgpr_init_regs[] = {
4355    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4356    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4357    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4358    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4359    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4360    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4361    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4362    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4363    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4364    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4365    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4366    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4367    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4368    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4369 };
4370 
4371 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4372    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4373    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4374    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4375    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4376    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4377    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4378    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4379    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4380    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4381    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4382    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4383    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4384    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4385    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4386 };
4387 
4388 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4389    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4390    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4391    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4392    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4393    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4394    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4395    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4396    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4397    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4398    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4399    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4400    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4401    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4402    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4403 };
4404 
4405 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4406    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4407    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4408    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4409    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4410    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4411    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4412    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4413    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4414    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4415    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4416    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4417    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4418    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4419    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4420 };
4421 
4422 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4423    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4424    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4425    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4426    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4427    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4428    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4429    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4430    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4431    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4432    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4433    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4434    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4435    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4436    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4437    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4438    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4439    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4440    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4441    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4442    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4443    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4444    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4445    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4446    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4447    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4448    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4449    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4450    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4451    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4452    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4453    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4454    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4455    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4456 };
4457 
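/*
 * GDS EDC workaround: zero GDS VMID0 memory with a CP DMA_DATA packet on
 * the first compute ring and busy-wait until the ring drains. Only runs
 * when GFX RAS is supported.
 */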
4458 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4459 {
4460 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4461 	int i, r;
4462 
4463 	/* only supported when RAS is enabled */
4464 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4465 		return 0;
4466 
4467 	r = amdgpu_ring_alloc(ring, 7);
4468 	if (r) {
4469 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4470 			ring->name, r);
4471 		return r;
4472 	}
4473 
4474 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4475 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4476 
4477 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4478 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4479 				PACKET3_DMA_DATA_DST_SEL(1) |
4480 				PACKET3_DMA_DATA_SRC_SEL(2) |
4481 				PACKET3_DMA_DATA_ENGINE(0)));
4482 	amdgpu_ring_write(ring, 0);
4483 	amdgpu_ring_write(ring, 0);
4484 	amdgpu_ring_write(ring, 0);
4485 	amdgpu_ring_write(ring, 0);
4486 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4487 				adev->gds.gds_size);
4488 
4489 	amdgpu_ring_commit(ring);
4490 
4491 	for (i = 0; i < adev->usec_timeout; i++) {
4492 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4493 			break;
4494 		udelay(1);
4495 	}
4496 
4497 	if (i >= adev->usec_timeout)
4498 		r = -ETIMEDOUT;
4499 
4500 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4501 
4502 	return r;
4503 }
4504 
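/*
 * GPR EDC workaround: build one IB that dispatches the VGPR init shader
 * once and the SGPR init shader twice (against two different CU subsets
 * via COMPUTE_STATIC_THREAD_MGMT), then wait on the resulting fence.
 * Only runs when GFX RAS is supported and the first compute ring is ready.
 */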
4505 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4506 {
4507 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4508 	struct amdgpu_ib ib;
4509 	struct dma_fence *f = NULL;
4510 	int r, i;
4511 	unsigned total_size, vgpr_offset, sgpr_offset;
4512 	u64 gpu_addr;
4513 
4514 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4515 						adev->gfx.config.max_cu_per_sh *
4516 						adev->gfx.config.max_sh_per_se;
4517 	int sgpr_work_group_size = 5;
4518 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4519 	int vgpr_init_shader_size;
4520 	const u32 *vgpr_init_shader_ptr;
4521 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4522 
4523 	/* only supported when RAS is enabled */
4524 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4525 		return 0;
4526 
4527 	/* bail if the compute ring is not ready */
4528 	if (!ring->sched.ready)
4529 		return 0;
4530 
4531 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4532 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4533 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4534 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4535 	} else {
4536 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4537 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4538 		vgpr_init_regs_ptr = vgpr_init_regs;
4539 	}
4540 
4541 	total_size =
4542 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4543 	total_size +=
4544 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4545 	total_size +=
4546 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4547 	total_size = ALIGN(total_size, 256);
4548 	vgpr_offset = total_size;
4549 	total_size += ALIGN(vgpr_init_shader_size, 256);
4550 	sgpr_offset = total_size;
4551 	total_size += sizeof(sgpr_init_compute_shader);
4552 
4553 	/* allocate an indirect buffer to put the commands in */
4554 	memset(&ib, 0, sizeof(ib));
4555 	r = amdgpu_ib_get(adev, NULL, total_size,
4556 					AMDGPU_IB_POOL_DIRECT, &ib);
4557 	if (r) {
4558 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4559 		return r;
4560 	}
4561 
4562 	/* load the compute shaders */
4563 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4564 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4565 
4566 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4567 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4568 
4569 	/* init the ib length to 0 */
4570 	ib.length_dw = 0;
4571 
4572 	/* VGPR */
4573 	/* write the register state for the compute dispatch */
4574 	for (i = 0; i < gpr_reg_size; i++) {
4575 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4576 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4577 								- PACKET3_SET_SH_REG_START;
4578 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4579 	}
4580 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4581 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4582 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4583 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4584 							- PACKET3_SET_SH_REG_START;
4585 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4586 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4587 
4588 	/* write dispatch packet */
4589 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4590 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4591 	ib.ptr[ib.length_dw++] = 1; /* y */
4592 	ib.ptr[ib.length_dw++] = 1; /* z */
4593 	ib.ptr[ib.length_dw++] =
4594 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4595 
4596 	/* write CS partial flush packet */
4597 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4598 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4599 
4600 	/* SGPR1 */
4601 	/* write the register state for the compute dispatch */
4602 	for (i = 0; i < gpr_reg_size; i++) {
4603 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4604 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4605 								- PACKET3_SET_SH_REG_START;
4606 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4607 	}
4608 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4609 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4610 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4611 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4612 							- PACKET3_SET_SH_REG_START;
4613 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4614 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4615 
4616 	/* write dispatch packet */
4617 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4618 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4619 	ib.ptr[ib.length_dw++] = 1; /* y */
4620 	ib.ptr[ib.length_dw++] = 1; /* z */
4621 	ib.ptr[ib.length_dw++] =
4622 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4623 
4624 	/* write CS partial flush packet */
4625 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4626 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4627 
4628 	/* SGPR2 */
4629 	/* write the register state for the compute dispatch */
4630 	for (i = 0; i < gpr_reg_size; i++) {
4631 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4632 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4633 								- PACKET3_SET_SH_REG_START;
4634 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4635 	}
4636 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4637 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4638 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4639 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4640 							- PACKET3_SET_SH_REG_START;
4641 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4642 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4643 
4644 	/* write dispatch packet */
4645 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4646 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4647 	ib.ptr[ib.length_dw++] = 1; /* y */
4648 	ib.ptr[ib.length_dw++] = 1; /* z */
4649 	ib.ptr[ib.length_dw++] =
4650 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4651 
4652 	/* write CS partial flush packet */
4653 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4654 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4655 
4656 	/* schedule the ib on the ring */
4657 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4658 	if (r) {
4659 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4660 		goto fail;
4661 	}
4662 
4663 	/* wait for the GPU to finish processing the IB */
4664 	r = dma_fence_wait(f, false);
4665 	if (r) {
4666 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4667 		goto fail;
4668 	}
4669 
4670 fail:
4671 	amdgpu_ib_free(adev, &ib, NULL);
4672 	dma_fence_put(f);
4673 
4674 	return r;
4675 }
4676 
4677 static int gfx_v9_0_early_init(void *handle)
4678 {
4679 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4680 
4681 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4682 
4683 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4684 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4685 		adev->gfx.num_gfx_rings = 0;
4686 	else
4687 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4688 	adev->gfx.xcc_mask = 1;
4689 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4690 					  AMDGPU_MAX_COMPUTE_RINGS);
4691 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4692 	gfx_v9_0_set_ring_funcs(adev);
4693 	gfx_v9_0_set_irq_funcs(adev);
4694 	gfx_v9_0_set_gds_init(adev);
4695 	gfx_v9_0_set_rlc_funcs(adev);
4696 
4697 	/* init rlcg reg access ctrl */
4698 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4699 
4700 	return gfx_v9_0_init_microcode(adev);
4701 }
4702 
4703 static int gfx_v9_0_ecc_late_init(void *handle)
4704 {
4705 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4706 	int r;
4707 
4708 	/*
4709 	 * Temporary workaround for an issue where CP firmware fails to
4710 	 * update the read pointer while CPDMA writes the clearing operation
4711 	 * to GDS during the suspend/resume sequence on several cards, so
4712 	 * limit this operation to the cold boot sequence.
4713 	 */
4714 	if ((!adev->in_suspend) &&
4715 	    (adev->gds.gds_size)) {
4716 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4717 		if (r)
4718 			return r;
4719 	}
4720 
4721 	/* requires IBs so do in late init after IB pool is initialized */
4722 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4723 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4724 	else
4725 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4726 
4727 	if (r)
4728 		return r;
4729 
4730 	if (adev->gfx.ras &&
4731 	    adev->gfx.ras->enable_watchdog_timer)
4732 		adev->gfx.ras->enable_watchdog_timer(adev);
4733 
4734 	return 0;
4735 }
4736 
4737 static int gfx_v9_0_late_init(void *handle)
4738 {
4739 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4740 	int r;
4741 
4742 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4743 	if (r)
4744 		return r;
4745 
4746 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4747 	if (r)
4748 		return r;
4749 
4750 	r = gfx_v9_0_ecc_late_init(handle);
4751 	if (r)
4752 		return r;
4753 
4754 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4755 		gfx_v9_4_2_debug_trap_config_init(adev,
4756 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4757 	else
4758 		gfx_v9_0_debug_trap_config_init(adev,
4759 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4760 
4761 	return 0;
4762 }
4763 
4764 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4765 {
4766 	uint32_t rlc_setting;
4767 
4768 	/* if RLC is not enabled, do nothing */
4769 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4770 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4771 		return false;
4772 
4773 	return true;
4774 }
4775 
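/*
 * Request RLC safe mode: write CMD plus a MESSAGE of 1 to RLC_SAFE_MODE
 * and poll until the RLC acknowledges by clearing the CMD field.
 */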
4776 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4777 {
4778 	uint32_t data;
4779 	unsigned i;
4780 
4781 	data = RLC_SAFE_MODE__CMD_MASK;
4782 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4783 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4784 
4785 	/* wait for RLC_SAFE_MODE */
4786 	for (i = 0; i < adev->usec_timeout; i++) {
4787 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4788 			break;
4789 		udelay(1);
4790 	}
4791 }
4792 
4793 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4794 {
4795 	uint32_t data;
4796 
4797 	data = RLC_SAFE_MODE__CMD_MASK;
4798 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4799 }
4800 
4801 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4802 						bool enable)
4803 {
4804 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4805 
4806 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4807 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4808 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4809 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4810 	} else {
4811 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4812 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4813 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4814 	}
4815 
4816 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4817 }
4818 
4819 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4820 						bool enable)
4821 {
4822 	/* TODO: double check if we need to perform under safe mode */
4823 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4824 
4825 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4826 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4827 	else
4828 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4829 
4830 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4831 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4832 	else
4833 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4834 
4835 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4836 }
4837 
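/*
 * MGCG/MGLS: adjust the RLC_CGTT_MGCG_OVERRIDE bits and the RLC/CP memory
 * light-sleep enables, with the RLC held in safe mode for the update.
 */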
4838 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4839 						      bool enable)
4840 {
4841 	uint32_t data, def;
4842 
4843 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4844 
4845 	/* It is disabled by HW by default */
4846 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4847 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4848 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4849 
4850 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4851 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4852 
4853 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4854 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4855 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4856 
4857 		/* only for Vega10 & Raven1 */
4858 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4859 
4860 		if (def != data)
4861 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4862 
4863 		/* MGLS is a global flag to control all MGLS in GFX */
4864 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4865 			/* 2 - RLC memory Light sleep */
4866 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4867 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4868 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4869 				if (def != data)
4870 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4871 			}
4872 			/* 3 - CP memory Light sleep */
4873 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4874 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4875 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4876 				if (def != data)
4877 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4878 			}
4879 		}
4880 	} else {
4881 		/* 1 - MGCG_OVERRIDE */
4882 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4883 
4884 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4885 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4886 
4887 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4888 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4889 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4890 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4891 
4892 		if (def != data)
4893 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4894 
4895 		/* 2 - disable MGLS in RLC */
4896 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4897 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4898 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4899 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4900 		}
4901 
4902 		/* 3 - disable MGLS in CP */
4903 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4904 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4905 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4906 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4907 		}
4908 	}
4909 
4910 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4911 }
4912 
4913 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4914 					   bool enable)
4915 {
4916 	uint32_t data, def;
4917 
4918 	if (!adev->gfx.num_gfx_rings)
4919 		return;
4920 
4921 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4922 
4923 	/* Enable 3D CGCG/CGLS */
4924 	if (enable) {
4925 		/* write cmd to clear cgcg/cgls ov */
4926 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4927 		/* unset CGCG override */
4928 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4929 		/* update CGCG and CGLS override bits */
4930 		if (def != data)
4931 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4932 
4933 		/* enable 3Dcgcg FSM(0x0000363f) */
4934 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4935 
4936 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4937 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4938 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4939 		else
4940 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4941 
4942 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4943 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4944 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4945 		if (def != data)
4946 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4947 
4948 		/* set IDLE_POLL_COUNT(0x00900100) */
4949 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4950 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4951 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4952 		if (def != data)
4953 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4954 	} else {
4955 		/* Disable CGCG/CGLS */
4956 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4957 		/* disable cgcg, cgls should be disabled */
4958 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4959 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4960 		/* disable cgcg and cgls in FSM */
4961 		if (def != data)
4962 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4963 	}
4964 
4965 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4966 }
4967 
4968 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4969 						      bool enable)
4970 {
4971 	uint32_t def, data;
4972 
4973 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4974 
4975 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4976 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4977 		/* unset CGCG override */
4978 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4979 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4980 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4981 		else
4982 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4983 		/* update CGCG and CGLS override bits */
4984 		if (def != data)
4985 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4986 
4987 		/* enable cgcg FSM(0x0000363F) */
4988 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4989 
4990 		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
4991 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4992 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4993 		else
4994 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4995 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4996 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4997 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4998 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4999 		if (def != data)
5000 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5001 
5002 		/* set IDLE_POLL_COUNT(0x00900100) */
5003 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5004 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5005 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5006 		if (def != data)
5007 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5008 	} else {
5009 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5010 		/* reset CGCG/CGLS bits */
5011 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5012 		/* disable cgcg and cgls in FSM */
5013 		if (def != data)
5014 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5015 	}
5016 
5017 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5018 }
5019 
5020 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5021 					    bool enable)
5022 {
5023 	if (enable) {
5024 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5025 		 * ===  MGCG + MGLS ===
5026 		 */
5027 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5028 		/* ===  CGCG /CGLS for GFX 3D Only === */
5029 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5030 		/* ===  CGCG + CGLS === */
5031 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5032 	} else {
5033 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5034 		 * ===  CGCG + CGLS ===
5035 		 */
5036 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5037 		/* ===  CGCG /CGLS for GFX 3D Only === */
5038 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5039 		/* ===  MGCG + MGLS === */
5040 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5041 	}
5042 	return 0;
5043 }
5044 
5045 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5046 					      unsigned int vmid)
5047 {
5048 	u32 reg, data;
5049 
5050 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5051 	if (amdgpu_sriov_is_pp_one_vf(adev))
5052 		data = RREG32_NO_KIQ(reg);
5053 	else
5054 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5055 
5056 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5057 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5058 
5059 	if (amdgpu_sriov_is_pp_one_vf(adev))
5060 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5061 	else
5062 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5063 }
5064 
5065 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5066 {
5067 	amdgpu_gfx_off_ctrl(adev, false);
5068 
5069 	gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5070 
5071 	amdgpu_gfx_off_ctrl(adev, true);
5072 }
5073 
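/* Return true if @offset matches a register in @entries (the RLCG access list). */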
5074 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5075 					uint32_t offset,
5076 					struct soc15_reg_rlcg *entries, int arr_size)
5077 {
5078 	int i;
5079 	uint32_t reg;
5080 
5081 	if (!entries)
5082 		return false;
5083 
5084 	for (i = 0; i < arr_size; i++) {
5085 		const struct soc15_reg_rlcg *entry;
5086 
5087 		entry = &entries[i];
5088 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5089 		if (offset == reg)
5090 			return true;
5091 	}
5092 
5093 	return false;
5094 }
5095 
5096 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5097 {
5098 	return gfx_v9_0_check_rlcg_range(adev, offset,
5099 					(void *)rlcg_access_gc_9_0,
5100 					ARRAY_SIZE(rlcg_access_gc_9_0));
5101 }
5102 
5103 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5104 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5105 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5106 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5107 	.init = gfx_v9_0_rlc_init,
5108 	.get_csb_size = gfx_v9_0_get_csb_size,
5109 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5110 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5111 	.resume = gfx_v9_0_rlc_resume,
5112 	.stop = gfx_v9_0_rlc_stop,
5113 	.reset = gfx_v9_0_rlc_reset,
5114 	.start = gfx_v9_0_rlc_start,
5115 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5116 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5117 };
5118 
5119 static int gfx_v9_0_set_powergating_state(void *handle,
5120 					  enum amd_powergating_state state)
5121 {
5122 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5123 	bool enable = (state == AMD_PG_STATE_GATE);
5124 
5125 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5126 	case IP_VERSION(9, 2, 2):
5127 	case IP_VERSION(9, 1, 0):
5128 	case IP_VERSION(9, 3, 0):
5129 		if (!enable)
5130 			amdgpu_gfx_off_ctrl(adev, false);
5131 
5132 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5133 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5134 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5135 		} else {
5136 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5137 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5138 		}
5139 
5140 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5141 			gfx_v9_0_enable_cp_power_gating(adev, true);
5142 		else
5143 			gfx_v9_0_enable_cp_power_gating(adev, false);
5144 
5145 		/* update gfx cgpg state */
5146 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5147 
5148 		/* update mgcg state */
5149 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5150 
5151 		if (enable)
5152 			amdgpu_gfx_off_ctrl(adev, true);
5153 		break;
5154 	case IP_VERSION(9, 2, 1):
5155 		amdgpu_gfx_off_ctrl(adev, enable);
5156 		break;
5157 	default:
5158 		break;
5159 	}
5160 
5161 	return 0;
5162 }
5163 
5164 static int gfx_v9_0_set_clockgating_state(void *handle,
5165 					  enum amd_clockgating_state state)
5166 {
5167 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5168 
5169 	if (amdgpu_sriov_vf(adev))
5170 		return 0;
5171 
5172 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5173 	case IP_VERSION(9, 0, 1):
5174 	case IP_VERSION(9, 2, 1):
5175 	case IP_VERSION(9, 4, 0):
5176 	case IP_VERSION(9, 2, 2):
5177 	case IP_VERSION(9, 1, 0):
5178 	case IP_VERSION(9, 4, 1):
5179 	case IP_VERSION(9, 3, 0):
5180 	case IP_VERSION(9, 4, 2):
5181 		gfx_v9_0_update_gfx_clock_gating(adev,
5182 						 state == AMD_CG_STATE_GATE);
5183 		break;
5184 	default:
5185 		break;
5186 	}
5187 	return 0;
5188 }
5189 
5190 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5191 {
5192 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5193 	int data;
5194 
5195 	if (amdgpu_sriov_vf(adev))
5196 		*flags = 0;
5197 
5198 	/* AMD_CG_SUPPORT_GFX_MGCG */
5199 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5200 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5201 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5202 
5203 	/* AMD_CG_SUPPORT_GFX_CGCG */
5204 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5205 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5206 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5207 
5208 	/* AMD_CG_SUPPORT_GFX_CGLS */
5209 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5210 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5211 
5212 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5213 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5214 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5215 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5216 
5217 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5218 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5219 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5220 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5221 
5222 	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5223 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5224 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5225 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5226 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5227 
5228 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5229 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5230 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5231 	}
5232 }
5233 
5234 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5235 {
5236 	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5237 }
5238 
5239 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5240 {
5241 	struct amdgpu_device *adev = ring->adev;
5242 	u64 wptr;
5243 
5244 	/* XXX check if swapping is necessary on BE */
5245 	if (ring->use_doorbell) {
5246 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5247 	} else {
5248 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5249 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5250 	}
5251 
5252 	return wptr;
5253 }
5254 
5255 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5256 {
5257 	struct amdgpu_device *adev = ring->adev;
5258 
5259 	if (ring->use_doorbell) {
5260 		/* XXX check if swapping is necessary on BE */
5261 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5262 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5263 	} else {
5264 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5265 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5266 	}
5267 }
5268 
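/*
 * HDP flush: pick the per-engine ref/mask bit from the NBIO HDP flush
 * registers (by ME/pipe for compute, CP0 for gfx) and emit a WAIT_REG_MEM
 * in write-then-wait mode against the flush request/done registers.
 */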
5269 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5270 {
5271 	struct amdgpu_device *adev = ring->adev;
5272 	u32 ref_and_mask, reg_mem_engine;
5273 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5274 
5275 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5276 		switch (ring->me) {
5277 		case 1:
5278 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5279 			break;
5280 		case 2:
5281 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5282 			break;
5283 		default:
5284 			return;
5285 		}
5286 		reg_mem_engine = 0;
5287 	} else {
5288 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5289 		reg_mem_engine = 1; /* pfp */
5290 	}
5291 
5292 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5293 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5294 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5295 			      ref_and_mask, ref_and_mask, 0x20);
5296 }
5297 
5298 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5299 					struct amdgpu_job *job,
5300 					struct amdgpu_ib *ib,
5301 					uint32_t flags)
5302 {
5303 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5304 	u32 header, control = 0;
5305 
5306 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5307 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5308 	else
5309 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5310 
5311 	control |= ib->length_dw | (vmid << 24);
5312 
5313 	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5314 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5315 
5316 		if (flags & AMDGPU_IB_PREEMPTED)
5317 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5318 
5319 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5320 			gfx_v9_0_ring_emit_de_meta(ring,
5321 						   (!amdgpu_sriov_vf(ring->adev) &&
5322 						   flags & AMDGPU_IB_PREEMPTED) ?
5323 						   true : false,
5324 						   job->gds_size > 0 && job->gds_base != 0);
5325 	}
5326 
5327 	amdgpu_ring_write(ring, header);
5328 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5329 	amdgpu_ring_write(ring,
5330 #ifdef __BIG_ENDIAN
5331 		(2 << 0) |
5332 #endif
5333 		lower_32_bits(ib->gpu_addr));
5334 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5335 	amdgpu_ring_ib_on_emit_cntl(ring);
5336 	amdgpu_ring_write(ring, control);
5337 }
5338 
5339 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5340 				     unsigned offset)
5341 {
5342 	u32 control = ring->ring[offset];
5343 
5344 	control |= INDIRECT_BUFFER_PRE_RESUME(1);
5345 	ring->ring[offset] = control;
5346 }
5347 
5348 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5349 					unsigned offset)
5350 {
5351 	struct amdgpu_device *adev = ring->adev;
5352 	void *ce_payload_cpu_addr;
5353 	uint64_t payload_offset, payload_size;
5354 
5355 	payload_size = sizeof(struct v9_ce_ib_state);
5356 
5357 	if (ring->is_mes_queue) {
5358 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5359 					  gfx[0].gfx_meta_data) +
5360 			offsetof(struct v9_gfx_meta_data, ce_payload);
5361 		ce_payload_cpu_addr =
5362 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5363 	} else {
5364 		payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5365 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5366 	}
5367 
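	/* copy the saved CE payload back into the ring, splitting the copy
	 * when it wraps past the end of the ring buffer
	 */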
5368 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5369 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5370 	} else {
5371 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5372 		       (ring->buf_mask + 1 - offset) << 2);
5373 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5374 		memcpy((void *)&ring->ring[0],
5375 		       ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5376 		       payload_size);
5377 	}
5378 }
5379 
5380 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5381 					unsigned offset)
5382 {
5383 	struct amdgpu_device *adev = ring->adev;
5384 	void *de_payload_cpu_addr;
5385 	uint64_t payload_offset, payload_size;
5386 
5387 	payload_size = sizeof(struct v9_de_ib_state);
5388 
5389 	if (ring->is_mes_queue) {
5390 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5391 					  gfx[0].gfx_meta_data) +
5392 			offsetof(struct v9_gfx_meta_data, de_payload);
5393 		de_payload_cpu_addr =
5394 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5395 	} else {
5396 		payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5397 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5398 	}
5399 
5400 	((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5401 		IB_COMPLETION_STATUS_PREEMPTED;
5402 
5403 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5404 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5405 	} else {
5406 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5407 		       (ring->buf_mask + 1 - offset) << 2);
5408 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5409 		memcpy((void *)&ring->ring[0],
5410 		       de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5411 		       payload_size);
5412 	}
5413 }
5414 
5415 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5416 					  struct amdgpu_job *job,
5417 					  struct amdgpu_ib *ib,
5418 					  uint32_t flags)
5419 {
5420 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5421 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5422 
5423 	/* Currently, there is a high probability of a wave ID mismatch
5424 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5425 	 * different wave IDs than the GDS expects. This situation happens
5426 	 * randomly when at least 5 compute pipes use GDS ordered append.
5427 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5428 	 * Those are probably bugs somewhere else in the kernel driver.
5429 	 *
5430 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5431 	 * GDS to 0 for this ring (me/pipe).
5432 	 */
5433 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5434 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5435 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5436 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5437 	}
5438 
5439 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5440 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5441 	amdgpu_ring_write(ring,
5442 #ifdef __BIG_ENDIAN
5443 				(2 << 0) |
5444 #endif
5445 				lower_32_bits(ib->gpu_addr));
5446 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5447 	amdgpu_ring_write(ring, control);
5448 }
5449 
5450 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5451 				     u64 seq, unsigned flags)
5452 {
5453 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5454 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5455 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5456 	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5457 	uint32_t dw2 = 0;
5458 
5459 	/* RELEASE_MEM - flush caches, send int */
5460 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5461 
5462 	if (writeback) {
5463 		dw2 = EOP_TC_NC_ACTION_EN;
5464 	} else {
5465 		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5466 				EOP_TC_MD_ACTION_EN;
5467 	}
5468 	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5469 				EVENT_INDEX(5);
5470 	if (exec)
5471 		dw2 |= EOP_EXEC;
5472 
5473 	amdgpu_ring_write(ring, dw2);
5474 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5475 
5476 	/*
5477 	 * The address should be Qword aligned for a 64-bit write and Dword
5478 	 * aligned if only the low 32 bits of data are sent (data high is discarded).
5479 	 */
5480 	if (write64bit)
5481 		BUG_ON(addr & 0x7);
5482 	else
5483 		BUG_ON(addr & 0x3);
5484 	amdgpu_ring_write(ring, lower_32_bits(addr));
5485 	amdgpu_ring_write(ring, upper_32_bits(addr));
5486 	amdgpu_ring_write(ring, lower_32_bits(seq));
5487 	amdgpu_ring_write(ring, upper_32_bits(seq));
5488 	amdgpu_ring_write(ring, 0);
5489 }
5490 
5491 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5492 {
5493 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5494 	uint32_t seq = ring->fence_drv.sync_seq;
5495 	uint64_t addr = ring->fence_drv.gpu_addr;
5496 
5497 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5498 			      lower_32_bits(addr), upper_32_bits(addr),
5499 			      seq, 0xffffffff, 4);
5500 }
5501 
5502 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5503 					unsigned vmid, uint64_t pd_addr)
5504 {
5505 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5506 
5507 	/* compute doesn't have PFP */
5508 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5509 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5510 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5511 		amdgpu_ring_write(ring, 0x0);
5512 	}
5513 }
5514 
5515 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5516 {
5517 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5518 }
5519 
5520 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5521 {
5522 	u64 wptr;
5523 
5524 	/* XXX check if swapping is necessary on BE */
5525 	if (ring->use_doorbell)
5526 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5527 	else
5528 		BUG();
5529 	return wptr;
5530 }
5531 
5532 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5533 {
5534 	struct amdgpu_device *adev = ring->adev;
5535 
5536 	/* XXX check if swapping is necessary on BE */
5537 	if (ring->use_doorbell) {
5538 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5539 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5540 	} else {
5541 		BUG(); /* only DOORBELL method supported on gfx9 now */
5542 	}
5543 }
5544 
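/*
 * KIQ fence: write the 32-bit seq to the writeback address with WRITE_DATA,
 * then optionally write CPC_INT_STATUS to raise the interrupt.
 */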
5545 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5546 					 u64 seq, unsigned int flags)
5547 {
5548 	struct amdgpu_device *adev = ring->adev;
5549 
5550 	/* we only allocate 32bit for each seq wb address */
5551 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5552 
5553 	/* write fence seq to the "addr" */
5554 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5555 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5556 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5557 	amdgpu_ring_write(ring, lower_32_bits(addr));
5558 	amdgpu_ring_write(ring, upper_32_bits(addr));
5559 	amdgpu_ring_write(ring, lower_32_bits(seq));
5560 
5561 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5562 		/* set register to trigger INT */
5563 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5564 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5565 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5566 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5567 		amdgpu_ring_write(ring, 0);
5568 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5569 	}
5570 }
5571 
5572 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5573 {
5574 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5575 	amdgpu_ring_write(ring, 0);
5576 }
5577 
5578 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5579 {
5580 	struct amdgpu_device *adev = ring->adev;
5581 	struct v9_ce_ib_state ce_payload = {0};
5582 	uint64_t offset, ce_payload_gpu_addr;
5583 	void *ce_payload_cpu_addr;
5584 	int cnt;
5585 
5586 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5587 
5588 	if (ring->is_mes_queue) {
5589 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5590 				  gfx[0].gfx_meta_data) +
5591 			offsetof(struct v9_gfx_meta_data, ce_payload);
5592 		ce_payload_gpu_addr =
5593 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5594 		ce_payload_cpu_addr =
5595 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5596 	} else {
5597 		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5598 		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5599 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5600 	}
5601 
5602 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5603 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5604 				 WRITE_DATA_DST_SEL(8) |
5605 				 WR_CONFIRM) |
5606 				 WRITE_DATA_CACHE_POLICY(0));
5607 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5608 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5609 
5610 	amdgpu_ring_ib_on_emit_ce(ring);
5611 
5612 	if (resume)
5613 		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5614 					   sizeof(ce_payload) >> 2);
5615 	else
5616 		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5617 					   sizeof(ce_payload) >> 2);
5618 }
5619 
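/*
 * Mid-IB preemption: assert the preemption condition (cond_exec false),
 * emit a trailing fence, ask the KIQ to preempt the queue without unmapping
 * it (PREEMPT_QUEUES_NO_UNMAP), poll for the trailing fence to signal, then
 * clear CP_VMID_PREEMPT and re-enable cond_exec.
 */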
5620 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5621 {
5622 	int i, r = 0;
5623 	struct amdgpu_device *adev = ring->adev;
5624 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5625 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5626 	unsigned long flags;
5627 
5628 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5629 		return -EINVAL;
5630 
5631 	spin_lock_irqsave(&kiq->ring_lock, flags);
5632 
5633 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5634 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5635 		return -ENOMEM;
5636 	}
5637 
5638 	/* assert preemption condition */
5639 	amdgpu_ring_set_preempt_cond_exec(ring, false);
5640 
5641 	ring->trail_seq += 1;
5642 	amdgpu_ring_alloc(ring, 13);
5643 	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5644 				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5645 
5646 	/* assert IB preemption, emit the trailing fence */
5647 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5648 				   ring->trail_fence_gpu_addr,
5649 				   ring->trail_seq);
5650 
5651 	amdgpu_ring_commit(kiq_ring);
5652 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5653 
5654 	/* poll the trailing fence */
5655 	for (i = 0; i < adev->usec_timeout; i++) {
5656 		if (ring->trail_seq ==
5657 			le32_to_cpu(*ring->trail_fence_cpu_addr))
5658 			break;
5659 		udelay(1);
5660 	}
5661 
5662 	if (i >= adev->usec_timeout) {
5663 		r = -EINVAL;
5664 		DRM_WARN("ring %d timed out preempting ib\n", ring->idx);
5665 	}
5666 
5667 	/* reset the CP_VMID_PREEMPT after the trailing fence */
5668 	amdgpu_ring_emit_wreg(ring,
5669 			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5670 			      0x0);
5671 	amdgpu_ring_commit(ring);
5672 
5673 	/* deassert preemption condition */
5674 	amdgpu_ring_set_preempt_cond_exec(ring, true);
5675 	return r;
5676 }
5677 
5678 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5679 {
5680 	struct amdgpu_device *adev = ring->adev;
5681 	struct v9_de_ib_state de_payload = {0};
5682 	uint64_t offset, gds_addr, de_payload_gpu_addr;
5683 	void *de_payload_cpu_addr;
5684 	int cnt;
5685 
5686 	if (ring->is_mes_queue) {
5687 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5688 				  gfx[0].gfx_meta_data) +
5689 			offsetof(struct v9_gfx_meta_data, de_payload);
5690 		de_payload_gpu_addr =
5691 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5692 		de_payload_cpu_addr =
5693 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5694 
5695 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5696 				  gfx[0].gds_backup) +
5697 			offsetof(struct v9_gfx_meta_data, de_payload);
5698 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5699 	} else {
5700 		offset = offsetof(struct v9_gfx_meta_data, de_payload);
5701 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5702 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5703 
5704 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5705 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5706 				 PAGE_SIZE);
5707 	}
5708 
5709 	if (usegds) {
5710 		de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5711 		de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5712 	}
5713 
5714 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5715 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5716 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5717 				 WRITE_DATA_DST_SEL(8) |
5718 				 WR_CONFIRM) |
5719 				 WRITE_DATA_CACHE_POLICY(0));
5720 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5721 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5722 
5723 	amdgpu_ring_ib_on_emit_de(ring);
5724 	if (resume)
5725 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5726 					   sizeof(de_payload) >> 2);
5727 	else
5728 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5729 					   sizeof(de_payload) >> 2);
5730 }
5731 
5732 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5733 				   bool secure)
5734 {
5735 	uint32_t v = secure ? FRAME_TMZ : 0;
5736 
5737 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5738 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5739 }
5740 
5741 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5742 {
5743 	uint32_t dw2 = 0;
5744 
5745 	gfx_v9_0_ring_emit_ce_meta(ring,
5746 				   (!amdgpu_sriov_vf(ring->adev) &&
5747 				   flags & AMDGPU_IB_PREEMPTED) ? true : false);
5748 
5749 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5750 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5751 		/* set load_global_config & load_global_uconfig */
5752 		dw2 |= 0x8001;
5753 		/* set load_cs_sh_regs */
5754 		dw2 |= 0x01000000;
5755 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5756 		dw2 |= 0x10002;
5757 
5758 		/* set load_ce_ram if a preamble is present */
5759 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5760 			dw2 |= 0x10000000;
5761 	} else {
5762 		/* still load_ce_ram if this is the first time the preamble is
5763 		 * presented, even though no context switch happens.
5764 		 */
5765 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5766 			dw2 |= 0x10000000;
5767 	}
5768 
5769 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5770 	amdgpu_ring_write(ring, dw2);
5771 	amdgpu_ring_write(ring, 0);
5772 }
5773 
5774 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5775 						  uint64_t addr)
5776 {
5777 	unsigned ret;
5778 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5779 	amdgpu_ring_write(ring, lower_32_bits(addr));
5780 	amdgpu_ring_write(ring, upper_32_bits(addr));
5781 	/* discard following DWs if *cond_exec_gpu_addr == 0 */
5782 	amdgpu_ring_write(ring, 0);
5783 	ret = ring->wptr & ring->buf_mask;
5784 	/* patch dummy value later */
5785 	amdgpu_ring_write(ring, 0);
5786 	return ret;
5787 }
5788 
5789 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5790 				    uint32_t reg_val_offs)
5791 {
5792 	struct amdgpu_device *adev = ring->adev;
5793 
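	/* COPY_DATA from the register into the writeback slot so the CPU can read it back */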
5794 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5795 	amdgpu_ring_write(ring, 0 |	/* src: register */
5796 				(5 << 8) |	/* dst: memory */
5797 				(1 << 20));	/* write confirm */
5798 	amdgpu_ring_write(ring, reg);
5799 	amdgpu_ring_write(ring, 0);
5800 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5801 				reg_val_offs * 4));
5802 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5803 				reg_val_offs * 4));
5804 }
5805 
5806 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5807 				    uint32_t val)
5808 {
5809 	uint32_t cmd = 0;
5810 
5811 	switch (ring->funcs->type) {
5812 	case AMDGPU_RING_TYPE_GFX:
5813 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5814 		break;
5815 	case AMDGPU_RING_TYPE_KIQ:
5816 		cmd = (1 << 16); /* no inc addr */
5817 		break;
5818 	default:
5819 		cmd = WR_CONFIRM;
5820 		break;
5821 	}
5822 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5823 	amdgpu_ring_write(ring, cmd);
5824 	amdgpu_ring_write(ring, reg);
5825 	amdgpu_ring_write(ring, 0);
5826 	amdgpu_ring_write(ring, val);
5827 }
5828 
5829 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5830 					uint32_t val, uint32_t mask)
5831 {
5832 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5833 }
5834 
5835 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5836 						  uint32_t reg0, uint32_t reg1,
5837 						  uint32_t ref, uint32_t mask)
5838 {
5839 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5840 	struct amdgpu_device *adev = ring->adev;
5841 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5842 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5843 
5844 	if (fw_version_ok)
5845 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5846 				      ref, mask, 0x20);
5847 	else
5848 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5849 							   ref, mask);
5850 }
5851 
5852 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5853 {
5854 	struct amdgpu_device *adev = ring->adev;
5855 	uint32_t value = 0;
5856 
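	/* build an SQ_CMD request that targets only waves belonging to the given VMID */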
5857 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5858 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5859 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5860 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5861 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5862 }
5863 
5864 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5865 						 enum amdgpu_interrupt_state state)
5866 {
5867 	switch (state) {
5868 	case AMDGPU_IRQ_STATE_DISABLE:
5869 	case AMDGPU_IRQ_STATE_ENABLE:
5870 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5871 			       TIME_STAMP_INT_ENABLE,
5872 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5873 		break;
5874 	default:
5875 		break;
5876 	}
5877 }
5878 
5879 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5880 						     int me, int pipe,
5881 						     enum amdgpu_interrupt_state state)
5882 {
5883 	u32 mec_int_cntl, mec_int_cntl_reg;
5884 
5885 	/*
5886 	 * amdgpu controls only the first MEC. That's why this function only
5887 	 * handles the setting of interrupts for this specific MEC. All other
5888 	 * pipes' interrupts are set by amdkfd.
5889 	 */
5890 
5891 	if (me == 1) {
5892 		switch (pipe) {
5893 		case 0:
5894 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5895 			break;
5896 		case 1:
5897 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5898 			break;
5899 		case 2:
5900 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5901 			break;
5902 		case 3:
5903 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5904 			break;
5905 		default:
5906 			DRM_DEBUG("invalid pipe %d\n", pipe);
5907 			return;
5908 		}
5909 	} else {
5910 		DRM_DEBUG("invalid me %d\n", me);
5911 		return;
5912 	}
5913 
5914 	switch (state) {
5915 	case AMDGPU_IRQ_STATE_DISABLE:
5916 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5917 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5918 					     TIME_STAMP_INT_ENABLE, 0);
5919 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5920 		break;
5921 	case AMDGPU_IRQ_STATE_ENABLE:
5922 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5923 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5924 					     TIME_STAMP_INT_ENABLE, 1);
5925 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5926 		break;
5927 	default:
5928 		break;
5929 	}
5930 }
5931 
5932 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5933 					     struct amdgpu_irq_src *source,
5934 					     unsigned type,
5935 					     enum amdgpu_interrupt_state state)
5936 {
5937 	switch (state) {
5938 	case AMDGPU_IRQ_STATE_DISABLE:
5939 	case AMDGPU_IRQ_STATE_ENABLE:
5940 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5941 			       PRIV_REG_INT_ENABLE,
5942 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5943 		break;
5944 	default:
5945 		break;
5946 	}
5947 
5948 	return 0;
5949 }
5950 
5951 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5952 					      struct amdgpu_irq_src *source,
5953 					      unsigned type,
5954 					      enum amdgpu_interrupt_state state)
5955 {
5956 	switch (state) {
5957 	case AMDGPU_IRQ_STATE_DISABLE:
5958 	case AMDGPU_IRQ_STATE_ENABLE:
5959 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5960 			       PRIV_INSTR_INT_ENABLE,
5961 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5962 		break;
5963 	default:
5964 		break;
5965 	}
5966 
5967 	return 0;
5968 }
5969 
5970 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5971 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5972 			CP_ECC_ERROR_INT_ENABLE, 1)
5973 
5974 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5975 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5976 			CP_ECC_ERROR_INT_ENABLE, 0)
5977 
5978 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5979 					      struct amdgpu_irq_src *source,
5980 					      unsigned type,
5981 					      enum amdgpu_interrupt_state state)
5982 {
5983 	switch (state) {
5984 	case AMDGPU_IRQ_STATE_DISABLE:
5985 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5986 				CP_ECC_ERROR_INT_ENABLE, 0);
5987 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5988 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5989 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5990 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5991 		break;
5992 
5993 	case AMDGPU_IRQ_STATE_ENABLE:
5994 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5995 				CP_ECC_ERROR_INT_ENABLE, 1);
5996 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5997 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5998 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5999 		ENABLE_ECC_ON_ME_PIPE(1, 3);
6000 		break;
6001 	default:
6002 		break;
6003 	}
6004 
6005 	return 0;
6006 }
6007 
6008 
6009 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6010 					    struct amdgpu_irq_src *src,
6011 					    unsigned type,
6012 					    enum amdgpu_interrupt_state state)
6013 {
6014 	switch (type) {
6015 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6016 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
6017 		break;
6018 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6019 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6020 		break;
6021 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6022 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6023 		break;
6024 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6025 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6026 		break;
6027 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6028 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6029 		break;
6030 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6031 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6032 		break;
6033 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6034 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6035 		break;
6036 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6037 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6038 		break;
6039 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6040 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6041 		break;
6042 	default:
6043 		break;
6044 	}
6045 	return 0;
6046 }
6047 
6048 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
6049 			    struct amdgpu_irq_src *source,
6050 			    struct amdgpu_iv_entry *entry)
6051 {
6052 	int i;
6053 	u8 me_id, pipe_id, queue_id;
6054 	struct amdgpu_ring *ring;
6055 
6056 	DRM_DEBUG("IH: CP EOP\n");
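	/* decode the IV ring_id: bits [1:0] pipe, [3:2] me, [6:4] queue */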
6057 	me_id = (entry->ring_id & 0x0c) >> 2;
6058 	pipe_id = (entry->ring_id & 0x03) >> 0;
6059 	queue_id = (entry->ring_id & 0x70) >> 4;
6060 
6061 	switch (me_id) {
6062 	case 0:
6063 		if (adev->gfx.num_gfx_rings) {
6064 			if (!adev->gfx.mcbp) {
6065 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6066 			} else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
6067 				/* Fence signals are handled on the software rings */
6068 				for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6069 					amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
6070 			}
6071 		}
6072 		break;
6073 	case 1:
6074 	case 2:
6075 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6076 			ring = &adev->gfx.compute_ring[i];
6077 			/* Per-queue interrupt is supported for MEC starting from VI.
6078 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6079 			 */
6080 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6081 				amdgpu_fence_process(ring);
6082 		}
6083 		break;
6084 	}
6085 	return 0;
6086 }
6087 
6088 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6089 			   struct amdgpu_iv_entry *entry)
6090 {
6091 	u8 me_id, pipe_id, queue_id;
6092 	struct amdgpu_ring *ring;
6093 	int i;
6094 
6095 	me_id = (entry->ring_id & 0x0c) >> 2;
6096 	pipe_id = (entry->ring_id & 0x03) >> 0;
6097 	queue_id = (entry->ring_id & 0x70) >> 4;
6098 
6099 	switch (me_id) {
6100 	case 0:
6101 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6102 		break;
6103 	case 1:
6104 	case 2:
6105 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6106 			ring = &adev->gfx.compute_ring[i];
6107 			if (ring->me == me_id && ring->pipe == pipe_id &&
6108 			    ring->queue == queue_id)
6109 				drm_sched_fault(&ring->sched);
6110 		}
6111 		break;
6112 	}
6113 }
6114 
6115 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6116 				 struct amdgpu_irq_src *source,
6117 				 struct amdgpu_iv_entry *entry)
6118 {
6119 	DRM_ERROR("Illegal register access in command stream\n");
6120 	gfx_v9_0_fault(adev, entry);
6121 	return 0;
6122 }
6123 
6124 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6125 				  struct amdgpu_irq_src *source,
6126 				  struct amdgpu_iv_entry *entry)
6127 {
6128 	DRM_ERROR("Illegal instruction in command stream\n");
6129 	gfx_v9_0_fault(adev, entry);
6130 	return 0;
6131 }
6132 
6133 
6134 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6135 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6136 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6137 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6138 	},
6139 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6140 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6141 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6142 	},
6143 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6144 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6145 	  0, 0
6146 	},
6147 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6148 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6149 	  0, 0
6150 	},
6151 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6152 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6153 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6154 	},
6155 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6156 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6157 	  0, 0
6158 	},
6159 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6160 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6161 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6162 	},
6163 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6164 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6165 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6166 	},
6167 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6168 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6169 	  0, 0
6170 	},
6171 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6172 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6173 	  0, 0
6174 	},
6175 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6176 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6177 	  0, 0
6178 	},
6179 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6180 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6181 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6182 	},
6183 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6184 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6185 	  0, 0
6186 	},
6187 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6188 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6189 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6190 	},
6191 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6192 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6193 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6194 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6195 	},
6196 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6197 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6198 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6199 	  0, 0
6200 	},
6201 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6202 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6203 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6204 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6205 	},
6206 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6207 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6208 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6209 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6210 	},
6211 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6212 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6213 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6214 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6215 	},
6216 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6217 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6218 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6219 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6220 	},
6221 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6222 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6223 	  0, 0
6224 	},
6225 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6226 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6227 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6228 	},
6229 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6230 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6231 	  0, 0
6232 	},
6233 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6234 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6235 	  0, 0
6236 	},
6237 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6238 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6239 	  0, 0
6240 	},
6241 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6242 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6243 	  0, 0
6244 	},
6245 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6246 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6247 	  0, 0
6248 	},
6249 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6250 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6251 	  0, 0
6252 	},
6253 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6254 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6255 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6256 	},
6257 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6258 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6259 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6260 	},
6261 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6262 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6263 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6264 	},
6265 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6266 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6267 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6268 	},
6269 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6270 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6271 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6272 	},
6273 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6274 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6275 	  0, 0
6276 	},
6277 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6278 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6279 	  0, 0
6280 	},
6281 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6282 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6283 	  0, 0
6284 	},
6285 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6286 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6287 	  0, 0
6288 	},
6289 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6290 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6291 	  0, 0
6292 	},
6293 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6294 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6295 	  0, 0
6296 	},
6297 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6298 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6299 	  0, 0
6300 	},
6301 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6302 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6303 	  0, 0
6304 	},
6305 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6306 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6307 	  0, 0
6308 	},
6309 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6310 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6311 	  0, 0
6312 	},
6313 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6314 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6315 	  0, 0
6316 	},
6317 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6318 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6319 	  0, 0
6320 	},
6321 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6322 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6323 	  0, 0
6324 	},
6325 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6326 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6327 	  0, 0
6328 	},
6329 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6330 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6331 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6332 	},
6333 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6334 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6335 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6336 	},
6337 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6338 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6339 	  0, 0
6340 	},
6341 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6342 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6343 	  0, 0
6344 	},
6345 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6346 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6347 	  0, 0
6348 	},
6349 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6350 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6351 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6352 	},
6353 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6354 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6355 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6356 	},
6357 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6358 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6359 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6360 	},
6361 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6362 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6363 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6364 	},
6365 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6366 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6367 	  0, 0
6368 	},
6369 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6370 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6371 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6372 	},
6373 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6374 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6375 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6376 	},
6377 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6378 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6379 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6380 	},
6381 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6382 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6383 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6384 	},
6385 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6386 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6387 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6388 	},
6389 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6390 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6391 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6392 	},
6393 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6394 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6395 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6396 	},
6397 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6398 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6399 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6400 	},
6401 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6402 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6403 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6404 	},
6405 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6406 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6407 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6408 	},
6409 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6410 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6411 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6412 	},
6413 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6414 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6415 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6416 	},
6417 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6418 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6419 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6420 	},
6421 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6422 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6423 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6424 	},
6425 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6426 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6427 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6428 	},
6429 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6430 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6431 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6432 	},
6433 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6434 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6435 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6436 	},
6437 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6438 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6439 	  0, 0
6440 	},
6441 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6442 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6443 	  0, 0
6444 	},
6445 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6446 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6447 	  0, 0
6448 	},
6449 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6450 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6451 	  0, 0
6452 	},
6453 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6454 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6455 	  0, 0
6456 	},
6457 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6458 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6459 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6460 	},
6461 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6462 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6463 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6464 	},
6465 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6466 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6467 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6468 	},
6469 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6470 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6471 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6472 	},
6473 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6474 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6475 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6476 	},
6477 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6478 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6479 	  0, 0
6480 	},
6481 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6482 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6483 	  0, 0
6484 	},
6485 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6486 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6487 	  0, 0
6488 	},
6489 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6490 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6491 	  0, 0
6492 	},
6493 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6494 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6495 	  0, 0
6496 	},
6497 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6498 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6499 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6500 	},
6501 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6502 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6503 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6504 	},
6505 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6506 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6507 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6508 	},
6509 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6510 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6511 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6512 	},
6513 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6514 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6515 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6516 	},
6517 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6518 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6519 	  0, 0
6520 	},
6521 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6522 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6523 	  0, 0
6524 	},
6525 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6526 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6527 	  0, 0
6528 	},
6529 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6530 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6531 	  0, 0
6532 	},
6533 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6534 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6535 	  0, 0
6536 	},
6537 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6538 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6539 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6540 	},
6541 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6542 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6543 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6544 	},
6545 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6546 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6547 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6548 	},
6549 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6550 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6551 	  0, 0
6552 	},
6553 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6554 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6555 	  0, 0
6556 	},
6557 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6558 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6559 	  0, 0
6560 	},
6561 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6562 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6563 	  0, 0
6564 	},
6565 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6566 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6567 	  0, 0
6568 	},
6569 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6570 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6571 	  0, 0
6572 	}
6573 };
6574 
6575 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6576 				     void *inject_if, uint32_t instance_mask)
6577 {
6578 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6579 	int ret;
6580 	struct ta_ras_trigger_error_input block_info = { 0 };
6581 
6582 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6583 		return -EINVAL;
6584 
6585 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6586 		return -EINVAL;
6587 
6588 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6589 		return -EPERM;
6590 
6591 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6592 	      info->head.type)) {
6593 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6594 			ras_gfx_subblocks[info->head.sub_block_index].name,
6595 			info->head.type);
6596 		return -EPERM;
6597 	}
6598 
6599 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6600 	      info->head.type)) {
6601 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6602 			ras_gfx_subblocks[info->head.sub_block_index].name,
6603 			info->head.type);
6604 		return -EPERM;
6605 	}
6606 
6607 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6608 	block_info.sub_block_index =
6609 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6610 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6611 	block_info.address = info->address;
6612 	block_info.value = info->value;
6613 
6614 	mutex_lock(&adev->grbm_idx_mutex);
6615 	ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6616 	mutex_unlock(&adev->grbm_idx_mutex);
6617 
6618 	return ret;
6619 }
6620 
6621 static const char * const vml2_mems[] = {
6622 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6623 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6624 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6625 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6626 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6627 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6628 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6629 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6630 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6631 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6632 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6633 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6634 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6635 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6636 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6637 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6638 };
6639 
6640 static const char * const vml2_walker_mems[] = {
6641 	"UTC_VML2_CACHE_PDE0_MEM0",
6642 	"UTC_VML2_CACHE_PDE0_MEM1",
6643 	"UTC_VML2_CACHE_PDE1_MEM0",
6644 	"UTC_VML2_CACHE_PDE1_MEM1",
6645 	"UTC_VML2_CACHE_PDE2_MEM0",
6646 	"UTC_VML2_CACHE_PDE2_MEM1",
6647 	"UTC_VML2_RDIF_LOG_FIFO",
6648 };
6649 
6650 static const char * const atc_l2_cache_2m_mems[] = {
6651 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6652 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6653 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6654 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6655 };
6656 
6657 static const char * const atc_l2_cache_4k_mems[] = {
6658 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6659 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6660 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6661 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6662 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6663 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6664 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6665 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6666 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6667 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6668 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6669 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6670 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6671 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6672 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6673 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6674 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6675 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6676 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6677 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6678 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6679 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6680 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6681 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6682 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6683 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6684 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6685 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6686 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6687 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6688 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6689 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6690 };
6691 
6692 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6693 					 struct ras_err_data *err_data)
6694 {
6695 	uint32_t i, data;
6696 	uint32_t sec_count, ded_count;
6697 
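	/* reset the EDC index and count registers before sampling each memory instance */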
6698 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6699 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6700 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6701 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6702 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6703 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6704 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6705 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6706 
6707 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6708 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6709 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6710 
6711 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6712 		if (sec_count) {
6713 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6714 				"SEC %d\n", i, vml2_mems[i], sec_count);
6715 			err_data->ce_count += sec_count;
6716 		}
6717 
6718 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6719 		if (ded_count) {
6720 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6721 				"DED %d\n", i, vml2_mems[i], ded_count);
6722 			err_data->ue_count += ded_count;
6723 		}
6724 	}
6725 
6726 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6727 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6728 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6729 
6730 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6731 						SEC_COUNT);
6732 		if (sec_count) {
6733 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6734 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6735 			err_data->ce_count += sec_count;
6736 		}
6737 
6738 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6739 						DED_COUNT);
6740 		if (ded_count) {
6741 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6742 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6743 			err_data->ue_count += ded_count;
6744 		}
6745 	}
6746 
6747 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6748 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6749 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6750 
6751 		sec_count = (data & 0x00006000L) >> 0xd;
6752 		if (sec_count) {
6753 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6754 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6755 				sec_count);
6756 			err_data->ce_count += sec_count;
6757 		}
6758 	}
6759 
6760 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6761 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6762 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6763 
6764 		sec_count = (data & 0x00006000L) >> 0xd;
6765 		if (sec_count) {
6766 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6767 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6768 				sec_count);
6769 			err_data->ce_count += sec_count;
6770 		}
6771 
6772 		ded_count = (data & 0x00018000L) >> 0xf;
6773 		if (ded_count) {
6774 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6775 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6776 				ded_count);
6777 			err_data->ue_count += ded_count;
6778 		}
6779 	}
6780 
6781 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6782 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6783 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6784 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6785 
6786 	return 0;
6787 }
6788 
6789 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6790 	const struct soc15_reg_entry *reg,
6791 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6792 	uint32_t *sec_count, uint32_t *ded_count)
6793 {
6794 	uint32_t i;
6795 	uint32_t sec_cnt, ded_cnt;
6796 
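	/* accumulate SEC/DED counts from every RAS field that maps to this register */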
6797 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6798 		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6799 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6800 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6801 			continue;
6802 
6803 		sec_cnt = (value &
6804 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6805 				gfx_v9_0_ras_fields[i].sec_count_shift;
6806 		if (sec_cnt) {
6807 			dev_info(adev->dev, "GFX SubBlock %s, "
6808 				"Instance[%d][%d], SEC %d\n",
6809 				gfx_v9_0_ras_fields[i].name,
6810 				se_id, inst_id,
6811 				sec_cnt);
6812 			*sec_count += sec_cnt;
6813 		}
6814 
6815 		ded_cnt = (value &
6816 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6817 				gfx_v9_0_ras_fields[i].ded_count_shift;
6818 		if (ded_cnt) {
6819 			dev_info(adev->dev, "GFX SubBlock %s, "
6820 				"Instance[%d][%d], DED %d\n",
6821 				gfx_v9_0_ras_fields[i].name,
6822 				se_id, inst_id,
6823 				ded_cnt);
6824 			*ded_count += ded_cnt;
6825 		}
6826 	}
6827 
6828 	return 0;
6829 }
6830 
6831 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6832 {
6833 	int i, j, k;
6834 
6835 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6836 		return;
6837 
6838 	/* read back registers to clear the counters */
6839 	mutex_lock(&adev->grbm_idx_mutex);
6840 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6841 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6842 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6843 				amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
6844 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6845 			}
6846 		}
6847 	}
6848 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6849 	mutex_unlock(&adev->grbm_idx_mutex);
6850 
6851 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6852 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6853 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6854 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6855 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6856 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6857 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6858 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6859 
6860 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6861 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6862 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6863 	}
6864 
6865 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6866 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6867 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6868 	}
6869 
6870 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6871 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6872 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6873 	}
6874 
6875 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6876 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6877 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6878 	}
6879 
6880 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6881 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6882 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6883 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6884 }
6885 
6886 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6887 					  void *ras_error_status)
6888 {
6889 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6890 	uint32_t sec_count = 0, ded_count = 0;
6891 	uint32_t i, j, k;
6892 	uint32_t reg_value;
6893 
6894 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6895 		return;
6896 
6897 	err_data->ue_count = 0;
6898 	err_data->ce_count = 0;
6899 
6900 	mutex_lock(&adev->grbm_idx_mutex);
6901 
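	/* walk every EDC counter register across all shader engines and instances */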
6902 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6903 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6904 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6905 				amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
6906 				reg_value =
6907 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6908 				if (reg_value)
6909 					gfx_v9_0_ras_error_count(adev,
6910 						&gfx_v9_0_edc_counter_regs[i],
6911 						j, k, reg_value,
6912 						&sec_count, &ded_count);
6913 			}
6914 		}
6915 	}
6916 
6917 	err_data->ce_count += sec_count;
6918 	err_data->ue_count += ded_count;
6919 
6920 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6921 	mutex_unlock(&adev->grbm_idx_mutex);
6922 
6923 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6924 }
6925 
6926 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6927 {
6928 	const unsigned int cp_coher_cntl =
6929 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6930 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6931 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6932 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6933 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6934 
6935 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6936 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6937 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6938 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6939 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6940 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6941 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6942 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6943 }
6944 
6945 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6946 					uint32_t pipe, bool enable)
6947 {
6948 	struct amdgpu_device *adev = ring->adev;
6949 	uint32_t val;
6950 	uint32_t wcl_cs_reg;
6951 
6952 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
6953 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6954 
6955 	switch (pipe) {
6956 	case 0:
6957 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6958 		break;
6959 	case 1:
6960 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6961 		break;
6962 	case 2:
6963 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6964 		break;
6965 	case 3:
6966 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6967 		break;
6968 	default:
6969 		DRM_DEBUG("invalid pipe %d\n", pipe);
6970 		return;
6971 	}
6972 
6973 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6974 
6975 }
6976 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6977 {
6978 	struct amdgpu_device *adev = ring->adev;
6979 	uint32_t val;
6980 	int i;
6981 
6982 
6983 	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to limit
6984 	 * the number of gfx waves. Setting the low 5 bits (0x1f) makes sure gfx
6985 	 * only gets around 25% of the gpu resources.
6986 	 */
6987 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6988 	amdgpu_ring_emit_wreg(ring,
6989 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6990 			      val);
6991 
6992 	/* Restrict waves for normal/low priority compute queues as well
6993 	 * to get the best QoS for high priority compute jobs.
6994 	 *
6995 	 * amdgpu controls only the 1st ME (CS pipes 0-3).
6996 	 */
6997 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6998 		if (i != ring->pipe)
6999 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
7000 
7001 	}
7002 }
7003 
7004 static void gfx_v9_ip_print(void *handle, struct drm_printer *p)
7005 {
7006 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7007 	uint32_t i, j, k, reg, index = 0;
7008 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7009 
7010 	if (!adev->gfx.ip_dump_core)
7011 		return;
7012 
7013 	for (i = 0; i < reg_count; i++)
7014 		drm_printf(p, "%-50s \t 0x%08x\n",
7015 			   gc_reg_list_9[i].reg_name,
7016 			   adev->gfx.ip_dump_core[i]);
7017 
7018 	/* print compute queue registers for all instances */
7019 	if (!adev->gfx.ip_dump_compute_queues)
7020 		return;
7021 
7022 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7023 	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7024 		   adev->gfx.mec.num_mec,
7025 		   adev->gfx.mec.num_pipe_per_mec,
7026 		   adev->gfx.mec.num_queue_per_pipe);
7027 
7028 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7029 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7030 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7031 				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7032 				for (reg = 0; reg < reg_count; reg++) {
7033 					drm_printf(p, "%-50s \t 0x%08x\n",
7034 						   gc_cp_reg_list_9[reg].reg_name,
7035 						   adev->gfx.ip_dump_compute_queues[index + reg]);
7036 				}
7037 				index += reg_count;
7038 			}
7039 		}
7040 	}
7041 
7042 }
7043 
7044 static void gfx_v9_ip_dump(void *handle)
7045 {
7046 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7047 	uint32_t i, j, k, reg, index = 0;
7048 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7049 
7050 	if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
7051 		return;
7052 
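	/* keep GFXOFF disabled while the registers are read back */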
7053 	amdgpu_gfx_off_ctrl(adev, false);
7054 	for (i = 0; i < reg_count; i++)
7055 		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7056 	amdgpu_gfx_off_ctrl(adev, true);
7057 
7058 	/* dump compute queue registers for all instances */
7059 	if (!adev->gfx.ip_dump_compute_queues)
7060 		return;
7061 
7062 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7063 	amdgpu_gfx_off_ctrl(adev, false);
7064 	mutex_lock(&adev->srbm_mutex);
7065 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7066 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7067 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7068 				/* ME0 is for GFX so start from 1 for CP */
7069 				soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
7070 
7071 				for (reg = 0; reg < reg_count; reg++) {
7072 					adev->gfx.ip_dump_compute_queues[index + reg] =
7073 						RREG32(SOC15_REG_ENTRY_OFFSET(
7074 							gc_cp_reg_list_9[reg]));
7075 				}
7076 				index += reg_count;
7077 			}
7078 		}
7079 	}
7080 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7081 	mutex_unlock(&adev->srbm_mutex);
7082 	amdgpu_gfx_off_ctrl(adev, true);
7083 
7084 }
7085 
7086 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
7087 	.name = "gfx_v9_0",
7088 	.early_init = gfx_v9_0_early_init,
7089 	.late_init = gfx_v9_0_late_init,
7090 	.sw_init = gfx_v9_0_sw_init,
7091 	.sw_fini = gfx_v9_0_sw_fini,
7092 	.hw_init = gfx_v9_0_hw_init,
7093 	.hw_fini = gfx_v9_0_hw_fini,
7094 	.suspend = gfx_v9_0_suspend,
7095 	.resume = gfx_v9_0_resume,
7096 	.is_idle = gfx_v9_0_is_idle,
7097 	.wait_for_idle = gfx_v9_0_wait_for_idle,
7098 	.soft_reset = gfx_v9_0_soft_reset,
7099 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
7100 	.set_powergating_state = gfx_v9_0_set_powergating_state,
7101 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
7102 	.dump_ip_state = gfx_v9_ip_dump,
7103 	.print_ip_state = gfx_v9_ip_print,
7104 };
7105 
7106 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
7107 	.type = AMDGPU_RING_TYPE_GFX,
7108 	.align_mask = 0xff,
7109 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7110 	.support_64bit_ptrs = true,
7111 	.secure_submission_supported = true,
7112 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
7113 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
7114 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
7115 	.emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
7116 		5 +  /* COND_EXEC */
7117 		7 +  /* PIPELINE_SYNC */
7118 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7119 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7120 		2 + /* VM_FLUSH */
7121 		8 +  /* FENCE for VM_FLUSH */
7122 		20 + /* GDS switch */
7123 		4 + /* double SWITCH_BUFFER,
7124 		     * the first COND_EXEC jumps to the place just
7125 		     * prior to this double SWITCH_BUFFER */
7126 		5 + /* COND_EXEC */
7127 		7 +	 /*	HDP_flush */
7128 		4 +	 /*	VGT_flush */
7129 		14 + /*	CE_META */
7130 		31 + /*	DE_META */
7131 		3 + /* CNTX_CTRL */
7132 		5 + /* HDP_INVL */
7133 		8 + 8 + /* FENCE x2 */
7134 		2 + /* SWITCH_BUFFER */
7135 		7, /* gfx_v9_0_emit_mem_sync */
7136 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
7137 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7138 	.emit_fence = gfx_v9_0_ring_emit_fence,
7139 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7140 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7141 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7142 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7143 	.test_ring = gfx_v9_0_ring_test_ring,
7144 	.insert_nop = amdgpu_ring_insert_nop,
7145 	.pad_ib = amdgpu_ring_generic_pad_ib,
7146 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
7147 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7148 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7149 	.preempt_ib = gfx_v9_0_ring_preempt_ib,
7150 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7151 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7152 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7153 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7154 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7155 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7156 };
7157 
7158 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
7159 	.type = AMDGPU_RING_TYPE_GFX,
7160 	.align_mask = 0xff,
7161 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7162 	.support_64bit_ptrs = true,
7163 	.secure_submission_supported = true,
7164 	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
7165 	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
7166 	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
7167 	.emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
7168 		5 +  /* COND_EXEC */
7169 		7 +  /* PIPELINE_SYNC */
7170 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7171 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7172 		2 + /* VM_FLUSH */
7173 		8 +  /* FENCE for VM_FLUSH */
7174 		20 + /* GDS switch */
7175 		4 + /* double SWITCH_BUFFER,
7176 		     * the first COND_EXEC jumps to the place just
7177 		     * prior to this double SWITCH_BUFFER
7178 		     */
7179 		5 + /* COND_EXEC */
7180 		7 +	 /*	HDP_flush */
7181 		4 +	 /*	VGT_flush */
7182 		14 + /*	CE_META */
7183 		31 + /*	DE_META */
7184 		3 + /* CNTX_CTRL */
7185 		5 + /* HDP_INVL */
7186 		8 + 8 + /* FENCE x2 */
7187 		2 + /* SWITCH_BUFFER */
7188 		7, /* gfx_v9_0_emit_mem_sync */
7189 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
7190 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7191 	.emit_fence = gfx_v9_0_ring_emit_fence,
7192 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7193 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7194 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7195 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7196 	.test_ring = gfx_v9_0_ring_test_ring,
7197 	.test_ib = gfx_v9_0_ring_test_ib,
7198 	.insert_nop = amdgpu_sw_ring_insert_nop,
7199 	.pad_ib = amdgpu_ring_generic_pad_ib,
7200 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
7201 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7202 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7203 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7204 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7205 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7206 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7207 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7208 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7209 	.patch_cntl = gfx_v9_0_ring_patch_cntl,
7210 	.patch_de = gfx_v9_0_ring_patch_de_meta,
7211 	.patch_ce = gfx_v9_0_ring_patch_ce_meta,
7212 };
7213 
7214 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7215 	.type = AMDGPU_RING_TYPE_COMPUTE,
7216 	.align_mask = 0xff,
7217 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7218 	.support_64bit_ptrs = true,
7219 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7220 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7221 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7222 	.emit_frame_size =
7223 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7224 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7225 		5 + /* hdp invalidate */
7226 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7227 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7228 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7229 		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7230 		7 + /* gfx_v9_0_emit_mem_sync */
7231 		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7232 		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7233 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7234 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
7235 	.emit_fence = gfx_v9_0_ring_emit_fence,
7236 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7237 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7238 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7239 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7240 	.test_ring = gfx_v9_0_ring_test_ring,
7241 	.test_ib = gfx_v9_0_ring_test_ib,
7242 	.insert_nop = amdgpu_ring_insert_nop,
7243 	.pad_ib = amdgpu_ring_generic_pad_ib,
7244 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7245 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7246 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7247 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7248 	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
7249 };
7250 
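/*
 * Ring callbacks for the KIQ (Kernel Interface Queue). The KIQ only needs
 * register access and fence helpers, not the full GFX/compute emit path.
 */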
7251 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7252 	.type = AMDGPU_RING_TYPE_KIQ,
7253 	.align_mask = 0xff,
7254 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7255 	.support_64bit_ptrs = true,
7256 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
7257 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
7258 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
7259 	.emit_frame_size =
7260 		20 + /* gfx_v9_0_ring_emit_gds_switch */
7261 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
7262 		5 + /* hdp invalidate */
7263 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7264 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7265 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7266 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7267 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
7268 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7269 	.test_ring = gfx_v9_0_ring_test_ring,
7270 	.insert_nop = amdgpu_ring_insert_nop,
7271 	.pad_ib = amdgpu_ring_generic_pad_ib,
7272 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
7273 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7274 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7275 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7276 };
7277 
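/*
 * Attach the ring function tables: KIQ, hardware GFX rings, software
 * (muxed) GFX rings when mid-command-buffer preemption is enabled, and
 * compute rings.
 */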
7278 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7279 {
7280 	int i;
7281 
7282 	adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7283 
7284 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7285 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7286 
7287 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7288 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7289 			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7290 	}
7291 
7292 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7293 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7294 }
7295 
7296 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7297 	.set = gfx_v9_0_set_eop_interrupt_state,
7298 	.process = gfx_v9_0_eop_irq,
7299 };
7300 
7301 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7302 	.set = gfx_v9_0_set_priv_reg_fault_state,
7303 	.process = gfx_v9_0_priv_reg_irq,
7304 };
7305 
7306 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7307 	.set = gfx_v9_0_set_priv_inst_fault_state,
7308 	.process = gfx_v9_0_priv_inst_irq,
7309 };
7310 
7311 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7312 	.set = gfx_v9_0_set_cp_ecc_error_state,
7313 	.process = amdgpu_gfx_cp_ecc_error_irq,
7314 };
7315 
7316 
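/*
 * Register the interrupt source callbacks for EOP, privileged register,
 * privileged instruction and CP ECC error interrupts.
 */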
7317 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7318 {
7319 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7320 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7321 
7322 	adev->gfx.priv_reg_irq.num_types = 1;
7323 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7324 
7325 	adev->gfx.priv_inst_irq.num_types = 1;
7326 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7327 
7328 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
7329 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7330 }
7331 
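/* All supported GC 9.x IP versions share the same RLC callback table. */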
7332 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7333 {
7334 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7335 	case IP_VERSION(9, 0, 1):
7336 	case IP_VERSION(9, 2, 1):
7337 	case IP_VERSION(9, 4, 0):
7338 	case IP_VERSION(9, 2, 2):
7339 	case IP_VERSION(9, 1, 0):
7340 	case IP_VERSION(9, 4, 1):
7341 	case IP_VERSION(9, 3, 0):
7342 	case IP_VERSION(9, 4, 2):
7343 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7344 		break;
7345 	default:
7346 		break;
7347 	}
7348 }
7349 
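/*
 * Per-ASIC GDS (Global Data Share) initialization: GDS size, the compute
 * max wave id limit, and the GWS/OA resource sizes.
 */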
7350 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7351 {
7352 	/* init ASIC GDS info */
7353 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7354 	case IP_VERSION(9, 0, 1):
7355 	case IP_VERSION(9, 2, 1):
7356 	case IP_VERSION(9, 4, 0):
7357 		adev->gds.gds_size = 0x10000;
7358 		break;
7359 	case IP_VERSION(9, 2, 2):
7360 	case IP_VERSION(9, 1, 0):
7361 	case IP_VERSION(9, 4, 1):
7362 		adev->gds.gds_size = 0x1000;
7363 		break;
7364 	case IP_VERSION(9, 4, 2):
7365 		/* Aldebaran removed all of the GDS internal memory;
7366 		 * only GWS opcodes (e.g. barrier, semaphore) are
7367 		 * supported in the kernel. */
7368 		adev->gds.gds_size = 0;
7369 		break;
7370 	default:
7371 		adev->gds.gds_size = 0x10000;
7372 		break;
7373 	}
7374 
7375 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7376 	case IP_VERSION(9, 0, 1):
7377 	case IP_VERSION(9, 4, 0):
7378 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7379 		break;
7380 	case IP_VERSION(9, 2, 1):
7381 		adev->gds.gds_compute_max_wave_id = 0x27f;
7382 		break;
7383 	case IP_VERSION(9, 2, 2):
7384 	case IP_VERSION(9, 1, 0):
7385 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7386 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7387 		else
7388 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7389 		break;
7390 	case IP_VERSION(9, 4, 1):
7391 		adev->gds.gds_compute_max_wave_id = 0xfff;
7392 		break;
7393 	case IP_VERSION(9, 4, 2):
7394 		/* deprecated for Aldebaran, not used at all */
7395 		adev->gds.gds_compute_max_wave_id = 0;
7396 		break;
7397 	default:
7398 		/* this really depends on the chip */
7399 		adev->gds.gds_compute_max_wave_id = 0x7ff;
7400 		break;
7401 	}
7402 
7403 	adev->gds.gws_size = 64;
7404 	adev->gds.oa_size = 16;
7405 }
7406 
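/*
 * Program the user-requested inactive CU bitmap for the currently selected
 * shader array into GC_USER_SHADER_ARRAY_CONFIG.
 */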
7407 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7408 						 u32 bitmap)
7409 {
7410 	u32 data;
7411 
7412 	if (!bitmap)
7413 		return;
7414 
7415 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7416 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7417 
7418 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7419 }
7420 
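/*
 * Return the active CU bitmap for the currently selected shader array by
 * combining the hardware (CC) and user (GC_USER) inactive CU masks and
 * inverting the result.
 */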
7421 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7422 {
7423 	u32 data, mask;
7424 
7425 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7426 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7427 
7428 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7429 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7430 
7431 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7432 
7433 	return (~data) & mask;
7434 }
7435 
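/*
 * Walk every SE/SH, collect the per-shader-array active CU bitmaps and the
 * always-on (AO) CU mask, and report the totals through cu_info.
 */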
7436 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7437 				 struct amdgpu_cu_info *cu_info)
7438 {
7439 	int i, j, k, counter, active_cu_number = 0;
7440 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7441 	unsigned disable_masks[4 * 4];
7442 
7443 	if (!adev || !cu_info)
7444 		return -EINVAL;
7445 
7446 	/*
7447 	 * 16 comes from the bitmap array size (4 * 4), which covers all gfx9 ASICs
7448 	 */
7449 	if (adev->gfx.config.max_shader_engines *
7450 		adev->gfx.config.max_sh_per_se > 16)
7451 		return -EINVAL;
7452 
7453 	amdgpu_gfx_parse_disable_cu(disable_masks,
7454 				    adev->gfx.config.max_shader_engines,
7455 				    adev->gfx.config.max_sh_per_se);
7456 
7457 	mutex_lock(&adev->grbm_idx_mutex);
7458 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7459 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7460 			mask = 1;
7461 			ao_bitmap = 0;
7462 			counter = 0;
7463 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7464 			gfx_v9_0_set_user_cu_inactive_bitmap(
7465 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7466 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7467 
7468 			/*
7469 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
7470 			 * is a 4x4 array, which suits Vega ASICs with their
7471 			 * 4*2 SE/SH layout.
7472 			 * On Arcturus, however, the SE/SH layout changed to 8*1.
7473 			 * To minimize the impact, we remap it onto the current
7474 			 * bitmap array as below:
7475 			 *    SE4,SH0 --> bitmap[0][1]
7476 			 *    SE5,SH0 --> bitmap[1][1]
7477 			 *    SE6,SH0 --> bitmap[2][1]
7478 			 *    SE7,SH0 --> bitmap[3][1]
7479 			 */
7480 			cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7481 
7482 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7483 				if (bitmap & mask) {
7484 					if (counter < adev->gfx.config.max_cu_per_sh)
7485 						ao_bitmap |= mask;
7486 					counter++;
7487 				}
7488 				mask <<= 1;
7489 			}
7490 			active_cu_number += counter;
7491 			if (i < 2 && j < 2)
7492 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7493 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7494 		}
7495 	}
7496 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7497 	mutex_unlock(&adev->grbm_idx_mutex);
7498 
7499 	cu_info->number = active_cu_number;
7500 	cu_info->ao_cu_mask = ao_cu_mask;
7501 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7502 
7503 	return 0;
7504 }
7505 
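/* Exported GFX v9.0 IP block descriptor used during IP block setup. */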
7506 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7507 {
7508 	.type = AMD_IP_BLOCK_TYPE_GFX,
7509 	.major = 9,
7510 	.minor = 0,
7511 	.rev = 0,
7512 	.funcs = &gfx_v9_0_ip_funcs,
7513 };
7514