xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 0db66572747a789922e8137904e8b4c39d9b94f6)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45 
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47 
48 #include "amdgpu_ras.h"
49 
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_4_2.h"
54 
55 #include "asic_reg/pwr/pwr_10_0_offset.h"
56 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
57 #include "asic_reg/gc/gc_9_0_default.h"
58 
59 #define GFX9_NUM_GFX_RINGS     1
60 #define GFX9_NUM_SW_GFX_RINGS  2
61 #define GFX9_MEC_HPD_SIZE 4096
62 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
63 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
64 
65 #define mmGCEA_PROBE_MAP                        0x070c
66 #define mmGCEA_PROBE_MAP_BASE_IDX               0
67 
68 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
74 
75 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
81 
82 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
88 
89 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
90 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
91 MODULE_FIRMWARE("amdgpu/raven_me.bin");
92 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
94 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
95 
96 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
103 
104 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
110 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
111 
112 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
113 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
114 
115 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120 
121 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
122 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
127 
128 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
133 
134 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
135 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
136 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
137 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
138 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
139 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
140 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
141 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
142 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
143 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
144 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
145 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
146 
147 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
149 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
151 
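/*
 * GC registers captured when dumping gfx v9 IP state for debugging
 * (e.g. as part of a devcoredump after a GPU hang).
 */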
152 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
153 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
154 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
155 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
156 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
157 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
158 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
159 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
160 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
161 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
162 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
163 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
164 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
165 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
166 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
167 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
168 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
169 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
170 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
171 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
172 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
173 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
174 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
175 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
176 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
177 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
178 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
179 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
180 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
181 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
182 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
183 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
184 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
185 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
186 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
187 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
188 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
189 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
190 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
191 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
192 	SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
193 	SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
194 	SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
195 	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
196 	SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
197 	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
198 	SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
199 	SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
200 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
201 	SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
202 	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
203 	SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
204 	SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
205 	SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
206 	SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
207 	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
208 	SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
209 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
210 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
211 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
212 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
213 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
214 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
215 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
216 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
217 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
218 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
219 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
220 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
221 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
222 	SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
223 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
224 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
225 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
226 	SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
227 	/* cp header registers */
228 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
229 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
230 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
231 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
232 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
233 	/* SE status registers */
234 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
235 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
236 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
237 	SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
238 };
239 
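/*
 * Per-queue (HQD) registers captured for each compute queue when
 * dumping gfx v9 IP state.
 */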
240 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
241 	/* compute queue registers */
242 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
243 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
244 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
245 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
246 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
247 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
248 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
249 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
250 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
251 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
252 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
253 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
254 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
255 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
256 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
257 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
258 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
259 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
260 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
261 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
262 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
263 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
264 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
265 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
266 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
267 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
268 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
269 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
270 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
271 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
272 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
273 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
274 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
275 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
276 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
277 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
278 	SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
279 };
280 
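/*
 * GFX sub-block indices as used by the RAS TA (PSP trusted application);
 * the *_INDEX_START/*_INDEX_END entries delimit per-block ranges.
 */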
281 enum ta_ras_gfx_subblock {
282 	/*CPC*/
283 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
284 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
285 	TA_RAS_BLOCK__GFX_CPC_UCODE,
286 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
287 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
288 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
289 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
290 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
291 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
292 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
293 	/* CPF*/
294 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
295 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
296 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
297 	TA_RAS_BLOCK__GFX_CPF_TAG,
298 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
299 	/* CPG*/
300 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
301 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
302 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
303 	TA_RAS_BLOCK__GFX_CPG_TAG,
304 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
305 	/* GDS*/
306 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
307 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
308 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
309 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
310 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
311 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
312 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
313 	/* SPI*/
314 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
315 	/* SQ*/
316 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
317 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
318 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
319 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
320 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
321 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
322 	/* SQC (3 ranges)*/
323 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
324 	/* SQC range 0*/
325 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
326 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
327 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
328 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
329 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
330 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
331 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
332 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
333 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
334 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
335 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
336 	/* SQC range 1*/
337 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
338 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
339 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
340 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
341 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
342 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
343 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
344 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
345 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
346 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
347 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
348 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
349 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
350 	/* SQC range 2*/
351 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
352 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
353 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
354 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
355 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
356 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
357 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
358 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
359 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
360 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
361 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
362 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
363 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
364 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
365 	/* TA*/
366 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
367 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
368 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
369 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
370 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
371 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
372 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
373 	/* TCA*/
374 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
375 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
376 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
377 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
378 	/* TCC (5 sub-ranges)*/
379 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
380 	/* TCC range 0*/
381 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
382 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
383 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
384 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
385 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
386 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
387 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
388 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
389 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
390 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
391 	/* TCC range 1*/
392 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
393 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
394 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
395 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
396 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
397 	/* TCC range 2*/
398 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
399 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
400 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
401 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
402 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
403 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
404 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
405 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
406 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
407 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
408 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
409 	/* TCC range 3*/
410 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
411 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
412 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
413 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
414 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
415 	/* TCC range 4*/
416 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
417 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
418 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
419 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
420 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
421 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
422 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
423 	/* TCI*/
424 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
425 	/* TCP*/
426 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
427 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
428 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
429 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
430 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
431 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
432 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
433 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
434 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
435 	/* TD*/
436 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
437 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
438 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
439 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
440 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
441 	/* EA (3 sub-ranges)*/
442 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
443 	/* EA range 0*/
444 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
445 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
446 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
447 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
448 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
449 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
450 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
451 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
452 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
453 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
454 	/* EA range 1*/
455 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
456 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
457 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
458 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
459 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
460 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
461 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
462 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
463 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
464 	/* EA range 2*/
465 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
466 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
467 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
468 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
469 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
470 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
471 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
472 	/* UTC VM L2 bank*/
473 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
474 	/* UTC VM walker*/
475 	TA_RAS_BLOCK__UTC_VML2_WALKER,
476 	/* UTC ATC L2 2MB cache*/
477 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
478 	/* UTC ATC L2 4KB cache*/
479 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
480 	TA_RAS_BLOCK__GFX_MAX
481 };
482 
483 struct ras_gfx_subblock {
484 	unsigned char *name;
485 	int ta_subblock;
486 	int hw_supported_error_type;
487 	int sw_supported_error_type;
488 };
489 
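/*
 * Tie an AMDGPU RAS sub-block to its TA enum value and pack the
 * hardware-supported (a..d) and software-supported (e..h) error-type
 * flags into the two bitmask fields of struct ras_gfx_subblock.
 */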
490 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
491 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
492 		#subblock,                                                     \
493 		TA_RAS_BLOCK__##subblock,                                      \
494 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
495 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
496 	}
497 
498 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
499 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
500 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
501 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
502 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
503 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
504 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
505 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
506 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
507 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
508 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
509 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
510 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
511 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
512 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
513 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
514 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
515 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
516 			     0),
517 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
518 			     0),
519 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
520 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
521 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
522 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
523 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
524 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
525 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
526 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
527 			     0, 0),
528 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
529 			     0),
530 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
531 			     0, 0),
532 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
533 			     0),
534 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
535 			     0, 0),
536 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
537 			     0),
538 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
539 			     1),
540 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
541 			     0, 0, 0),
542 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
543 			     0),
544 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
545 			     0),
546 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
547 			     0),
548 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
549 			     0),
550 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
551 			     0),
552 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
553 			     0, 0),
554 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
555 			     0),
556 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
557 			     0),
558 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
559 			     0, 0, 0),
560 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
561 			     0),
562 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
563 			     0),
564 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
565 			     0),
566 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
567 			     0),
568 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
569 			     0),
570 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
571 			     0, 0),
572 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
573 			     0),
574 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
575 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
576 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
577 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
578 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
579 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
580 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
581 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
582 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
583 			     1),
584 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
585 			     1),
586 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
587 			     1),
588 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
589 			     0),
590 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
591 			     0),
592 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
593 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
594 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
595 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
596 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
597 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
598 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
599 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
600 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
601 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
602 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
603 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
604 			     0),
605 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
606 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
607 			     0),
608 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
609 			     0, 0),
610 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
611 			     0),
612 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
613 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
614 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
615 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
616 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
617 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
618 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
619 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
620 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
621 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
622 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
623 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
624 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
625 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
626 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
627 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
628 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
629 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
630 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
631 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
632 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
633 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
634 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
635 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
636 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
637 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
638 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
639 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
640 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
641 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
642 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
643 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
644 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
645 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
646 };
647 
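/*
 * Golden register settings: each SOC15_REG_GOLDEN_VALUE() entry names a
 * register plus an and-mask and or-value which
 * soc15_program_register_sequence() applies as a masked update.
 */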
648 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
649 {
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
668 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
670 };
671 
672 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
673 {
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
682 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
683 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
684 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
685 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
692 };
693 
694 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
695 {
696 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
697 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
698 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
699 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
700 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
701 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
702 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
703 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
704 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
705 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
706 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
707 };
708 
709 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
710 {
711 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
712 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
713 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
714 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
715 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
716 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
717 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
718 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
719 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
720 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
721 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
722 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
723 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
724 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
725 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
726 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
727 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
728 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
729 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
730 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
731 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
732 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
733 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
734 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
735 };
736 
737 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
738 {
739 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
740 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
741 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
742 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
743 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
744 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
745 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
746 };
747 
748 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
749 {
750 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
751 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
752 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
753 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
754 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
755 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
756 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
757 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
758 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
759 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
760 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
761 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
762 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
763 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
764 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
765 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
766 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
767 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
768 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
769 };
770 
771 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
772 {
773 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
774 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
775 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
776 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
777 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
778 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
779 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
780 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
781 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
782 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
783 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
784 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
785 };
786 
787 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
788 {
789 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
790 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
791 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
792 };
793 
794 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
795 {
796 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
797 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
798 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
799 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
800 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
801 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
802 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
803 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
804 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
805 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
806 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
807 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
808 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
809 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
810 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
811 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
812 };
813 
814 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
815 {
816 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
817 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
818 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
819 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
820 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
821 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
822 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
823 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
824 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
825 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
826 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
827 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
828 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
829 };
830 
831 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
832 {
833 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
834 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
835 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
836 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
837 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
838 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
839 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
840 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
841 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
842 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
843 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
844 };
845 
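/*
 * Registers that must be accessed indirectly through the RLCG interface
 * rather than plain MMIO, e.g. when running under SR-IOV.
 */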
846 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
847 	{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
848 	{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
849 };
850 
851 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
852 {
853 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
854 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
855 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
856 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
857 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
858 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
859 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
860 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
861 };
862 
863 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
864 {
865 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
866 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
867 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
868 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
869 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
870 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
871 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
872 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
873 };
874 
875 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
876 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
877 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
878 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
879 
880 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
881 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
882 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
883 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
884 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
885 				struct amdgpu_cu_info *cu_info);
886 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
887 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
888 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
889 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
890 					  void *ras_error_status);
891 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
892 				     void *inject_if, uint32_t instance_mask);
893 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
894 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
895 					      unsigned int vmid);
896 
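/*
 * Emit PACKET3_SET_RESOURCES on the KIQ ring to hand the compute queue
 * mask (and empty GWS/OAC/GDS allocations) to the KIQ.
 */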
897 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
898 				uint64_t queue_mask)
899 {
900 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
901 	amdgpu_ring_write(kiq_ring,
902 		PACKET3_SET_RESOURCES_VMID_MASK(0) |
903 		/* vmid_mask:0, queue_type:0 (KIQ) */
904 		PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
905 	amdgpu_ring_write(kiq_ring,
906 			lower_32_bits(queue_mask));	/* queue mask lo */
907 	amdgpu_ring_write(kiq_ring,
908 			upper_32_bits(queue_mask));	/* queue mask hi */
909 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
910 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
911 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
912 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
913 }
914 
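/*
 * Ask the KIQ to map (activate) @ring's hardware queue via
 * PACKET3_MAP_QUEUES, passing the MQD address, doorbell and wptr address.
 */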
915 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
916 				 struct amdgpu_ring *ring)
917 {
918 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
919 	uint64_t wptr_addr = ring->wptr_gpu_addr;
920 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
921 
922 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
923 	/* Q_sel:0, vmid:0, vidmem:1, engine:0, num_Q:1 */
924 	amdgpu_ring_write(kiq_ring,
925 			 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
926 			 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
927 			 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
928 			 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
929 			 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
930 			 /*queue_type: normal compute queue */
931 			 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
932 			 /* alloc format: all_on_one_pipe */
933 			 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
934 			 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
935 			 /* num_queues: must be 1 */
936 			 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
937 	amdgpu_ring_write(kiq_ring,
938 			PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
939 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
940 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
941 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
942 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
943 }
944 
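/*
 * Emit PACKET3_UNMAP_QUEUES for @ring; for PREEMPT_QUEUES_NO_UNMAP the
 * current wptr is passed so the queue can be restarted where it left off.
 */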
945 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
946 				   struct amdgpu_ring *ring,
947 				   enum amdgpu_unmap_queues_action action,
948 				   u64 gpu_addr, u64 seq)
949 {
950 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
951 
952 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
953 	amdgpu_ring_write(kiq_ring, /* action, Q_sel: 0, eng_sel, num_Q: 1 */
954 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
955 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
956 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
957 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
958 	amdgpu_ring_write(kiq_ring,
959 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
960 
961 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
962 		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
963 		amdgpu_ring_write(kiq_ring, 0);
964 		amdgpu_ring_write(kiq_ring, 0);
965 
966 	} else {
967 		amdgpu_ring_write(kiq_ring, 0);
968 		amdgpu_ring_write(kiq_ring, 0);
969 		amdgpu_ring_write(kiq_ring, 0);
970 	}
971 }
972 
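/*
 * Emit PACKET3_QUERY_STATUS so the KIQ reports on @ring's queue by
 * writing the fence value @seq to @addr.
 */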
973 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
974 				   struct amdgpu_ring *ring,
975 				   u64 addr,
976 				   u64 seq)
977 {
978 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
979 
980 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
981 	amdgpu_ring_write(kiq_ring,
982 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
983 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
984 			  PACKET3_QUERY_STATUS_COMMAND(2));
985 	/* doorbell offset and engine select for the target queue */
986 	amdgpu_ring_write(kiq_ring,
987 			PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
988 			PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
989 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
990 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
991 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
992 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
993 }
994 
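/*
 * Ask the KIQ to flush the TLBs for @pasid via PACKET3_INVALIDATE_TLBS,
 * optionally on all VM hubs.
 */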
995 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
996 				uint16_t pasid, uint32_t flush_type,
997 				bool all_hub)
998 {
999 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
1000 	amdgpu_ring_write(kiq_ring,
1001 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
1002 			PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
1003 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
1004 			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
1005 }
1006 
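/* KIQ PM4 packet helpers for gfx v9 and their packet sizes in dwords. */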
1007 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1008 	.kiq_set_resources = gfx_v9_0_kiq_set_resources,
1009 	.kiq_map_queues = gfx_v9_0_kiq_map_queues,
1010 	.kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1011 	.kiq_query_status = gfx_v9_0_kiq_query_status,
1012 	.kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1013 	.set_resources_size = 8,
1014 	.map_queues_size = 7,
1015 	.unmap_queues_size = 6,
1016 	.query_status_size = 7,
1017 	.invalidate_tlbs_size = 2,
1018 };
1019 
1020 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1021 {
1022 	adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
1023 }
1024 
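/*
 * Program the per-ASIC golden register settings according to the GC IP
 * version, plus the common gfx v9 settings where applicable.
 */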
1025 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1026 {
1027 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1028 	case IP_VERSION(9, 0, 1):
1029 		soc15_program_register_sequence(adev,
1030 						golden_settings_gc_9_0,
1031 						ARRAY_SIZE(golden_settings_gc_9_0));
1032 		soc15_program_register_sequence(adev,
1033 						golden_settings_gc_9_0_vg10,
1034 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1035 		break;
1036 	case IP_VERSION(9, 2, 1):
1037 		soc15_program_register_sequence(adev,
1038 						golden_settings_gc_9_2_1,
1039 						ARRAY_SIZE(golden_settings_gc_9_2_1));
1040 		soc15_program_register_sequence(adev,
1041 						golden_settings_gc_9_2_1_vg12,
1042 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1043 		break;
1044 	case IP_VERSION(9, 4, 0):
1045 		soc15_program_register_sequence(adev,
1046 						golden_settings_gc_9_0,
1047 						ARRAY_SIZE(golden_settings_gc_9_0));
1048 		soc15_program_register_sequence(adev,
1049 						golden_settings_gc_9_0_vg20,
1050 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1051 		break;
1052 	case IP_VERSION(9, 4, 1):
1053 		soc15_program_register_sequence(adev,
1054 						golden_settings_gc_9_4_1_arct,
1055 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1056 		break;
1057 	case IP_VERSION(9, 2, 2):
1058 	case IP_VERSION(9, 1, 0):
1059 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1060 						ARRAY_SIZE(golden_settings_gc_9_1));
1061 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1062 			soc15_program_register_sequence(adev,
1063 							golden_settings_gc_9_1_rv2,
1064 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1065 		else
1066 			soc15_program_register_sequence(adev,
1067 							golden_settings_gc_9_1_rv1,
1068 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1069 		break;
1070 	case IP_VERSION(9, 3, 0):
1071 		soc15_program_register_sequence(adev,
1072 						golden_settings_gc_9_1_rn,
1073 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
1074 		return; /* for renoir, the common golden settings are not needed */
1075 	case IP_VERSION(9, 4, 2):
1076 		gfx_v9_4_2_init_golden_registers(adev,
1077 						 adev->smuio.funcs->get_die_id(adev));
1078 		break;
1079 	default:
1080 		break;
1081 	}
1082 
1083 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1084 	    (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
1085 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1086 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1087 }
1088 
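/*
 * Write @val to register @reg from the ring using PACKET3_WRITE_DATA;
 * @wc requests a write confirmation.
 */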
1089 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1090 				       bool wc, uint32_t reg, uint32_t val)
1091 {
1092 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1093 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1094 				WRITE_DATA_DST_SEL(0) |
1095 				(wc ? WR_CONFIRM : 0));
1096 	amdgpu_ring_write(ring, reg);
1097 	amdgpu_ring_write(ring, 0);
1098 	amdgpu_ring_write(ring, val);
1099 }
1100 
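/*
 * Emit PACKET3_WAIT_REG_MEM to poll a register (mem_space=0) or memory
 * location (mem_space=1) until (value & @mask) == @ref.
 */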
1101 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1102 				  int mem_space, int opt, uint32_t addr0,
1103 				  uint32_t addr1, uint32_t ref, uint32_t mask,
1104 				  uint32_t inv)
1105 {
1106 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1107 	amdgpu_ring_write(ring,
1108 				 /* memory (1) or register (0) */
1109 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1110 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1111 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
1112 				 WAIT_REG_MEM_ENGINE(eng_sel)));
1113 
1114 	if (mem_space)
1115 		BUG_ON(addr0 & 0x3); /* Dword align */
1116 	amdgpu_ring_write(ring, addr0);
1117 	amdgpu_ring_write(ring, addr1);
1118 	amdgpu_ring_write(ring, ref);
1119 	amdgpu_ring_write(ring, mask);
1120 	amdgpu_ring_write(ring, inv); /* poll interval */
1121 }
1122 
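/*
 * Basic ring test: write 0xDEADBEEF to SCRATCH_REG0 through the ring and
 * poll until the value reads back or the timeout expires.
 */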
1123 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1124 {
1125 	struct amdgpu_device *adev = ring->adev;
1126 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1127 	uint32_t tmp = 0;
1128 	unsigned i;
1129 	int r;
1130 
1131 	WREG32(scratch, 0xCAFEDEAD);
1132 	r = amdgpu_ring_alloc(ring, 3);
1133 	if (r)
1134 		return r;
1135 
1136 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1137 	amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1138 	amdgpu_ring_write(ring, 0xDEADBEEF);
1139 	amdgpu_ring_commit(ring);
1140 
1141 	for (i = 0; i < adev->usec_timeout; i++) {
1142 		tmp = RREG32(scratch);
1143 		if (tmp == 0xDEADBEEF)
1144 			break;
1145 		udelay(1);
1146 	}
1147 
1148 	if (i >= adev->usec_timeout)
1149 		r = -ETIMEDOUT;
1150 	return r;
1151 }
1152 
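/*
 * IB test: submit an indirect buffer that writes 0xDEADBEEF to a
 * write-back slot, then wait on the fence and verify the value.
 */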
1153 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1154 {
1155 	struct amdgpu_device *adev = ring->adev;
1156 	struct amdgpu_ib ib;
1157 	struct dma_fence *f = NULL;
1158 
1159 	unsigned index;
1160 	uint64_t gpu_addr;
1161 	uint32_t tmp;
1162 	long r;
1163 
1164 	r = amdgpu_device_wb_get(adev, &index);
1165 	if (r)
1166 		return r;
1167 
1168 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1169 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1170 	memset(&ib, 0, sizeof(ib));
1171 
1172 	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1173 	if (r)
1174 		goto err1;
1175 
1176 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1177 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1178 	ib.ptr[2] = lower_32_bits(gpu_addr);
1179 	ib.ptr[3] = upper_32_bits(gpu_addr);
1180 	ib.ptr[4] = 0xDEADBEEF;
1181 	ib.length_dw = 5;
1182 
1183 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1184 	if (r)
1185 		goto err2;
1186 
1187 	r = dma_fence_wait_timeout(f, false, timeout);
1188 	if (r == 0) {
1189 		r = -ETIMEDOUT;
1190 		goto err2;
1191 	} else if (r < 0) {
1192 		goto err2;
1193 	}
1194 
1195 	tmp = adev->wb.wb[index];
1196 	if (tmp == 0xDEADBEEF)
1197 		r = 0;
1198 	else
1199 		r = -EINVAL;
1200 
1201 err2:
1202 	amdgpu_ib_free(adev, &ib, NULL);
1203 	dma_fence_put(f);
1204 err1:
1205 	amdgpu_device_wb_free(adev, index);
1206 	return r;
1207 }
1208 
1209 
1210 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1211 {
1212 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
1213 	amdgpu_ucode_release(&adev->gfx.me_fw);
1214 	amdgpu_ucode_release(&adev->gfx.ce_fw);
1215 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
1216 	amdgpu_ucode_release(&adev->gfx.mec_fw);
1217 	amdgpu_ucode_release(&adev->gfx.mec2_fw);
1218 
1219 	kfree(adev->gfx.rlc.register_list_format);
1220 }
1221 
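/*
 * Set adev->gfx.me_fw_write_wait / mec_fw_write_wait based on per-IP-version
 * CP firmware version gates, and warn once if the CP firmware predates the
 * recommended minimum.
 */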
1222 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1223 {
1224 	adev->gfx.me_fw_write_wait = false;
1225 	adev->gfx.mec_fw_write_wait = false;
1226 
1227 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1228 	    ((adev->gfx.mec_fw_version < 0x000001a5) ||
1229 	     (adev->gfx.mec_feature_version < 46) ||
1230 	     (adev->gfx.pfp_fw_version < 0x000000b7) ||
1231 	     (adev->gfx.pfp_feature_version < 46)))
1232 		DRM_WARN_ONCE("CP firmware version too old, please update!");
1233 
1234 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1235 	case IP_VERSION(9, 0, 1):
1236 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1237 		    (adev->gfx.me_feature_version >= 42) &&
1238 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1239 		    (adev->gfx.pfp_feature_version >= 42))
1240 			adev->gfx.me_fw_write_wait = true;
1241 
1242 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1243 		    (adev->gfx.mec_feature_version >= 42))
1244 			adev->gfx.mec_fw_write_wait = true;
1245 		break;
1246 	case IP_VERSION(9, 2, 1):
1247 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1248 		    (adev->gfx.me_feature_version >= 44) &&
1249 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1250 		    (adev->gfx.pfp_feature_version >= 44))
1251 			adev->gfx.me_fw_write_wait = true;
1252 
1253 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1254 		    (adev->gfx.mec_feature_version >= 44))
1255 			adev->gfx.mec_fw_write_wait = true;
1256 		break;
1257 	case IP_VERSION(9, 4, 0):
1258 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1259 		    (adev->gfx.me_feature_version >= 44) &&
1260 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1261 		    (adev->gfx.pfp_feature_version >= 44))
1262 			adev->gfx.me_fw_write_wait = true;
1263 
1264 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1265 		    (adev->gfx.mec_feature_version >= 44))
1266 			adev->gfx.mec_fw_write_wait = true;
1267 		break;
1268 	case IP_VERSION(9, 1, 0):
1269 	case IP_VERSION(9, 2, 2):
1270 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1271 		    (adev->gfx.me_feature_version >= 42) &&
1272 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1273 		    (adev->gfx.pfp_feature_version >= 42))
1274 			adev->gfx.me_fw_write_wait = true;
1275 
1276 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1277 		    (adev->gfx.mec_feature_version >= 42))
1278 			adev->gfx.mec_fw_write_wait = true;
1279 		break;
1280 	default:
1281 		adev->gfx.me_fw_write_wait = true;
1282 		adev->gfx.mec_fw_write_wait = true;
1283 		break;
1284 	}
1285 }
1286 
1287 struct amdgpu_gfxoff_quirk {
1288 	u16 chip_vendor;
1289 	u16 chip_device;
1290 	u16 subsys_vendor;
1291 	u16 subsys_device;
1292 	u8 revision;
1293 };
1294 
1295 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1296 	/* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1297 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1298 	/* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1299 	{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1300 	/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1301 	{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1302 	/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1303 	{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1304 	{ 0, 0, 0, 0, 0 },
1305 };
1306 
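/*
 * Return true if this device matches an entry in amdgpu_gfxoff_quirk_list
 * above, i.e. a board where GFXOFF is known to be unstable and should stay
 * disabled.
 */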
1307 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1308 {
1309 	const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1310 
1311 	while (p && p->chip_device != 0) {
1312 		if (pdev->vendor == p->chip_vendor &&
1313 		    pdev->device == p->chip_device &&
1314 		    pdev->subsystem_vendor == p->subsys_vendor &&
1315 		    pdev->subsystem_device == p->subsys_device &&
1316 		    pdev->revision == p->revision) {
1317 			return true;
1318 		}
1319 		++p;
1320 	}
1321 	return false;
1322 }
1323 
1324 static bool is_raven_kicker(struct amdgpu_device *adev)
1325 {
1326 	if (adev->pm.fw_version >= 0x41e2b)
1327 		return true;
1328 	else
1329 		return false;
1330 }
1331 
1332 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1333 {
1334 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1335 	    (adev->gfx.me_fw_version >= 0x000000a5) &&
1336 	    (adev->gfx.me_feature_version >= 52))
1337 		return true;
1338 	else
1339 		return false;
1340 }
1341 
1342 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1343 {
1344 	if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1345 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1346 
1347 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1348 	case IP_VERSION(9, 0, 1):
1349 	case IP_VERSION(9, 2, 1):
1350 	case IP_VERSION(9, 4, 0):
1351 		break;
1352 	case IP_VERSION(9, 2, 2):
1353 	case IP_VERSION(9, 1, 0):
1354 		if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1355 		      (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1356 		    ((!is_raven_kicker(adev) &&
1357 		      adev->gfx.rlc_fw_version < 531) ||
1358 		     (adev->gfx.rlc_feature_version < 1) ||
1359 		     !adev->gfx.rlc.is_rlc_v2_1))
1360 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1361 
1362 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1363 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1364 				AMD_PG_SUPPORT_CP |
1365 				AMD_PG_SUPPORT_RLC_SMU_HS;
1366 		break;
1367 	case IP_VERSION(9, 3, 0):
1368 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1369 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1370 				AMD_PG_SUPPORT_CP |
1371 				AMD_PG_SUPPORT_RLC_SMU_HS;
1372 		break;
1373 	default:
1374 		break;
1375 	}
1376 }
1377 
1378 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1379 					  char *chip_name)
1380 {
1381 	char fw_name[50];
1382 	int err;
1383 
1384 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1385 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
1386 	if (err)
1387 		goto out;
1388 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1389 
1390 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1391 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
1392 	if (err)
1393 		goto out;
1394 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1395 
1396 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1397 	err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, fw_name);
1398 	if (err)
1399 		goto out;
1400 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1401 
1402 out:
1403 	if (err) {
1404 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
1405 		amdgpu_ucode_release(&adev->gfx.me_fw);
1406 		amdgpu_ucode_release(&adev->gfx.ce_fw);
1407 	}
1408 	return err;
1409 }
1410 
1411 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1412 				       char *chip_name)
1413 {
1414 	char fw_name[53];
1415 	int err;
1416 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1417 	uint16_t version_major;
1418 	uint16_t version_minor;
1419 	uint32_t smu_version;
1420 
1421 	/*
1422 	 * For Picasso on an AM4 socket board, we use picasso_rlc_am4.bin
1423 	 * instead of picasso_rlc.bin.
1424 	 * Detection:
1425 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1426 	 *          or revision >= 0xD8 && revision <= 0xDF
1427 	 * otherwise the part is PCO FP5
1428 	 */
1429 	if (!strcmp(chip_name, "picasso") &&
1430 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1431 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1432 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1433 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1434 		(smu_version >= 0x41e2b))
1435 		/*
1436 		 * SMC is loaded by the SBIOS on APUs; the SMU version can be read directly.
1437 		 */
1438 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1439 	else
1440 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1441 	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
1442 	if (err)
1443 		goto out;
1444 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1445 
1446 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1447 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1448 	err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1449 out:
1450 	if (err)
1451 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
1452 
1453 	return err;
1454 }
1455 
1456 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1457 {
1458 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1459 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1460 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1461 		return false;
1462 
1463 	return true;
1464 }
1465 
1466 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1467 					      char *chip_name)
1468 {
1469 	char fw_name[50];
1470 	int err;
1471 
1472 	if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1473 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec.bin", chip_name);
1474 	else
1475 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1476 
1477 	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
1478 	if (err)
1479 		goto out;
1480 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1481 	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1482 
1483 	if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1484 		if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1485 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sjt_mec2.bin", chip_name);
1486 		else
1487 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1488 
1489 		/* ignore failures to load */
1490 		err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, fw_name);
1491 		if (!err) {
1492 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1493 			amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1494 		} else {
1495 			err = 0;
1496 			amdgpu_ucode_release(&adev->gfx.mec2_fw);
1497 		}
1498 	} else {
1499 		adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1500 		adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1501 	}
1502 
1503 	gfx_v9_0_check_if_need_gfxoff(adev);
1504 	gfx_v9_0_check_fw_write_wait(adev);
1505 
1506 out:
1507 	if (err)
1508 		amdgpu_ucode_release(&adev->gfx.mec_fw);
1509 	return err;
1510 }
1511 
1512 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1513 {
1514 	char ucode_prefix[30];
1515 	int r;
1516 
1517 	DRM_DEBUG("\n");
1518 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1519 
1520 	/* No CPG in Arcturus */
1521 	if (adev->gfx.num_gfx_rings) {
1522 		r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1523 		if (r)
1524 			return r;
1525 	}
1526 
1527 	r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1528 	if (r)
1529 		return r;
1530 
1531 	r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1532 	if (r)
1533 		return r;
1534 
1535 	return r;
1536 }
1537 
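/*
 * Size of the clear-state buffer in dwords: preamble begin (2) + context
 * control (3) + one SET_CONTEXT_REG header pair plus payload per
 * SECT_CONTEXT extent + preamble end (2) + CLEAR_STATE (2).
 */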
1538 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1539 {
1540 	u32 count = 0;
1541 	const struct cs_section_def *sect = NULL;
1542 	const struct cs_extent_def *ext = NULL;
1543 
1544 	/* begin clear state */
1545 	count += 2;
1546 	/* context control state */
1547 	count += 3;
1548 
1549 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1550 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1551 			if (sect->id == SECT_CONTEXT)
1552 				count += 2 + ext->reg_count;
1553 			else
1554 				return 0;
1555 		}
1556 	}
1557 
1558 	/* end clear state */
1559 	count += 2;
1560 	/* clear state */
1561 	count += 2;
1562 
1563 	return count;
1564 }
1565 
1566 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1567 				    volatile u32 *buffer)
1568 {
1569 	u32 count = 0, i;
1570 	const struct cs_section_def *sect = NULL;
1571 	const struct cs_extent_def *ext = NULL;
1572 
1573 	if (adev->gfx.rlc.cs_data == NULL)
1574 		return;
1575 	if (buffer == NULL)
1576 		return;
1577 
1578 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1579 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1580 
1581 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1582 	buffer[count++] = cpu_to_le32(0x80000000);
1583 	buffer[count++] = cpu_to_le32(0x80000000);
1584 
1585 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1586 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1587 			if (sect->id == SECT_CONTEXT) {
1588 				buffer[count++] =
1589 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1590 				buffer[count++] = cpu_to_le32(ext->reg_index -
1591 						PACKET3_SET_CONTEXT_REG_START);
1592 				for (i = 0; i < ext->reg_count; i++)
1593 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1594 			} else {
1595 				return;
1596 			}
1597 		}
1598 	}
1599 
1600 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1601 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1602 
1603 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1604 	buffer[count++] = cpu_to_le32(0);
1605 }
1606 
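/*
 * For each SE/SH, mark the first always_on_cu_num active CUs as "always on"
 * in RLC_LB_ALWAYS_ACTIVE_CU_MASK (and the first pg_always_on_cu_num in
 * RLC_PG_ALWAYS_ON_CU_MASK), caching the result in cu_info->ao_cu_bitmap.
 */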
1607 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1608 {
1609 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1610 	uint32_t pg_always_on_cu_num = 2;
1611 	uint32_t always_on_cu_num;
1612 	uint32_t i, j, k;
1613 	uint32_t mask, cu_bitmap, counter;
1614 
1615 	if (adev->flags & AMD_IS_APU)
1616 		always_on_cu_num = 4;
1617 	else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1618 		always_on_cu_num = 8;
1619 	else
1620 		always_on_cu_num = 12;
1621 
1622 	mutex_lock(&adev->grbm_idx_mutex);
1623 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1624 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1625 			mask = 1;
1626 			cu_bitmap = 0;
1627 			counter = 0;
1628 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1629 
1630 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1631 				if (cu_info->bitmap[0][i][j] & mask) {
1632 					if (counter == pg_always_on_cu_num)
1633 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1634 					if (counter < always_on_cu_num)
1635 						cu_bitmap |= mask;
1636 					else
1637 						break;
1638 					counter++;
1639 				}
1640 				mask <<= 1;
1641 			}
1642 
1643 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1644 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1645 		}
1646 	}
1647 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1648 	mutex_unlock(&adev->grbm_idx_mutex);
1649 }
1650 
1651 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1652 {
1653 	uint32_t data;
1654 
1655 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1656 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1657 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1658 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1659 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1660 
1661 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1662 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1663 
1664 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1665 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1666 
1667 	mutex_lock(&adev->grbm_idx_mutex);
1668 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1669 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1670 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1671 
1672 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1673 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1674 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1675 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1676 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1677 
1678 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1679 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1680 	data &= 0x0000FFFF;
1681 	data |= 0x00C00000;
1682 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1683 
1684 	/*
1685 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1686 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1687 	 */
1688 
1689 	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved
1690 	 * but used for RLC_LB_CNTL configuration */
1691 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1692 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1693 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1694 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1695 	mutex_unlock(&adev->grbm_idx_mutex);
1696 
1697 	gfx_v9_0_init_always_on_cu_mask(adev);
1698 }
1699 
1700 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1701 {
1702 	uint32_t data;
1703 
1704 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1705 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1706 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1707 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1708 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1709 
1710 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1711 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1712 
1713 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1714 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1715 
1716 	mutex_lock(&adev->grbm_idx_mutex);
1717 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1718 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1719 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1720 
1721 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1722 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1723 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1724 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1725 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1726 
1727 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1728 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1729 	data &= 0x0000FFFF;
1730 	data |= 0x00C00000;
1731 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1732 
1733 	/*
1734 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1735 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1736 	 */
1737 
1738 	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved
1739 	 * but used for RLC_LB_CNTL configuration */
1740 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1741 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1742 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1743 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1744 	mutex_unlock(&adev->grbm_idx_mutex);
1745 
1746 	gfx_v9_0_init_always_on_cu_mask(adev);
1747 }
1748 
1749 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1750 {
1751 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1752 }
1753 
1754 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1755 {
1756 	if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1757 		return 5;
1758 	else
1759 		return 4;
1760 }
1761 
1762 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1763 {
1764 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1765 
1766 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1767 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1768 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1769 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1770 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1771 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1772 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1773 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1774 	adev->gfx.rlc.rlcg_reg_access_supported = true;
1775 }
1776 
1777 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1778 {
1779 	const struct cs_section_def *cs_data;
1780 	int r;
1781 
1782 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1783 
1784 	cs_data = adev->gfx.rlc.cs_data;
1785 
1786 	if (cs_data) {
1787 		/* init clear state block */
1788 		r = amdgpu_gfx_rlc_init_csb(adev);
1789 		if (r)
1790 			return r;
1791 	}
1792 
1793 	if (adev->flags & AMD_IS_APU) {
1794 		/* TODO: double check the cp_table_size for RV */
1795 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1796 		r = amdgpu_gfx_rlc_init_cpt(adev);
1797 		if (r)
1798 			return r;
1799 	}
1800 
1801 	return 0;
1802 }
1803 
1804 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1805 {
1806 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1807 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1808 }
1809 
1810 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1811 {
1812 	int r;
1813 	u32 *hpd;
1814 	const __le32 *fw_data;
1815 	unsigned fw_size;
1816 	u32 *fw;
1817 	size_t mec_hpd_size;
1818 
1819 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1820 
1821 	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1822 
1823 	/* take ownership of the relevant compute queues */
1824 	amdgpu_gfx_compute_queue_acquire(adev);
1825 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1826 	if (mec_hpd_size) {
1827 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1828 					      AMDGPU_GEM_DOMAIN_VRAM |
1829 					      AMDGPU_GEM_DOMAIN_GTT,
1830 					      &adev->gfx.mec.hpd_eop_obj,
1831 					      &adev->gfx.mec.hpd_eop_gpu_addr,
1832 					      (void **)&hpd);
1833 		if (r) {
1834 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1835 			gfx_v9_0_mec_fini(adev);
1836 			return r;
1837 		}
1838 
1839 		memset(hpd, 0, mec_hpd_size);
1840 
1841 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1842 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1843 	}
1844 
1845 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1846 
1847 	fw_data = (const __le32 *)
1848 		(adev->gfx.mec_fw->data +
1849 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1850 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1851 
1852 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1853 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1854 				      &adev->gfx.mec.mec_fw_obj,
1855 				      &adev->gfx.mec.mec_fw_gpu_addr,
1856 				      (void **)&fw);
1857 	if (r) {
1858 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1859 		gfx_v9_0_mec_fini(adev);
1860 		return r;
1861 	}
1862 
1863 	memcpy(fw, fw_data, fw_size);
1864 
1865 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1866 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1867 
1868 	return 0;
1869 }
1870 
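/*
 * Read SQ per-wave state through the SQ_IND_INDEX/SQ_IND_DATA indirect
 * register pair for the given SIMD and wave slot.
 */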
1871 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1872 {
1873 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1874 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1875 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1876 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1877 		(SQ_IND_INDEX__FORCE_READ_MASK));
1878 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1879 }
1880 
1881 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1882 			   uint32_t wave, uint32_t thread,
1883 			   uint32_t regno, uint32_t num, uint32_t *out)
1884 {
1885 	WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1886 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1887 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1888 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1889 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1890 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1891 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1892 	while (num--)
1893 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1894 }
1895 
1896 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1897 {
1898 	/* type 1 wave data */
1899 	dst[(*no_fields)++] = 1;
1900 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1901 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1902 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1903 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1904 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1905 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1906 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1907 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1908 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1909 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1910 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1911 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1912 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1913 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1914 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1915 }
1916 
1917 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1918 				     uint32_t wave, uint32_t start,
1919 				     uint32_t size, uint32_t *dst)
1920 {
1921 	wave_read_regs(
1922 		adev, simd, wave, 0,
1923 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1924 }
1925 
1926 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1927 				     uint32_t wave, uint32_t thread,
1928 				     uint32_t start, uint32_t size,
1929 				     uint32_t *dst)
1930 {
1931 	wave_read_regs(
1932 		adev, simd, wave, thread,
1933 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1934 }
1935 
1936 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1937 				  u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1938 {
1939 	soc15_grbm_select(adev, me, pipe, q, vm, 0);
1940 }
1941 
1942 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1943 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1944 	.select_se_sh = &gfx_v9_0_select_se_sh,
1945 	.read_wave_data = &gfx_v9_0_read_wave_data,
1946 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1947 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1948 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1949 };
1950 
1951 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
1952 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1953 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count,
1954 	.reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
1955 };
1956 
1957 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
1958 	.ras_block = {
1959 		.hw_ops = &gfx_v9_0_ras_ops,
1960 	},
1961 };
1962 
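/*
 * Set up the per-IP-version gfx config (fifo sizes, GB_ADDR_CONFIG, RAS
 * hooks) and decode the GB_ADDR_CONFIG fields into
 * adev->gfx.config.gb_addr_config_fields.
 */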
1963 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1964 {
1965 	u32 gb_addr_config;
1966 	int err;
1967 
1968 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1969 	case IP_VERSION(9, 0, 1):
1970 		adev->gfx.config.max_hw_contexts = 8;
1971 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1972 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1973 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1974 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1975 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1976 		break;
1977 	case IP_VERSION(9, 2, 1):
1978 		adev->gfx.config.max_hw_contexts = 8;
1979 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1980 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1981 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1982 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1983 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1984 		DRM_INFO("fix gfx.config for vega12\n");
1985 		break;
1986 	case IP_VERSION(9, 4, 0):
1987 		adev->gfx.ras = &gfx_v9_0_ras;
1988 		adev->gfx.config.max_hw_contexts = 8;
1989 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1990 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1991 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1992 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1993 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1994 		gb_addr_config &= ~0xf3e777ff;
1995 		gb_addr_config |= 0x22014042;
1996 		/* check vbios table if gpu info is not available */
1997 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1998 		if (err)
1999 			return err;
2000 		break;
2001 	case IP_VERSION(9, 2, 2):
2002 	case IP_VERSION(9, 1, 0):
2003 		adev->gfx.config.max_hw_contexts = 8;
2004 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2005 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2006 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2007 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2008 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2009 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2010 		else
2011 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2012 		break;
2013 	case IP_VERSION(9, 4, 1):
2014 		adev->gfx.ras = &gfx_v9_4_ras;
2015 		adev->gfx.config.max_hw_contexts = 8;
2016 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2017 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2018 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2019 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2020 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2021 		gb_addr_config &= ~0xf3e777ff;
2022 		gb_addr_config |= 0x22014042;
2023 		break;
2024 	case IP_VERSION(9, 3, 0):
2025 		adev->gfx.config.max_hw_contexts = 8;
2026 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2027 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2028 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2029 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2030 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2031 		gb_addr_config &= ~0xf3e777ff;
2032 		gb_addr_config |= 0x22010042;
2033 		break;
2034 	case IP_VERSION(9, 4, 2):
2035 		adev->gfx.ras = &gfx_v9_4_2_ras;
2036 		adev->gfx.config.max_hw_contexts = 8;
2037 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2038 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2039 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2040 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2041 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2042 		gb_addr_config &= ~0xf3e777ff;
2043 		gb_addr_config |= 0x22014042;
2044 		/* check vbios table if gpu info is not available */
2045 		err = amdgpu_atomfirmware_get_gfx_info(adev);
2046 		if (err)
2047 			return err;
2048 		break;
2049 	default:
2050 		BUG();
2051 		break;
2052 	}
2053 
2054 	adev->gfx.config.gb_addr_config = gb_addr_config;
2055 
2056 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2057 			REG_GET_FIELD(
2058 					adev->gfx.config.gb_addr_config,
2059 					GB_ADDR_CONFIG,
2060 					NUM_PIPES);
2061 
2062 	adev->gfx.config.max_tile_pipes =
2063 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2064 
2065 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2066 			REG_GET_FIELD(
2067 					adev->gfx.config.gb_addr_config,
2068 					GB_ADDR_CONFIG,
2069 					NUM_BANKS);
2070 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2071 			REG_GET_FIELD(
2072 					adev->gfx.config.gb_addr_config,
2073 					GB_ADDR_CONFIG,
2074 					MAX_COMPRESSED_FRAGS);
2075 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2076 			REG_GET_FIELD(
2077 					adev->gfx.config.gb_addr_config,
2078 					GB_ADDR_CONFIG,
2079 					NUM_RB_PER_SE);
2080 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2081 			REG_GET_FIELD(
2082 					adev->gfx.config.gb_addr_config,
2083 					GB_ADDR_CONFIG,
2084 					NUM_SHADER_ENGINES);
2085 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2086 			REG_GET_FIELD(
2087 					adev->gfx.config.gb_addr_config,
2088 					GB_ADDR_CONFIG,
2089 					PIPE_INTERLEAVE_SIZE));
2090 
2091 	return 0;
2092 }
2093 
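/*
 * Initialize one compute ring: assign its doorbell index as
 * (mec_ring0 + ring_id) << 1 and give it its own GFX9_MEC_HPD_SIZE slice of
 * the EOP buffer allocated in gfx_v9_0_mec_init().
 */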
2094 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2095 				      int mec, int pipe, int queue)
2096 {
2097 	unsigned irq_type;
2098 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2099 	unsigned int hw_prio;
2100 
2101 	ring = &adev->gfx.compute_ring[ring_id];
2102 
2103 	/* mec0 is me1 */
2104 	ring->me = mec + 1;
2105 	ring->pipe = pipe;
2106 	ring->queue = queue;
2107 
2108 	ring->ring_obj = NULL;
2109 	ring->use_doorbell = true;
2110 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2111 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2112 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2113 	ring->vm_hub = AMDGPU_GFXHUB(0);
2114 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2115 
2116 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2117 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2118 		+ ring->pipe;
2119 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2120 			AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2121 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2122 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2123 				hw_prio, NULL);
2124 }
2125 
2126 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2127 {
2128 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2129 	uint32_t *ptr;
2130 	uint32_t inst;
2131 
2132 	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2133 	if (ptr == NULL) {
2134 		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2135 		adev->gfx.ip_dump_core = NULL;
2136 	} else {
2137 		adev->gfx.ip_dump_core = ptr;
2138 	}
2139 
2140 	/* Allocate memory for compute queue registers for all the instances */
2141 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2142 	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2143 		adev->gfx.mec.num_queue_per_pipe;
2144 
2145 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2146 	if (ptr == NULL) {
2147 		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2148 		adev->gfx.ip_dump_compute_queues = NULL;
2149 	} else {
2150 		adev->gfx.ip_dump_compute_queues = ptr;
2151 	}
2152 }
2153 
2154 static int gfx_v9_0_sw_init(void *handle)
2155 {
2156 	int i, j, k, r, ring_id;
2157 	int xcc_id = 0;
2158 	struct amdgpu_ring *ring;
2159 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2160 	unsigned int hw_prio;
2161 
2162 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2163 	case IP_VERSION(9, 0, 1):
2164 	case IP_VERSION(9, 2, 1):
2165 	case IP_VERSION(9, 4, 0):
2166 	case IP_VERSION(9, 2, 2):
2167 	case IP_VERSION(9, 1, 0):
2168 	case IP_VERSION(9, 4, 1):
2169 	case IP_VERSION(9, 3, 0):
2170 	case IP_VERSION(9, 4, 2):
2171 		adev->gfx.mec.num_mec = 2;
2172 		break;
2173 	default:
2174 		adev->gfx.mec.num_mec = 1;
2175 		break;
2176 	}
2177 
2178 	adev->gfx.mec.num_pipe_per_mec = 4;
2179 	adev->gfx.mec.num_queue_per_pipe = 8;
2180 
2181 	/* EOP Event */
2182 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2183 	if (r)
2184 		return r;
2185 
2186 	/* Privileged reg */
2187 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2188 			      &adev->gfx.priv_reg_irq);
2189 	if (r)
2190 		return r;
2191 
2192 	/* Privileged inst */
2193 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2194 			      &adev->gfx.priv_inst_irq);
2195 	if (r)
2196 		return r;
2197 
2198 	/* ECC error */
2199 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2200 			      &adev->gfx.cp_ecc_error_irq);
2201 	if (r)
2202 		return r;
2203 
2204 	/* FUE error */
2205 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2206 			      &adev->gfx.cp_ecc_error_irq);
2207 	if (r)
2208 		return r;
2209 
2210 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2211 
2212 	if (adev->gfx.rlc.funcs) {
2213 		if (adev->gfx.rlc.funcs->init) {
2214 			r = adev->gfx.rlc.funcs->init(adev);
2215 			if (r) {
2216 				dev_err(adev->dev, "Failed to init rlc BOs!\n");
2217 				return r;
2218 			}
2219 		}
2220 	}
2221 
2222 	r = gfx_v9_0_mec_init(adev);
2223 	if (r) {
2224 		DRM_ERROR("Failed to init MEC BOs!\n");
2225 		return r;
2226 	}
2227 
2228 	/* set up the gfx ring */
2229 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2230 		ring = &adev->gfx.gfx_ring[i];
2231 		ring->ring_obj = NULL;
2232 		if (!i)
2233 			sprintf(ring->name, "gfx");
2234 		else
2235 			sprintf(ring->name, "gfx_%d", i);
2236 		ring->use_doorbell = true;
2237 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2238 
2239 		/* disable scheduler on the real ring */
2240 		ring->no_scheduler = adev->gfx.mcbp;
2241 		ring->vm_hub = AMDGPU_GFXHUB(0);
2242 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2243 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2244 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
2245 		if (r)
2246 			return r;
2247 	}
2248 
2249 	/* set up the software rings */
2250 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2251 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2252 			ring = &adev->gfx.sw_gfx_ring[i];
2253 			ring->ring_obj = NULL;
2254 			sprintf(ring->name, "%s", amdgpu_sw_ring_name(i));
2255 			ring->use_doorbell = true;
2256 			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2257 			ring->is_sw_ring = true;
2258 			hw_prio = amdgpu_sw_ring_priority(i);
2259 			ring->vm_hub = AMDGPU_GFXHUB(0);
2260 			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2261 					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2262 					     NULL);
2263 			if (r)
2264 				return r;
2265 			ring->wptr = 0;
2266 		}
2267 
2268 		/* init the muxer and add software rings */
2269 		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2270 					 GFX9_NUM_SW_GFX_RINGS);
2271 		if (r) {
2272 			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2273 			return r;
2274 		}
2275 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2276 			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2277 							&adev->gfx.sw_gfx_ring[i]);
2278 			if (r) {
2279 				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2280 				return r;
2281 			}
2282 		}
2283 	}
2284 
2285 	/* set up the compute queues - allocate horizontally across pipes */
2286 	ring_id = 0;
2287 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2288 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2289 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2290 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2291 								     k, j))
2292 					continue;
2293 
2294 				r = gfx_v9_0_compute_ring_init(adev,
2295 							       ring_id,
2296 							       i, k, j);
2297 				if (r)
2298 					return r;
2299 
2300 				ring_id++;
2301 			}
2302 		}
2303 	}
2304 
2305 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2306 	if (r) {
2307 		DRM_ERROR("Failed to init KIQ BOs!\n");
2308 		return r;
2309 	}
2310 
2311 	r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2312 	if (r)
2313 		return r;
2314 
2315 	/* create MQD for all compute queues as well as KIQ for the SRIOV case */
2316 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2317 	if (r)
2318 		return r;
2319 
2320 	adev->gfx.ce_ram_size = 0x8000;
2321 
2322 	r = gfx_v9_0_gpu_early_init(adev);
2323 	if (r)
2324 		return r;
2325 
2326 	if (amdgpu_gfx_ras_sw_init(adev)) {
2327 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2328 		return -EINVAL;
2329 	}
2330 
2331 	gfx_v9_0_alloc_ip_dump(adev);
2332 
2333 	return 0;
2334 }
2335 
2336 
2337 static int gfx_v9_0_sw_fini(void *handle)
2338 {
2339 	int i;
2340 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2341 
2342 	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2343 		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2344 			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2345 		amdgpu_ring_mux_fini(&adev->gfx.muxer);
2346 	}
2347 
2348 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2349 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2350 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2351 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2352 
2353 	amdgpu_gfx_mqd_sw_fini(adev, 0);
2354 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2355 	amdgpu_gfx_kiq_fini(adev, 0);
2356 
2357 	gfx_v9_0_mec_fini(adev);
2358 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2359 				&adev->gfx.rlc.clear_state_gpu_addr,
2360 				(void **)&adev->gfx.rlc.cs_ptr);
2361 	if (adev->flags & AMD_IS_APU) {
2362 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2363 				&adev->gfx.rlc.cp_table_gpu_addr,
2364 				(void **)&adev->gfx.rlc.cp_table_ptr);
2365 	}
2366 	gfx_v9_0_free_microcode(adev);
2367 
2368 	kfree(adev->gfx.ip_dump_core);
2369 	kfree(adev->gfx.ip_dump_compute_queues);
2370 
2371 	return 0;
2372 }
2373 
2374 
2375 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2376 {
2377 	/* TODO */
2378 }
2379 
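/*
 * Program GRBM_GFX_INDEX to target a specific SE/SH/instance, or broadcast
 * to all of them when the corresponding argument is 0xffffffff.
 */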
2380 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2381 			   u32 instance, int xcc_id)
2382 {
2383 	u32 data;
2384 
2385 	if (instance == 0xffffffff)
2386 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2387 	else
2388 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2389 
2390 	if (se_num == 0xffffffff)
2391 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2392 	else
2393 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2394 
2395 	if (sh_num == 0xffffffff)
2396 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2397 	else
2398 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2399 
2400 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2401 }
2402 
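/*
 * Return a bitmap of active render backends for the currently selected
 * SE/SH, derived from the CC/GC_USER backend-disable registers.
 */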
2403 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2404 {
2405 	u32 data, mask;
2406 
2407 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2408 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2409 
2410 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2411 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2412 
2413 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2414 					 adev->gfx.config.max_sh_per_se);
2415 
2416 	return (~data) & mask;
2417 }
2418 
2419 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2420 {
2421 	int i, j;
2422 	u32 data;
2423 	u32 active_rbs = 0;
2424 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2425 					adev->gfx.config.max_sh_per_se;
2426 
2427 	mutex_lock(&adev->grbm_idx_mutex);
2428 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2429 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2430 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2431 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2432 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2433 					       rb_bitmap_width_per_sh);
2434 		}
2435 	}
2436 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2437 	mutex_unlock(&adev->grbm_idx_mutex);
2438 
2439 	adev->gfx.config.backend_enable_mask = active_rbs;
2440 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2441 }
2442 
2443 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2444 				uint32_t first_vmid,
2445 				uint32_t last_vmid)
2446 {
2447 	uint32_t data;
2448 	uint32_t trap_config_vmid_mask = 0;
2449 	int i;
2450 
2451 	/* Calculate trap config vmid mask */
2452 	for (i = first_vmid; i < last_vmid; i++)
2453 		trap_config_vmid_mask |= (1 << i);
2454 
2455 	data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2456 			VMID_SEL, trap_config_vmid_mask);
2457 	data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2458 			TRAP_EN, 1);
2459 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2460 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2461 
2462 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2463 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2464 }
2465 
2466 #define DEFAULT_SH_MEM_BASES	(0x6000)
2467 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2468 {
2469 	int i;
2470 	uint32_t sh_mem_config;
2471 	uint32_t sh_mem_bases;
2472 
2473 	/*
2474 	 * Configure apertures:
2475 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2476 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2477 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2478 	 */
2479 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2480 
2481 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2482 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2483 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2484 
2485 	mutex_lock(&adev->srbm_mutex);
2486 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2487 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2488 		/* CP and shaders */
2489 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2490 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2491 	}
2492 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2493 	mutex_unlock(&adev->srbm_mutex);
2494 
2495 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2496 	 * access. These should be enabled by FW for target VMIDs. */
2497 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2498 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2499 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2500 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2501 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2502 	}
2503 }
2504 
2505 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2506 {
2507 	int vmid;
2508 
2509 	/*
2510 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2511 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2512 	 * the driver can enable them for graphics. VMID0 should maintain
2513 	 * access so that HWS firmware can save/restore entries.
2514 	 */
2515 	for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2516 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2517 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2518 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2519 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2520 	}
2521 }
2522 
2523 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2524 {
2525 	uint32_t tmp;
2526 
2527 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2528 	case IP_VERSION(9, 4, 1):
2529 		tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2530 		tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2531 				!READ_ONCE(adev->barrier_has_auto_waitcnt));
2532 		WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2533 		break;
2534 	default:
2535 		break;
2536 	}
2537 }
2538 
2539 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2540 {
2541 	u32 tmp;
2542 	int i;
2543 
2544 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2545 
2546 	gfx_v9_0_tiling_mode_table_init(adev);
2547 
2548 	if (adev->gfx.num_gfx_rings)
2549 		gfx_v9_0_setup_rb(adev);
2550 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2551 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2552 
2553 	/* XXX SH_MEM regs */
2554 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2555 	mutex_lock(&adev->srbm_mutex);
2556 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2557 		soc15_grbm_select(adev, 0, 0, 0, i, 0);
2558 		/* CP and shaders */
2559 		if (i == 0) {
2560 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2561 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2562 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2563 					    !!adev->gmc.noretry);
2564 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2565 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2566 		} else {
2567 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2568 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2569 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2570 					    !!adev->gmc.noretry);
2571 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2572 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2573 				(adev->gmc.private_aperture_start >> 48));
2574 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2575 				(adev->gmc.shared_aperture_start >> 48));
2576 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2577 		}
2578 	}
2579 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2580 
2581 	mutex_unlock(&adev->srbm_mutex);
2582 
2583 	gfx_v9_0_init_compute_vmid(adev);
2584 	gfx_v9_0_init_gds_vmid(adev);
2585 	gfx_v9_0_init_sq_config(adev);
2586 }
2587 
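/*
 * Wait for the RLC serdes CU masters on every SE/SH to go idle (logging the
 * SE/SH that times out), then wait for the non-CU masters.
 */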
2588 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2589 {
2590 	u32 i, j, k;
2591 	u32 mask;
2592 
2593 	mutex_lock(&adev->grbm_idx_mutex);
2594 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2595 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2596 			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2597 			for (k = 0; k < adev->usec_timeout; k++) {
2598 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2599 					break;
2600 				udelay(1);
2601 			}
2602 			if (k == adev->usec_timeout) {
2603 				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2604 						      0xffffffff, 0xffffffff, 0);
2605 				mutex_unlock(&adev->grbm_idx_mutex);
2606 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2607 					 i, j);
2608 				return;
2609 			}
2610 		}
2611 	}
2612 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2613 	mutex_unlock(&adev->grbm_idx_mutex);
2614 
2615 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2616 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2617 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2618 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2619 	for (k = 0; k < adev->usec_timeout; k++) {
2620 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2621 			break;
2622 		udelay(1);
2623 	}
2624 }
2625 
2626 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2627 					       bool enable)
2628 {
2629 	u32 tmp;
2630 
2631 	/* These interrupts should be enabled to drive DS clock */
2632 
2633 	tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2634 
2635 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2636 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2637 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2638 	if (adev->gfx.num_gfx_rings)
2639 		tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2640 
2641 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2642 }
2643 
2644 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2645 {
2646 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2647 	/* csib */
2648 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2649 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2650 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2651 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2652 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2653 			adev->gfx.rlc.clear_state_size);
2654 }
2655 
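/*
 * Walk the RLC register_list_format blob starting at @indirect_offset,
 * recording where each indirect block starts and collecting the set of
 * unique indirect register offsets it references.
 */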
2656 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2657 				int indirect_offset,
2658 				int list_size,
2659 				int *unique_indirect_regs,
2660 				int unique_indirect_reg_count,
2661 				int *indirect_start_offsets,
2662 				int *indirect_start_offsets_count,
2663 				int max_start_offsets_count)
2664 {
2665 	int idx;
2666 
2667 	for (; indirect_offset < list_size; indirect_offset++) {
2668 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2669 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2670 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2671 
2672 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2673 			indirect_offset += 2;
2674 
2675 			/* look for the matching index */
2676 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2677 				if (unique_indirect_regs[idx] ==
2678 					register_list_format[indirect_offset] ||
2679 					!unique_indirect_regs[idx])
2680 					break;
2681 			}
2682 
2683 			BUG_ON(idx >= unique_indirect_reg_count);
2684 
2685 			if (!unique_indirect_regs[idx])
2686 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2687 
2688 			indirect_offset++;
2689 		}
2690 	}
2691 }
2692 
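/*
 * Program the RLC save/restore machine: upload the register-restore table
 * to ARAM, the direct and indirect register lists to GPM scratch, then the
 * list size, starting offsets and unique indirect register index/data pairs.
 */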
2693 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2694 {
2695 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2696 	int unique_indirect_reg_count = 0;
2697 
2698 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2699 	int indirect_start_offsets_count = 0;
2700 
2701 	int list_size = 0;
2702 	int i = 0, j = 0;
2703 	u32 tmp = 0;
2704 
2705 	u32 *register_list_format =
2706 		kmemdup(adev->gfx.rlc.register_list_format,
2707 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2708 	if (!register_list_format)
2709 		return -ENOMEM;
2710 
2711 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2712 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2713 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2714 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2715 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2716 				    unique_indirect_regs,
2717 				    unique_indirect_reg_count,
2718 				    indirect_start_offsets,
2719 				    &indirect_start_offsets_count,
2720 				    ARRAY_SIZE(indirect_start_offsets));
2721 
2722 	/* enable auto inc in case it is disabled */
2723 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2724 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2725 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2726 
2727 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2728 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2729 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2730 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2731 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2732 			adev->gfx.rlc.register_restore[i]);
2733 
2734 	/* load indirect register */
2735 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2736 		adev->gfx.rlc.reg_list_format_start);
2737 
2738 	/* direct register portion */
2739 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2740 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2741 			register_list_format[i]);
2742 
2743 	/* indirect register portion */
2744 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2745 		if (register_list_format[i] == 0xFFFFFFFF) {
2746 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2747 			continue;
2748 		}
2749 
2750 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2751 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2752 
2753 		for (j = 0; j < unique_indirect_reg_count; j++) {
2754 			if (register_list_format[i] == unique_indirect_regs[j]) {
2755 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2756 				break;
2757 			}
2758 		}
2759 
2760 		BUG_ON(j >= unique_indirect_reg_count);
2761 
2762 		i++;
2763 	}
2764 
2765 	/* set save/restore list size */
2766 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2767 	list_size = list_size >> 1;
2768 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2769 		adev->gfx.rlc.reg_restore_list_size);
2770 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2771 
2772 	/* write the starting offsets to RLC scratch ram */
2773 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2774 		adev->gfx.rlc.starting_offsets_start);
2775 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2776 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2777 		       indirect_start_offsets[i]);
2778 
2779 	/* load unique indirect regs */
2780 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2781 		if (unique_indirect_regs[i] != 0) {
2782 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2783 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2784 			       unique_indirect_regs[i] & 0x3FFFF);
2785 
2786 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2787 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2788 			       unique_indirect_regs[i] >> 20);
2789 		}
2790 	}
2791 
2792 	kfree(register_list_format);
2793 	return 0;
2794 }
2795 
2796 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2797 {
2798 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2799 }
2800 
2801 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2802 					     bool enable)
2803 {
2804 	uint32_t data = 0;
2805 	uint32_t default_data = 0;
2806 
2807 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2808 	if (enable) {
2809 		/* enable GFXIP control over CGPG */
2810 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2811 		if (default_data != data)
2812 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2813 
2814 		/* update status */
2815 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2816 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2817 		if (default_data != data)
2818 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2819 	} else {
2820 		/* restore GFXIP control over CGPG */
2821 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2822 		if (default_data != data)
2823 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2824 	}
2825 }
2826 
2827 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2828 {
2829 	uint32_t data = 0;
2830 
2831 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2832 			      AMD_PG_SUPPORT_GFX_SMG |
2833 			      AMD_PG_SUPPORT_GFX_DMG)) {
2834 		/* init IDLE_POLL_COUNT = 60 */
2835 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2836 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2837 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2838 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2839 
2840 		/* init RLC PG Delay */
2841 		data = 0;
2842 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2843 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2844 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2845 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2846 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2847 
2848 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2849 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2850 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2851 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2852 
2853 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2854 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2855 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2856 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2857 
2858 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2859 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2860 
2861 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2862 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2863 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2864 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2865 			pwr_10_0_gfxip_control_over_cgpg(adev, true);
2866 	}
2867 }
2868 
2869 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2870 						bool enable)
2871 {
2872 	uint32_t data = 0;
2873 	uint32_t default_data = 0;
2874 
2875 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2876 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2877 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2878 			     enable ? 1 : 0);
2879 	if (default_data != data)
2880 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2881 }
2882 
2883 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2884 						bool enable)
2885 {
2886 	uint32_t data = 0;
2887 	uint32_t default_data = 0;
2888 
2889 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2890 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2891 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2892 			     enable ? 1 : 0);
2893 	if (default_data != data)
2894 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2895 }
2896 
2897 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2898 					bool enable)
2899 {
2900 	uint32_t data = 0;
2901 	uint32_t default_data = 0;
2902 
2903 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2904 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2905 			     CP_PG_DISABLE,
2906 			     enable ? 0 : 1);
2907 	if (default_data != data)
2908 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2909 }
2910 
2911 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2912 						bool enable)
2913 {
2914 	uint32_t data, default_data;
2915 
2916 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2917 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2918 			     GFX_POWER_GATING_ENABLE,
2919 			     enable ? 1 : 0);
2920 	if (default_data != data)
2921 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2922 }
2923 
2924 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2925 						bool enable)
2926 {
2927 	uint32_t data, default_data;
2928 
2929 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2930 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2931 			     GFX_PIPELINE_PG_ENABLE,
2932 			     enable ? 1 : 0);
2933 	if (default_data != data)
2934 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2935 
2936 	if (!enable)
2937 		/* read any GFX register to wake up GFX */
2938 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2939 }
2940 
2941 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2942 						       bool enable)
2943 {
2944 	uint32_t data, default_data;
2945 
2946 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2947 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2948 			     STATIC_PER_CU_PG_ENABLE,
2949 			     enable ? 1 : 0);
2950 	if (default_data != data)
2951 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2952 }
2953 
2954 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2955 						bool enable)
2956 {
2957 	uint32_t data, default_data;
2958 
2959 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2960 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2961 			     DYN_PER_CU_PG_ENABLE,
2962 			     enable ? 1 : 0);
2963 	if (default_data != data)
2964 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2965 }
2966 
2967 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2968 {
2969 	gfx_v9_0_init_csb(adev);
2970 
2971 	/*
2972 	 * The RLC save/restore list is supported since RLC v2_1 and is
2973 	 * required by the gfxoff feature.
2974 	 */
2975 	if (adev->gfx.rlc.is_rlc_v2_1) {
2976 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
2977 			    IP_VERSION(9, 2, 1) ||
2978 		    (adev->apu_flags & AMD_APU_IS_RAVEN2))
2979 			gfx_v9_1_init_rlc_save_restore_list(adev);
2980 		gfx_v9_0_enable_save_restore_machine(adev);
2981 	}
2982 
2983 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2984 			      AMD_PG_SUPPORT_GFX_SMG |
2985 			      AMD_PG_SUPPORT_GFX_DMG |
2986 			      AMD_PG_SUPPORT_CP |
2987 			      AMD_PG_SUPPORT_GDS |
2988 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2989 		WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
2990 			     adev->gfx.rlc.cp_table_gpu_addr >> 8);
2991 		gfx_v9_0_init_gfx_power_gating(adev);
2992 	}
2993 }
2994 
2995 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2996 {
2997 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2998 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2999 	gfx_v9_0_wait_for_rlc_serdes(adev);
3000 }
3001 
3002 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3003 {
3004 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3005 	udelay(50);
3006 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3007 	udelay(50);
3008 }
3009 
3010 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3011 {
3012 #ifdef AMDGPU_RLC_DEBUG_RETRY
3013 	u32 rlc_ucode_ver;
3014 #endif
3015 
3016 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3017 	udelay(50);
3018 
3019 	/* carrizo enables the cp interrupt after the cp has been initialized */
3020 	if (!(adev->flags & AMD_IS_APU)) {
3021 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3022 		udelay(50);
3023 	}
3024 
3025 #ifdef AMDGPU_RLC_DEBUG_RETRY
3026 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
3027 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3028 	if (rlc_ucode_ver == 0x108) {
3029 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3030 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
3031 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3032 		 * default is 0x9C4 to create a 100us interval */
3033 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3034 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3035 		 * to disable the page fault retry interrupts, default is
3036 		 * 0x100 (256) */
3037 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3038 	}
3039 #endif
3040 }
3041 
3042 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3043 {
3044 	const struct rlc_firmware_header_v2_0 *hdr;
3045 	const __le32 *fw_data;
3046 	unsigned i, fw_size;
3047 
3048 	if (!adev->gfx.rlc_fw)
3049 		return -EINVAL;
3050 
3051 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3052 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3053 
3054 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3055 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3056 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3057 
3058 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3059 			RLCG_UCODE_LOADING_START_ADDRESS);
3060 	for (i = 0; i < fw_size; i++)
3061 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3062 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3063 
3064 	return 0;
3065 }
3066 
3067 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3068 {
3069 	int r;
3070 
3071 	if (amdgpu_sriov_vf(adev)) {
3072 		gfx_v9_0_init_csb(adev);
3073 		return 0;
3074 	}
3075 
3076 	adev->gfx.rlc.funcs->stop(adev);
3077 
3078 	/* disable CG */
3079 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3080 
3081 	gfx_v9_0_init_pg(adev);
3082 
3083 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3084 		/* legacy rlc firmware loading */
3085 		r = gfx_v9_0_rlc_load_microcode(adev);
3086 		if (r)
3087 			return r;
3088 	}
3089 
3090 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3091 	case IP_VERSION(9, 2, 2):
3092 	case IP_VERSION(9, 1, 0):
3093 		gfx_v9_0_init_lbpw(adev);
3094 		if (amdgpu_lbpw == 0)
3095 			gfx_v9_0_enable_lbpw(adev, false);
3096 		else
3097 			gfx_v9_0_enable_lbpw(adev, true);
3098 		break;
3099 	case IP_VERSION(9, 4, 0):
3100 		gfx_v9_4_init_lbpw(adev);
3101 		if (amdgpu_lbpw > 0)
3102 			gfx_v9_0_enable_lbpw(adev, true);
3103 		else
3104 			gfx_v9_0_enable_lbpw(adev, false);
3105 		break;
3106 	default:
3107 		break;
3108 	}
3109 
3110 	gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
3111 
3112 	adev->gfx.rlc.funcs->start(adev);
3113 
3114 	return 0;
3115 }
3116 
3117 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3118 {
3119 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3120 
3121 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3122 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3123 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3124 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3125 	udelay(50);
3126 }
3127 
3128 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3129 {
3130 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3131 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3132 	const struct gfx_firmware_header_v1_0 *me_hdr;
3133 	const __le32 *fw_data;
3134 	unsigned i, fw_size;
3135 
3136 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3137 		return -EINVAL;
3138 
3139 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3140 		adev->gfx.pfp_fw->data;
3141 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3142 		adev->gfx.ce_fw->data;
3143 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3144 		adev->gfx.me_fw->data;
3145 
3146 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3147 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3148 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3149 
3150 	gfx_v9_0_cp_gfx_enable(adev, false);
3151 
3152 	/* PFP */
3153 	fw_data = (const __le32 *)
3154 		(adev->gfx.pfp_fw->data +
3155 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3156 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3157 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3158 	for (i = 0; i < fw_size; i++)
3159 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3160 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3161 
3162 	/* CE */
3163 	fw_data = (const __le32 *)
3164 		(adev->gfx.ce_fw->data +
3165 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3166 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3167 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3168 	for (i = 0; i < fw_size; i++)
3169 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3170 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3171 
3172 	/* ME */
3173 	fw_data = (const __le32 *)
3174 		(adev->gfx.me_fw->data +
3175 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3176 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3177 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3178 	for (i = 0; i < fw_size; i++)
3179 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3180 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3181 
3182 	return 0;
3183 }
3184 
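/*
 * Initialize the gfx CP and submit the clear-state buffer on the first gfx
 * ring: PREAMBLE/CONTEXT_CONTROL packets, the gfx9 clear-state context
 * register extents, CLEAR_STATE and the CE partition SET_BASE.
 */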
3185 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3186 {
3187 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3188 	const struct cs_section_def *sect = NULL;
3189 	const struct cs_extent_def *ext = NULL;
3190 	int r, i, tmp;
3191 
3192 	/* init the CP */
3193 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3194 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3195 
3196 	gfx_v9_0_cp_gfx_enable(adev, true);
3197 
3198 	/* Limit this quirk to the gfx9 APU series; it has been confirmed
3199 	 * that the gfx10/gfx11 APUs do not need this update.
3200 	 */
3201 	if (adev->flags & AMD_IS_APU &&
3202 			adev->in_s3 && !adev->suspend_complete) {
3203 		DRM_INFO(" Will skip the CSB packet resubmit\n");
3204 		return 0;
3205 	}
3206 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3207 	if (r) {
3208 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3209 		return r;
3210 	}
3211 
3212 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3213 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3214 
3215 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3216 	amdgpu_ring_write(ring, 0x80000000);
3217 	amdgpu_ring_write(ring, 0x80000000);
3218 
3219 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3220 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3221 			if (sect->id == SECT_CONTEXT) {
3222 				amdgpu_ring_write(ring,
3223 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3224 					       ext->reg_count));
3225 				amdgpu_ring_write(ring,
3226 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3227 				for (i = 0; i < ext->reg_count; i++)
3228 					amdgpu_ring_write(ring, ext->extent[i]);
3229 			}
3230 		}
3231 	}
3232 
3233 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3234 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3235 
3236 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3237 	amdgpu_ring_write(ring, 0);
3238 
3239 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3240 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3241 	amdgpu_ring_write(ring, 0x8000);
3242 	amdgpu_ring_write(ring, 0x8000);
3243 
3244 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3245 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3246 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3247 	amdgpu_ring_write(ring, tmp);
3248 	amdgpu_ring_write(ring, 0);
3249 
3250 	amdgpu_ring_commit(ring);
3251 
3252 	return 0;
3253 }
3254 
3255 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3256 {
3257 	struct amdgpu_ring *ring;
3258 	u32 tmp;
3259 	u32 rb_bufsz;
3260 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3261 
3262 	/* Set the write pointer delay */
3263 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3264 
3265 	/* set the RB to use vmid 0 */
3266 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3267 
3268 	/* Set ring buffer size */
3269 	ring = &adev->gfx.gfx_ring[0];
3270 	rb_bufsz = order_base_2(ring->ring_size / 8);
3271 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3272 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3273 #ifdef __BIG_ENDIAN
3274 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3275 #endif
3276 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3277 
3278 	/* Initialize the ring buffer's write pointers */
3279 	ring->wptr = 0;
3280 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3281 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3282 
3283 	/* set the wb address whether it's enabled or not */
3284 	rptr_addr = ring->rptr_gpu_addr;
3285 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3286 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3287 
3288 	wptr_gpu_addr = ring->wptr_gpu_addr;
3289 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3290 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3291 
3292 	mdelay(1);
3293 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3294 
3295 	rb_addr = ring->gpu_addr >> 8;
3296 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3297 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3298 
3299 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3300 	if (ring->use_doorbell) {
3301 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3302 				    DOORBELL_OFFSET, ring->doorbell_index);
3303 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3304 				    DOORBELL_EN, 1);
3305 	} else {
3306 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3307 	}
3308 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3309 
3310 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3311 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3312 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3313 
3314 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3315 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3316 
3317 
3318 	/* start the ring */
3319 	gfx_v9_0_cp_gfx_start(adev);
3320 
3321 	return 0;
3322 }
3323 
3324 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3325 {
3326 	if (enable) {
3327 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3328 	} else {
3329 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3330 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3331 		adev->gfx.kiq[0].ring.sched.ready = false;
3332 	}
3333 	udelay(50);
3334 }
3335 
3336 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3337 {
3338 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3339 	const __le32 *fw_data;
3340 	unsigned i;
3341 	u32 tmp;
3342 
3343 	if (!adev->gfx.mec_fw)
3344 		return -EINVAL;
3345 
3346 	gfx_v9_0_cp_compute_enable(adev, false);
3347 
3348 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3349 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3350 
3351 	fw_data = (const __le32 *)
3352 		(adev->gfx.mec_fw->data +
3353 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3354 	tmp = 0;
3355 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3356 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3357 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3358 
3359 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3360 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3361 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3362 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3363 
3364 	/* MEC1 */
3365 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3366 			 mec_hdr->jt_offset);
3367 	for (i = 0; i < mec_hdr->jt_size; i++)
3368 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3369 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3370 
3371 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3372 			adev->gfx.mec_fw_version);
3373 	/* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3374 
3375 	return 0;
3376 }
3377 
3378 /* KIQ functions */
3379 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3380 {
3381 	uint32_t tmp;
3382 	struct amdgpu_device *adev = ring->adev;
3383 
3384 	/* tell the RLC which queue is the KIQ */
3385 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3386 	tmp &= 0xffffff00;
3387 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3388 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3389 	tmp |= 0x80;
3390 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3391 }
3392 
3393 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3394 {
3395 	struct amdgpu_device *adev = ring->adev;
3396 
3397 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3398 		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3399 			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3400 			mqd->cp_hqd_queue_priority =
3401 				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3402 		}
3403 	}
3404 }
3405 
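/*
 * Fill the v9 MQD (memory queue descriptor) for a compute or KIQ ring. The
 * values mirror the CP_HQD_* registers and are later written to hardware by
 * gfx_v9_0_kiq_init_register() or consumed via the KIQ map_queues path.
 */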
3406 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3407 {
3408 	struct amdgpu_device *adev = ring->adev;
3409 	struct v9_mqd *mqd = ring->mqd_ptr;
3410 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3411 	uint32_t tmp;
3412 
3413 	mqd->header = 0xC0310800;
3414 	mqd->compute_pipelinestat_enable = 0x00000001;
3415 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3416 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3417 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3418 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3419 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3420 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3421 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3422 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3423 	mqd->compute_misc_reserved = 0x00000003;
3424 
3425 	mqd->dynamic_cu_mask_addr_lo =
3426 		lower_32_bits(ring->mqd_gpu_addr
3427 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3428 	mqd->dynamic_cu_mask_addr_hi =
3429 		upper_32_bits(ring->mqd_gpu_addr
3430 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3431 
3432 	eop_base_addr = ring->eop_gpu_addr >> 8;
3433 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3434 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3435 
3436 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3437 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3438 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3439 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
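	/* e.g. GFX9_MEC_HPD_SIZE = 4096 bytes = 1024 dwords, so
	 * order_base_2(1024) - 1 = 9 and the EOP buffer is 2^(9+1) = 1024 dwords.
	 */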
3440 
3441 	mqd->cp_hqd_eop_control = tmp;
3442 
3443 	/* enable doorbell? */
3444 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3445 
3446 	if (ring->use_doorbell) {
3447 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3448 				    DOORBELL_OFFSET, ring->doorbell_index);
3449 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3450 				    DOORBELL_EN, 1);
3451 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3452 				    DOORBELL_SOURCE, 0);
3453 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3454 				    DOORBELL_HIT, 0);
3455 	} else {
3456 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3457 					 DOORBELL_EN, 0);
3458 	}
3459 
3460 	mqd->cp_hqd_pq_doorbell_control = tmp;
3461 
3462 	/* disable the queue if it's active */
3463 	ring->wptr = 0;
3464 	mqd->cp_hqd_dequeue_request = 0;
3465 	mqd->cp_hqd_pq_rptr = 0;
3466 	mqd->cp_hqd_pq_wptr_lo = 0;
3467 	mqd->cp_hqd_pq_wptr_hi = 0;
3468 
3469 	/* set the pointer to the MQD */
3470 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3471 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3472 
3473 	/* set MQD vmid to 0 */
3474 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3475 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3476 	mqd->cp_mqd_control = tmp;
3477 
3478 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3479 	hqd_gpu_addr = ring->gpu_addr >> 8;
3480 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3481 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3482 
3483 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3484 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3485 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3486 			    (order_base_2(ring->ring_size / 4) - 1));
3487 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3488 			(order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3489 #ifdef __BIG_ENDIAN
3490 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3491 #endif
3492 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3493 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3494 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3495 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3496 	mqd->cp_hqd_pq_control = tmp;
3497 
3498 	/* set the wb address whether it's enabled or not */
3499 	wb_gpu_addr = ring->rptr_gpu_addr;
3500 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3501 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3502 		upper_32_bits(wb_gpu_addr) & 0xffff;
3503 
3504 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3505 	wb_gpu_addr = ring->wptr_gpu_addr;
3506 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3507 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3508 
3509 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3510 	ring->wptr = 0;
3511 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3512 
3513 	/* set the vmid for the queue */
3514 	mqd->cp_hqd_vmid = 0;
3515 
3516 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3517 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3518 	mqd->cp_hqd_persistent_state = tmp;
3519 
3520 	/* set MIN_IB_AVAIL_SIZE */
3521 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3522 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3523 	mqd->cp_hqd_ib_control = tmp;
3524 
3525 	/* set static priority for a queue/ring */
3526 	gfx_v9_0_mqd_set_priority(ring, mqd);
3527 	mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3528 
3529 	/* the map_queues packet doesn't need to activate the queue,
3530 	 * so only the kiq needs to set this field.
3531 	 */
3532 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3533 		mqd->cp_hqd_active = 1;
3534 
3535 	return 0;
3536 }
3537 
3538 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3539 {
3540 	struct amdgpu_device *adev = ring->adev;
3541 	struct v9_mqd *mqd = ring->mqd_ptr;
3542 	int j;
3543 
3544 	/* disable wptr polling */
3545 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3546 
3547 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3548 	       mqd->cp_hqd_eop_base_addr_lo);
3549 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3550 	       mqd->cp_hqd_eop_base_addr_hi);
3551 
3552 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3553 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3554 	       mqd->cp_hqd_eop_control);
3555 
3556 	/* enable doorbell? */
3557 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3558 	       mqd->cp_hqd_pq_doorbell_control);
3559 
3560 	/* disable the queue if it's active */
3561 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3562 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3563 		for (j = 0; j < adev->usec_timeout; j++) {
3564 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3565 				break;
3566 			udelay(1);
3567 		}
3568 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3569 		       mqd->cp_hqd_dequeue_request);
3570 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3571 		       mqd->cp_hqd_pq_rptr);
3572 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3573 		       mqd->cp_hqd_pq_wptr_lo);
3574 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3575 		       mqd->cp_hqd_pq_wptr_hi);
3576 	}
3577 
3578 	/* set the pointer to the MQD */
3579 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3580 	       mqd->cp_mqd_base_addr_lo);
3581 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3582 	       mqd->cp_mqd_base_addr_hi);
3583 
3584 	/* set MQD vmid to 0 */
3585 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3586 	       mqd->cp_mqd_control);
3587 
3588 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3589 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3590 	       mqd->cp_hqd_pq_base_lo);
3591 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3592 	       mqd->cp_hqd_pq_base_hi);
3593 
3594 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3595 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3596 	       mqd->cp_hqd_pq_control);
3597 
3598 	/* set the wb address whether it's enabled or not */
3599 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3600 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3601 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3602 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3603 
3604 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3605 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3606 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3607 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3608 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3609 
3610 	/* enable the doorbell if requested */
3611 	if (ring->use_doorbell) {
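		/* doorbell_index values are 64-bit doorbell slot indices:
		 * "* 2" converts to a dword index and "<< 2" to a byte offset.
		 */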
3612 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3613 					(adev->doorbell_index.kiq * 2) << 2);
3614 		/* If GC has entered CGPG, ringing a doorbell beyond the first page
3615 		 * doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to work
3616 		 * around this issue. This change has to be aligned with a firmware
3617 		 * update.
3618 		 */
3619 		if (check_if_enlarge_doorbell_range(adev))
3620 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3621 					(adev->doorbell.size - 4));
3622 		else
3623 			WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3624 					(adev->doorbell_index.userqueue_end * 2) << 2);
3625 	}
3626 
3627 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3628 	       mqd->cp_hqd_pq_doorbell_control);
3629 
3630 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3631 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3632 	       mqd->cp_hqd_pq_wptr_lo);
3633 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3634 	       mqd->cp_hqd_pq_wptr_hi);
3635 
3636 	/* set the vmid for the queue */
3637 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3638 
3639 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3640 	       mqd->cp_hqd_persistent_state);
3641 
3642 	/* activate the queue */
3643 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3644 	       mqd->cp_hqd_active);
3645 
3646 	if (ring->use_doorbell)
3647 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3648 
3649 	return 0;
3650 }
3651 
3652 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3653 {
3654 	struct amdgpu_device *adev = ring->adev;
3655 	int j;
3656 
3657 	/* disable the queue if it's active */
3658 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3659 
3660 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3661 
3662 		for (j = 0; j < adev->usec_timeout; j++) {
3663 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3664 				break;
3665 			udelay(1);
3666 		}
3667 
3668 		if (j == adev->usec_timeout) {
3669 			DRM_DEBUG("KIQ dequeue request failed.\n");
3670 
3671 			/* Manual disable if dequeue request times out */
3672 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3673 		}
3674 
3675 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3676 		      0);
3677 	}
3678 
3679 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3680 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3681 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3682 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3683 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3684 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3685 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3686 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3687 
3688 	return 0;
3689 }
3690 
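/*
 * Initialize the KIQ ring's queue: on a GPU reset the MQD is restored from
 * the backup copy and only the HQD registers are re-programmed; otherwise a
 * fresh MQD is built with gfx_v9_0_mqd_init() and backed up afterwards.
 */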
3691 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3692 {
3693 	struct amdgpu_device *adev = ring->adev;
3694 	struct v9_mqd *mqd = ring->mqd_ptr;
3695 	struct v9_mqd *tmp_mqd;
3696 
3697 	gfx_v9_0_kiq_setting(ring);
3698 
3699 	/* The GPU could be in a bad state during probe and the driver may trigger
3700 	 * a reset after loading the SMU; in this case the mqd has not been
3701 	 * initialized and the driver needs to re-init it.
3702 	 * Check mqd->cp_hqd_pq_control since this value should not be 0.
3703 	 */
3704 	tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3705 	if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3706 		/* for GPU_RESET case , reset MQD to a clean status */
3707 		if (adev->gfx.kiq[0].mqd_backup)
3708 			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3709 
3710 		/* reset ring buffer */
3711 		ring->wptr = 0;
3712 		amdgpu_ring_clear_ring(ring);
3713 
3714 		mutex_lock(&adev->srbm_mutex);
3715 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3716 		gfx_v9_0_kiq_init_register(ring);
3717 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3718 		mutex_unlock(&adev->srbm_mutex);
3719 	} else {
3720 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3721 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3722 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3723 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3724 			amdgpu_ring_clear_ring(ring);
3725 		mutex_lock(&adev->srbm_mutex);
3726 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3727 		gfx_v9_0_mqd_init(ring);
3728 		gfx_v9_0_kiq_init_register(ring);
3729 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3730 		mutex_unlock(&adev->srbm_mutex);
3731 
3732 		if (adev->gfx.kiq[0].mqd_backup)
3733 			memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3734 	}
3735 
3736 	return 0;
3737 }
3738 
3739 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3740 {
3741 	struct amdgpu_device *adev = ring->adev;
3742 	struct v9_mqd *mqd = ring->mqd_ptr;
3743 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3744 	struct v9_mqd *tmp_mqd;
3745 
3746 	/* Same as the kiq init above: the driver needs to re-init the mqd if
3747 	 * mqd->cp_hqd_pq_control has not been initialized before.
3748 	 */
3749 	tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3750 
3751 	if (!tmp_mqd->cp_hqd_pq_control ||
3752 	    (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3753 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3754 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3755 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3756 		mutex_lock(&adev->srbm_mutex);
3757 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3758 		gfx_v9_0_mqd_init(ring);
3759 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3760 		mutex_unlock(&adev->srbm_mutex);
3761 
3762 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3763 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3764 	} else {
3765 		/* restore MQD to a clean status */
3766 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3767 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3768 		/* reset ring buffer */
3769 		ring->wptr = 0;
3770 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3771 		amdgpu_ring_clear_ring(ring);
3772 	}
3773 
3774 	return 0;
3775 }
3776 
3777 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3778 {
3779 	struct amdgpu_ring *ring;
3780 	int r;
3781 
3782 	ring = &adev->gfx.kiq[0].ring;
3783 
3784 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3785 	if (unlikely(r != 0))
3786 		return r;
3787 
3788 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3789 	if (unlikely(r != 0)) {
3790 		amdgpu_bo_unreserve(ring->mqd_obj);
3791 		return r;
3792 	}
3793 
3794 	gfx_v9_0_kiq_init_queue(ring);
3795 	amdgpu_bo_kunmap(ring->mqd_obj);
3796 	ring->mqd_ptr = NULL;
3797 	amdgpu_bo_unreserve(ring->mqd_obj);
3798 	return 0;
3799 }
3800 
3801 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3802 {
3803 	struct amdgpu_ring *ring = NULL;
3804 	int r = 0, i;
3805 
3806 	gfx_v9_0_cp_compute_enable(adev, true);
3807 
3808 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3809 		ring = &adev->gfx.compute_ring[i];
3810 
3811 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3812 		if (unlikely(r != 0))
3813 			goto done;
3814 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3815 		if (!r) {
3816 			r = gfx_v9_0_kcq_init_queue(ring);
3817 			amdgpu_bo_kunmap(ring->mqd_obj);
3818 			ring->mqd_ptr = NULL;
3819 		}
3820 		amdgpu_bo_unreserve(ring->mqd_obj);
3821 		if (r)
3822 			goto done;
3823 	}
3824 
3825 	r = amdgpu_gfx_enable_kcq(adev, 0);
3826 done:
3827 	return r;
3828 }
3829 
3830 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3831 {
3832 	int r, i;
3833 	struct amdgpu_ring *ring;
3834 
3835 	if (!(adev->flags & AMD_IS_APU))
3836 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3837 
3838 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3839 		if (adev->gfx.num_gfx_rings) {
3840 			/* legacy firmware loading */
3841 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3842 			if (r)
3843 				return r;
3844 		}
3845 
3846 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3847 		if (r)
3848 			return r;
3849 	}
3850 
3851 	r = gfx_v9_0_kiq_resume(adev);
3852 	if (r)
3853 		return r;
3854 
3855 	if (adev->gfx.num_gfx_rings) {
3856 		r = gfx_v9_0_cp_gfx_resume(adev);
3857 		if (r)
3858 			return r;
3859 	}
3860 
3861 	r = gfx_v9_0_kcq_resume(adev);
3862 	if (r)
3863 		return r;
3864 
3865 	if (adev->gfx.num_gfx_rings) {
3866 		ring = &adev->gfx.gfx_ring[0];
3867 		r = amdgpu_ring_test_helper(ring);
3868 		if (r)
3869 			return r;
3870 	}
3871 
3872 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3873 		ring = &adev->gfx.compute_ring[i];
3874 		amdgpu_ring_test_helper(ring);
3875 	}
3876 
3877 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3878 
3879 	return 0;
3880 }
3881 
3882 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3883 {
3884 	u32 tmp;
3885 
3886 	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
3887 	    amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
3888 		return;
3889 
3890 	tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3891 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3892 				adev->df.hash_status.hash_64k);
3893 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3894 				adev->df.hash_status.hash_2m);
3895 	tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3896 				adev->df.hash_status.hash_1g);
3897 	WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3898 }
3899 
3900 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3901 {
3902 	if (adev->gfx.num_gfx_rings)
3903 		gfx_v9_0_cp_gfx_enable(adev, enable);
3904 	gfx_v9_0_cp_compute_enable(adev, enable);
3905 }
3906 
3907 static int gfx_v9_0_hw_init(void *handle)
3908 {
3909 	int r;
3910 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3911 
3912 	if (!amdgpu_sriov_vf(adev))
3913 		gfx_v9_0_init_golden_registers(adev);
3914 
3915 	gfx_v9_0_constants_init(adev);
3916 
3917 	gfx_v9_0_init_tcp_config(adev);
3918 
3919 	r = adev->gfx.rlc.funcs->resume(adev);
3920 	if (r)
3921 		return r;
3922 
3923 	r = gfx_v9_0_cp_resume(adev);
3924 	if (r)
3925 		return r;
3926 
3927 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
3928 		gfx_v9_4_2_set_power_brake_sequence(adev);
3929 
3930 	return r;
3931 }
3932 
3933 static int gfx_v9_0_hw_fini(void *handle)
3934 {
3935 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3936 
3937 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3938 		amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3939 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3940 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3941 
3942 	/* DF freeze and kcq disable will fail if a RAS interrupt was triggered */
3943 	if (!amdgpu_ras_intr_triggered())
3944 		/* disable the KCQ to avoid the CPC touching memory that is no longer valid */
3945 		amdgpu_gfx_disable_kcq(adev, 0);
3946 
3947 	if (amdgpu_sriov_vf(adev)) {
3948 		gfx_v9_0_cp_gfx_enable(adev, false);
3949 		/* Polling must be disabled for SRIOV when the hw is finished,
3950 		 * otherwise the CPC engine may keep fetching a WB address that is
3951 		 * already invalid after the sw has finished, triggering a DMAR read
3952 		 * error on the hypervisor side.
3953 		 */
3954 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3955 		return 0;
3956 	}
3957 
3958 	/* Use the deinitialize sequence from CAIL when unbinding the device from
3959 	 * the driver, otherwise the KIQ hangs when binding it back.
3960 	 */
3961 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3962 		mutex_lock(&adev->srbm_mutex);
3963 		soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
3964 				adev->gfx.kiq[0].ring.pipe,
3965 				adev->gfx.kiq[0].ring.queue, 0, 0);
3966 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
3967 		soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3968 		mutex_unlock(&adev->srbm_mutex);
3969 	}
3970 
3971 	gfx_v9_0_cp_enable(adev, false);
3972 
3973 	/* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
3974 	if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
3975 	    (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
3976 		dev_dbg(adev->dev, "Skipping RLC halt\n");
3977 		return 0;
3978 	}
3979 
3980 	adev->gfx.rlc.funcs->stop(adev);
3981 	return 0;
3982 }
3983 
3984 static int gfx_v9_0_suspend(void *handle)
3985 {
3986 	return gfx_v9_0_hw_fini(handle);
3987 }
3988 
3989 static int gfx_v9_0_resume(void *handle)
3990 {
3991 	return gfx_v9_0_hw_init(handle);
3992 }
3993 
3994 static bool gfx_v9_0_is_idle(void *handle)
3995 {
3996 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3997 
3998 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3999 				GRBM_STATUS, GUI_ACTIVE))
4000 		return false;
4001 	else
4002 		return true;
4003 }
4004 
4005 static int gfx_v9_0_wait_for_idle(void *handle)
4006 {
4007 	unsigned i;
4008 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4009 
4010 	for (i = 0; i < adev->usec_timeout; i++) {
4011 		if (gfx_v9_0_is_idle(handle))
4012 			return 0;
4013 		udelay(1);
4014 	}
4015 	return -ETIMEDOUT;
4016 }
4017 
4018 static int gfx_v9_0_soft_reset(void *handle)
4019 {
4020 	u32 grbm_soft_reset = 0;
4021 	u32 tmp;
4022 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4023 
4024 	/* GRBM_STATUS */
4025 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4026 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4027 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4028 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4029 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4030 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4031 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4032 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4033 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4034 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4035 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4036 	}
4037 
4038 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4039 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4040 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4041 	}
4042 
4043 	/* GRBM_STATUS2 */
4044 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4045 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4046 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4047 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4048 
4049 
4050 	if (grbm_soft_reset) {
4051 		/* stop the rlc */
4052 		adev->gfx.rlc.funcs->stop(adev);
4053 
4054 		if (adev->gfx.num_gfx_rings)
4055 			/* Disable GFX parsing/prefetching */
4056 			gfx_v9_0_cp_gfx_enable(adev, false);
4057 
4058 		/* Disable MEC parsing/prefetching */
4059 		gfx_v9_0_cp_compute_enable(adev, false);
4060 
4061 		if (grbm_soft_reset) {
4062 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4063 			tmp |= grbm_soft_reset;
4064 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4065 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4066 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4067 
4068 			udelay(50);
4069 
4070 			tmp &= ~grbm_soft_reset;
4071 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4072 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4073 		}
4074 
4075 		/* Wait a little for things to settle down */
4076 		udelay(50);
4077 	}
4078 	return 0;
4079 }
4080 
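/*
 * Read the 64-bit GPU clock counter through the KIQ: a COPY_DATA packet
 * writes the value into a writeback slot, the CPU waits on a polled fence
 * and then picks the result out of adev->wb.
 */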
4081 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4082 {
4083 	signed long r, cnt = 0;
4084 	unsigned long flags;
4085 	uint32_t seq, reg_val_offs = 0;
4086 	uint64_t value = 0;
4087 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4088 	struct amdgpu_ring *ring = &kiq->ring;
4089 
4090 	BUG_ON(!ring->funcs->emit_rreg);
4091 
4092 	spin_lock_irqsave(&kiq->ring_lock, flags);
4093 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4094 		pr_err("critical bug! too many kiq readers\n");
4095 		goto failed_unlock;
4096 	}
4097 	amdgpu_ring_alloc(ring, 32);
4098 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4099 	amdgpu_ring_write(ring, 9 |	/* src: register */
4100 				(5 << 8) |	/* dst: memory */
4101 				(1 << 16) |	/* count sel */
4102 				(1 << 20));	/* write confirm */
4103 	amdgpu_ring_write(ring, 0);
4104 	amdgpu_ring_write(ring, 0);
4105 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4106 				reg_val_offs * 4));
4107 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4108 				reg_val_offs * 4));
4109 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4110 	if (r)
4111 		goto failed_undo;
4112 
4113 	amdgpu_ring_commit(ring);
4114 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4115 
4116 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4117 
4118 	/* Don't wait any longer in the gpu reset case, because doing so may
4119 	 * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg is
4120 	 * triggered from TTM and ttm_bo_lock_delayed_workqueue() will never
4121 	 * return if we keep waiting in virt_kiq_rreg, which makes gpu_recover()
4122 	 * hang there.
4123 	 *
4124 	 * Also don't wait any longer in IRQ context.
4125 	 */
4126 	if (r < 1 && (amdgpu_in_reset(adev)))
4127 		goto failed_kiq_read;
4128 
4129 	might_sleep();
4130 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4131 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4132 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4133 	}
4134 
4135 	if (cnt > MAX_KIQ_REG_TRY)
4136 		goto failed_kiq_read;
4137 
4138 	mb();
4139 	value = (uint64_t)adev->wb.wb[reg_val_offs] |
4140 		(uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4141 	amdgpu_device_wb_free(adev, reg_val_offs);
4142 	return value;
4143 
4144 failed_undo:
4145 	amdgpu_ring_undo(ring);
4146 failed_unlock:
4147 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
4148 failed_kiq_read:
4149 	if (reg_val_offs)
4150 		amdgpu_device_wb_free(adev, reg_val_offs);
4151 	pr_err("failed to read gpu clock\n");
4152 	return ~0;
4153 }
4154 
4155 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4156 {
4157 	uint64_t clock, clock_lo, clock_hi, hi_check;
4158 
4159 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4160 	case IP_VERSION(9, 3, 0):
4161 		preempt_disable();
4162 		clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4163 		clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4164 		hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4165 		/* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over
4166 		 * roughly every 42 seconds.
4167 		 */
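		/* 2^32 cycles at 100 MHz is ~42.9 seconds, so re-read the high
		 * word to detect a carry between the two reads.
		 */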
4168 		if (hi_check != clock_hi) {
4169 			clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4170 			clock_hi = hi_check;
4171 		}
4172 		preempt_enable();
4173 		clock = clock_lo | (clock_hi << 32ULL);
4174 		break;
4175 	default:
4176 		amdgpu_gfx_off_ctrl(adev, false);
4177 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4178 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4179 			    IP_VERSION(9, 0, 1) &&
4180 		    amdgpu_sriov_runtime(adev)) {
4181 			clock = gfx_v9_0_kiq_read_clock(adev);
4182 		} else {
4183 			WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4184 			clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4185 				((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4186 		}
4187 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4188 		amdgpu_gfx_off_ctrl(adev, true);
4189 		break;
4190 	}
4191 	return clock;
4192 }
4193 
4194 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4195 					  uint32_t vmid,
4196 					  uint32_t gds_base, uint32_t gds_size,
4197 					  uint32_t gws_base, uint32_t gws_size,
4198 					  uint32_t oa_base, uint32_t oa_size)
4199 {
4200 	struct amdgpu_device *adev = ring->adev;
4201 
4202 	/* GDS Base */
4203 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4204 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4205 				   gds_base);
4206 
4207 	/* GDS Size */
4208 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4209 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4210 				   gds_size);
4211 
4212 	/* GWS */
4213 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4214 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4215 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4216 
4217 	/* OA */
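	/* (1 << (oa_size + oa_base)) - (1 << oa_base) is a contiguous mask of
	 * oa_size bits starting at bit oa_base.
	 */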
4218 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4219 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4220 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4221 }
4222 
4223 static const u32 vgpr_init_compute_shader[] =
4224 {
4225 	0xb07c0000, 0xbe8000ff,
4226 	0x000000f8, 0xbf110800,
4227 	0x7e000280, 0x7e020280,
4228 	0x7e040280, 0x7e060280,
4229 	0x7e080280, 0x7e0a0280,
4230 	0x7e0c0280, 0x7e0e0280,
4231 	0x80808800, 0xbe803200,
4232 	0xbf84fff5, 0xbf9c0000,
4233 	0xd28c0001, 0x0001007f,
4234 	0xd28d0001, 0x0002027e,
4235 	0x10020288, 0xb8810904,
4236 	0xb7814000, 0xd1196a01,
4237 	0x00000301, 0xbe800087,
4238 	0xbefc00c1, 0xd89c4000,
4239 	0x00020201, 0xd89cc080,
4240 	0x00040401, 0x320202ff,
4241 	0x00000800, 0x80808100,
4242 	0xbf84fff8, 0x7e020280,
4243 	0xbf810000, 0x00000000,
4244 };
4245 
4246 static const u32 sgpr_init_compute_shader[] =
4247 {
4248 	0xb07c0000, 0xbe8000ff,
4249 	0x0000005f, 0xbee50080,
4250 	0xbe812c65, 0xbe822c65,
4251 	0xbe832c65, 0xbe842c65,
4252 	0xbe852c65, 0xb77c0005,
4253 	0x80808500, 0xbf84fff8,
4254 	0xbe800080, 0xbf810000,
4255 };
4256 
4257 static const u32 vgpr_init_compute_shader_arcturus[] = {
4258 	0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4259 	0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4260 	0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4261 	0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4262 	0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4263 	0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4264 	0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4265 	0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4266 	0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4267 	0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4268 	0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4269 	0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4270 	0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4271 	0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4272 	0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4273 	0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4274 	0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4275 	0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4276 	0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4277 	0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4278 	0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4279 	0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4280 	0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4281 	0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4282 	0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4283 	0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4284 	0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4285 	0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4286 	0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4287 	0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4288 	0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4289 	0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4290 	0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4291 	0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4292 	0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4293 	0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4294 	0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4295 	0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4296 	0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4297 	0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4298 	0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4299 	0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4300 	0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4301 	0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4302 	0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4303 	0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4304 	0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4305 	0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4306 	0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4307 	0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4308 	0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4309 	0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4310 	0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4311 	0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4312 	0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4313 	0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4314 	0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4315 	0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4316 	0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4317 	0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4318 	0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4319 	0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4320 	0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4321 	0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4322 	0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4323 	0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4324 	0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4325 	0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4326 	0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4327 	0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4328 	0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4329 	0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4330 	0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4331 	0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4332 	0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4333 	0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4334 	0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4335 	0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4336 	0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4337 	0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4338 	0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4339 	0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4340 	0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4341 	0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4342 	0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4343 	0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4344 	0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4345 	0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4346 	0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4347 	0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4348 	0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4349 	0xbf84fff8, 0xbf810000,
4350 };
4351 
4352 /* When the register arrays below are changed, please also update gpr_reg_size
4353    and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds to keep
4354    covering all gfx9 ASICs */
4355 static const struct soc15_reg_entry vgpr_init_regs[] = {
4356    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4357    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4358    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4359    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4360    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4361    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4362    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4363    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4364    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4365    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4366    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4367    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4368    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4369    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4370 };
4371 
4372 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4373    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4374    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4375    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4376    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4377    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4378    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4379    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4380    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4381    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4382    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4383    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4384    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4385    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4386    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4387 };
4388 
4389 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4390    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4391    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4392    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4393    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4394    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4395    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4396    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4397    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4398    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4399    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4400    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4401    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4402    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4403    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4404 };
4405 
4406 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4407    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4408    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4409    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4410    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4411    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4412    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4413    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4414    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4415    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4416    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4417    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4418    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4419    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4420    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4421 };
4422 
4423 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4424    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4425    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4426    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4427    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4428    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4429    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4430    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4431    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4432    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4433    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4434    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4435    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4436    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4437    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4438    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4439    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4440    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4441    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4442    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4443    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4444    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4445    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4446    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4447    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4448    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4449    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4450    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4451    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4452    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4453    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4454    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4455    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4456    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4457 };
4458 
4459 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4460 {
4461 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4462 	int i, r;
4463 
4464 	/* only supported when RAS is enabled */
4465 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4466 		return 0;
4467 
4468 	r = amdgpu_ring_alloc(ring, 7);
4469 	if (r) {
4470 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4471 			ring->name, r);
4472 		return r;
4473 	}
4474 
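	/* map the whole GDS aperture to VMID0 so the DMA below can clear all of it */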
4475 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4476 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4477 
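	/* DMA_DATA packet: dst sel 1 = GDS, src sel 2 = immediate data (zero fill) */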
4478 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4479 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4480 				PACKET3_DMA_DATA_DST_SEL(1) |
4481 				PACKET3_DMA_DATA_SRC_SEL(2) |
4482 				PACKET3_DMA_DATA_ENGINE(0)));
4483 	amdgpu_ring_write(ring, 0);
4484 	amdgpu_ring_write(ring, 0);
4485 	amdgpu_ring_write(ring, 0);
4486 	amdgpu_ring_write(ring, 0);
4487 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4488 				adev->gds.gds_size);
4489 
4490 	amdgpu_ring_commit(ring);
4491 
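	/* busy-wait until the CP has consumed the packets (rptr catches up with wptr) */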
4492 	for (i = 0; i < adev->usec_timeout; i++) {
4493 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4494 			break;
4495 		udelay(1);
4496 	}
4497 
4498 	if (i >= adev->usec_timeout)
4499 		r = -ETIMEDOUT;
4500 
4501 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4502 
4503 	return r;
4504 }
4505 
4506 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4507 {
4508 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4509 	struct amdgpu_ib ib;
4510 	struct dma_fence *f = NULL;
4511 	int r, i;
4512 	unsigned total_size, vgpr_offset, sgpr_offset;
4513 	u64 gpu_addr;
4514 
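	/* dispatch width scales with the total CU count (SEs * SHs per SE * CUs per SH) */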
4515 	int compute_dim_x = adev->gfx.config.max_shader_engines *
4516 						adev->gfx.config.max_cu_per_sh *
4517 						adev->gfx.config.max_sh_per_se;
4518 	int sgpr_work_group_size = 5;
4519 	int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4520 	int vgpr_init_shader_size;
4521 	const u32 *vgpr_init_shader_ptr;
4522 	const struct soc15_reg_entry *vgpr_init_regs_ptr;
4523 
4524 	/* only supported when RAS is enabled */
4525 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4526 		return 0;
4527 
4528 	/* bail if the compute ring is not ready */
4529 	if (!ring->sched.ready)
4530 		return 0;
4531 
4532 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4533 		vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4534 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4535 		vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4536 	} else {
4537 		vgpr_init_shader_ptr = vgpr_init_compute_shader;
4538 		vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4539 		vgpr_init_regs_ptr = vgpr_init_regs;
4540 	}
4541 
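	/* Per dispatch: 3 dwords per SET_SH_REG write, 4 for the shader address,
	 * 5 for the dispatch packet and 2 for the CS partial flush, in bytes.
	 */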
4542 	total_size =
4543 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4544 	total_size +=
4545 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4546 	total_size +=
4547 		(gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4548 	total_size = ALIGN(total_size, 256);
4549 	vgpr_offset = total_size;
4550 	total_size += ALIGN(vgpr_init_shader_size, 256);
4551 	sgpr_offset = total_size;
4552 	total_size += sizeof(sgpr_init_compute_shader);
4553 
4554 	/* allocate an indirect buffer to put the commands in */
4555 	memset(&ib, 0, sizeof(ib));
4556 	r = amdgpu_ib_get(adev, NULL, total_size,
4557 					AMDGPU_IB_POOL_DIRECT, &ib);
4558 	if (r) {
4559 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4560 		return r;
4561 	}
4562 
4563 	/* load the compute shaders */
4564 	for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4565 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4566 
4567 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4568 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4569 
4570 	/* init the ib length to 0 */
4571 	ib.length_dw = 0;
4572 
4573 	/* VGPR */
4574 	/* write the register state for the compute dispatch */
4575 	for (i = 0; i < gpr_reg_size; i++) {
4576 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4577 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4578 								- PACKET3_SET_SH_REG_START;
4579 		ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4580 	}
4581 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4582 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4583 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4584 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4585 							- PACKET3_SET_SH_REG_START;
4586 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4587 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4588 
4589 	/* write dispatch packet */
4590 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4591 	ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4592 	ib.ptr[ib.length_dw++] = 1; /* y */
4593 	ib.ptr[ib.length_dw++] = 1; /* z */
4594 	ib.ptr[ib.length_dw++] =
4595 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4596 
4597 	/* write CS partial flush packet */
4598 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4599 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4600 
4601 	/* SGPR1 */
4602 	/* write the register state for the compute dispatch */
4603 	for (i = 0; i < gpr_reg_size; i++) {
4604 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4605 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4606 								- PACKET3_SET_SH_REG_START;
4607 		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4608 	}
4609 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4610 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4611 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4612 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4613 							- PACKET3_SET_SH_REG_START;
4614 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4615 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4616 
4617 	/* write dispatch packet */
4618 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4619 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4620 	ib.ptr[ib.length_dw++] = 1; /* y */
4621 	ib.ptr[ib.length_dw++] = 1; /* z */
4622 	ib.ptr[ib.length_dw++] =
4623 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4624 
4625 	/* write CS partial flush packet */
4626 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4627 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4628 
4629 	/* SGPR2 */
4630 	/* write the register state for the compute dispatch */
4631 	for (i = 0; i < gpr_reg_size; i++) {
4632 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4633 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4634 								- PACKET3_SET_SH_REG_START;
4635 		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4636 	}
4637 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4638 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4639 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4640 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4641 							- PACKET3_SET_SH_REG_START;
4642 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4643 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4644 
4645 	/* write dispatch packet */
4646 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4647 	ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4648 	ib.ptr[ib.length_dw++] = 1; /* y */
4649 	ib.ptr[ib.length_dw++] = 1; /* z */
4650 	ib.ptr[ib.length_dw++] =
4651 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4652 
4653 	/* write CS partial flush packet */
4654 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4655 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4656 
4657 	/* schedule the IB on the ring */
4658 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4659 	if (r) {
4660 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4661 		goto fail;
4662 	}
4663 
4664 	/* wait for the GPU to finish processing the IB */
4665 	r = dma_fence_wait(f, false);
4666 	if (r) {
4667 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4668 		goto fail;
4669 	}
4670 
4671 fail:
4672 	amdgpu_ib_free(adev, &ib, NULL);
4673 	dma_fence_put(f);
4674 
4675 	return r;
4676 }
4677 
4678 static int gfx_v9_0_early_init(void *handle)
4679 {
4680 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4681 
4682 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4683 
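	/* GC 9.4.1 (Arcturus) and 9.4.2 (Aldebaran) are compute-only, no gfx rings */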
4684 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4685 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4686 		adev->gfx.num_gfx_rings = 0;
4687 	else
4688 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4689 	adev->gfx.xcc_mask = 1;
4690 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4691 					  AMDGPU_MAX_COMPUTE_RINGS);
4692 	gfx_v9_0_set_kiq_pm4_funcs(adev);
4693 	gfx_v9_0_set_ring_funcs(adev);
4694 	gfx_v9_0_set_irq_funcs(adev);
4695 	gfx_v9_0_set_gds_init(adev);
4696 	gfx_v9_0_set_rlc_funcs(adev);
4697 
4698 	/* init rlcg reg access ctrl */
4699 	gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4700 
4701 	return gfx_v9_0_init_microcode(adev);
4702 }
4703 
4704 static int gfx_v9_0_ecc_late_init(void *handle)
4705 {
4706 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4707 	int r;
4708 
4709 	/*
4710 	 * Temporary workaround: on several cards the CP firmware fails to
4711 	 * update the read pointer when CPDMA writes the GDS clearing
4712 	 * operation during the suspend/resume sequence, so limit this
4713 	 * operation to the cold boot sequence.
4714 	 */
4715 	if ((!adev->in_suspend) &&
4716 	    (adev->gds.gds_size)) {
4717 		r = gfx_v9_0_do_edc_gds_workarounds(adev);
4718 		if (r)
4719 			return r;
4720 	}
4721 
4722 	/* requires IBs so do in late init after IB pool is initialized */
4723 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4724 		r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4725 	else
4726 		r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4727 
4728 	if (r)
4729 		return r;
4730 
4731 	if (adev->gfx.ras &&
4732 	    adev->gfx.ras->enable_watchdog_timer)
4733 		adev->gfx.ras->enable_watchdog_timer(adev);
4734 
4735 	return 0;
4736 }
4737 
4738 static int gfx_v9_0_late_init(void *handle)
4739 {
4740 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4741 	int r;
4742 
4743 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4744 	if (r)
4745 		return r;
4746 
4747 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4748 	if (r)
4749 		return r;
4750 
4751 	r = gfx_v9_0_ecc_late_init(handle);
4752 	if (r)
4753 		return r;
4754 
4755 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4756 		gfx_v9_4_2_debug_trap_config_init(adev,
4757 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4758 	else
4759 		gfx_v9_0_debug_trap_config_init(adev,
4760 			adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4761 
4762 	return 0;
4763 }
4764 
4765 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4766 {
4767 	uint32_t rlc_setting;
4768 
4769 	/* if RLC is not enabled, do nothing */
4770 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4771 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4772 		return false;
4773 
4774 	return true;
4775 }
4776 
4777 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4778 {
4779 	uint32_t data;
4780 	unsigned i;
4781 
4782 	data = RLC_SAFE_MODE__CMD_MASK;
4783 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4784 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4785 
4786 	/* wait for RLC_SAFE_MODE */
4787 	for (i = 0; i < adev->usec_timeout; i++) {
4788 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4789 			break;
4790 		udelay(1);
4791 	}
4792 }
4793 
4794 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4795 {
4796 	uint32_t data;
4797 
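	/* leaving the MESSAGE field at 0 requests exit from safe mode */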
4798 	data = RLC_SAFE_MODE__CMD_MASK;
4799 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4800 }
4801 
4802 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4803 						bool enable)
4804 {
4805 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4806 
4807 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4808 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4809 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4810 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4811 	} else {
4812 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4813 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4814 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4815 	}
4816 
4817 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4818 }
4819 
4820 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4821 						bool enable)
4822 {
4823 	/* TODO: double check whether this needs to be done under safe mode */
4824 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4825 
4826 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4827 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4828 	else
4829 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4830 
4831 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4832 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4833 	else
4834 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4835 
4836 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4837 }
4838 
4839 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4840 						      bool enable)
4841 {
4842 	uint32_t data, def;
4843 
4844 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4845 
4846 	/* It is disabled by HW by default */
4847 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4848 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4849 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4850 
4851 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4852 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4853 
4854 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4855 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4856 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4857 
4858 		/* only for Vega10 & Raven1 */
4859 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4860 
4861 		if (def != data)
4862 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4863 
4864 		/* MGLS is a global flag to control all MGLS in GFX */
4865 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4866 			/* 2 - RLC memory Light sleep */
4867 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4868 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4869 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4870 				if (def != data)
4871 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4872 			}
4873 			/* 3 - CP memory Light sleep */
4874 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4875 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4876 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4877 				if (def != data)
4878 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4879 			}
4880 		}
4881 	} else {
4882 		/* 1 - MGCG_OVERRIDE */
4883 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4884 
4885 		if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4886 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4887 
4888 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4889 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4890 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4891 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4892 
4893 		if (def != data)
4894 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4895 
4896 		/* 2 - disable MGLS in RLC */
4897 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4898 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4899 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4900 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4901 		}
4902 
4903 		/* 3 - disable MGLS in CP */
4904 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4905 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4906 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4907 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4908 		}
4909 	}
4910 
4911 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4912 }
4913 
4914 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4915 					   bool enable)
4916 {
4917 	uint32_t data, def;
4918 
4919 	if (!adev->gfx.num_gfx_rings)
4920 		return;
4921 
4922 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4923 
4924 	/* Enable 3D CGCG/CGLS */
4925 	if (enable) {
4926 		/* write cmd to clear cgcg/cgls ov */
4927 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4928 		/* unset CGCG override */
4929 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4930 		/* update CGCG and CGLS override bits */
4931 		if (def != data)
4932 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4933 
4934 		/* enable 3Dcgcg FSM(0x0000363f) */
4935 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4936 
4937 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
4938 			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4939 				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4940 		else
4941 			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
4942 
4943 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4944 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4945 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4946 		if (def != data)
4947 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4948 
4949 		/* set IDLE_POLL_COUNT(0x00900100) */
4950 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4951 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4952 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4953 		if (def != data)
4954 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4955 	} else {
4956 		/* Disable CGCG/CGLS */
4957 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4958 		/* disable cgcg, cgls should be disabled */
4959 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4960 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4961 		/* disable cgcg and cgls in FSM */
4962 		if (def != data)
4963 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4964 	}
4965 
4966 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4967 }
4968 
4969 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4970 						      bool enable)
4971 {
4972 	uint32_t def, data;
4973 
4974 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4975 
4976 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4977 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4978 		/* unset CGCG override */
4979 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4980 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4981 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4982 		else
4983 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4984 		/* update CGCG and CGLS override bits */
4985 		if (def != data)
4986 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4987 
4988 		/* enable cgcg FSM(0x0000363F) */
4989 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4990 
4991 		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
4992 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4993 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4994 		else
4995 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4996 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4997 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4998 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4999 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5000 		if (def != data)
5001 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5002 
5003 		/* set IDLE_POLL_COUNT(0x00900100) */
5004 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5005 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5006 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5007 		if (def != data)
5008 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5009 	} else {
5010 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5011 		/* reset CGCG/CGLS bits */
5012 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5013 		/* disable cgcg and cgls in FSM */
5014 		if (def != data)
5015 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5016 	}
5017 
5018 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5019 }
5020 
5021 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5022 					    bool enable)
5023 {
5024 	if (enable) {
5025 		/* CGCG/CGLS should be enabled after MGCG/MGLS
5026 		 * ===  MGCG + MGLS ===
5027 		 */
5028 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5029 		/* ===  CGCG /CGLS for GFX 3D Only === */
5030 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5031 		/* ===  CGCG + CGLS === */
5032 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5033 	} else {
5034 		/* CGCG/CGLS should be disabled before MGCG/MGLS
5035 		 * ===  CGCG + CGLS ===
5036 		 */
5037 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5038 		/* ===  CGCG /CGLS for GFX 3D Only === */
5039 		gfx_v9_0_update_3d_clock_gating(adev, enable);
5040 		/* ===  MGCG + MGLS === */
5041 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5042 	}
5043 	return 0;
5044 }
5045 
5046 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5047 					      unsigned int vmid)
5048 {
5049 	u32 reg, data;
5050 
5051 	reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
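	/* in one-VF-per-PF mode the register is accessed directly, not via KIQ */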
5052 	if (amdgpu_sriov_is_pp_one_vf(adev))
5053 		data = RREG32_NO_KIQ(reg);
5054 	else
5055 		data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5056 
5057 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5058 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5059 
5060 	if (amdgpu_sriov_is_pp_one_vf(adev))
5061 		WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5062 	else
5063 		WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5064 }
5065 
5066 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5067 {
5068 	amdgpu_gfx_off_ctrl(adev, false);
5069 
5070 	gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5071 
5072 	amdgpu_gfx_off_ctrl(adev, true);
5073 }
5074 
5075 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5076 					uint32_t offset,
5077 					struct soc15_reg_rlcg *entries, int arr_size)
5078 {
5079 	int i;
5080 	uint32_t reg;
5081 
5082 	if (!entries)
5083 		return false;
5084 
5085 	for (i = 0; i < arr_size; i++) {
5086 		const struct soc15_reg_rlcg *entry;
5087 
5088 		entry = &entries[i];
5089 		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5090 		if (offset == reg)
5091 			return true;
5092 	}
5093 
5094 	return false;
5095 }
5096 
5097 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5098 {
5099 	return gfx_v9_0_check_rlcg_range(adev, offset,
5100 					(void *)rlcg_access_gc_9_0,
5101 					ARRAY_SIZE(rlcg_access_gc_9_0));
5102 }
5103 
5104 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5105 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5106 	.set_safe_mode = gfx_v9_0_set_safe_mode,
5107 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
5108 	.init = gfx_v9_0_rlc_init,
5109 	.get_csb_size = gfx_v9_0_get_csb_size,
5110 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
5111 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5112 	.resume = gfx_v9_0_rlc_resume,
5113 	.stop = gfx_v9_0_rlc_stop,
5114 	.reset = gfx_v9_0_rlc_reset,
5115 	.start = gfx_v9_0_rlc_start,
5116 	.update_spm_vmid = gfx_v9_0_update_spm_vmid,
5117 	.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5118 };
5119 
5120 static int gfx_v9_0_set_powergating_state(void *handle,
5121 					  enum amd_powergating_state state)
5122 {
5123 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5124 	bool enable = (state == AMD_PG_STATE_GATE);
5125 
5126 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5127 	case IP_VERSION(9, 2, 2):
5128 	case IP_VERSION(9, 1, 0):
5129 	case IP_VERSION(9, 3, 0):
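		/* keep GFXOFF disabled while the PG state is being reprogrammed */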
5130 		if (!enable)
5131 			amdgpu_gfx_off_ctrl(adev, false);
5132 
5133 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5134 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5135 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5136 		} else {
5137 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5138 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5139 		}
5140 
5141 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5142 			gfx_v9_0_enable_cp_power_gating(adev, true);
5143 		else
5144 			gfx_v9_0_enable_cp_power_gating(adev, false);
5145 
5146 		/* update gfx cgpg state */
5147 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5148 
5149 		/* update mgcg state */
5150 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5151 
5152 		if (enable)
5153 			amdgpu_gfx_off_ctrl(adev, true);
5154 		break;
5155 	case IP_VERSION(9, 2, 1):
5156 		amdgpu_gfx_off_ctrl(adev, enable);
5157 		break;
5158 	default:
5159 		break;
5160 	}
5161 
5162 	return 0;
5163 }
5164 
5165 static int gfx_v9_0_set_clockgating_state(void *handle,
5166 					  enum amd_clockgating_state state)
5167 {
5168 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5169 
5170 	if (amdgpu_sriov_vf(adev))
5171 		return 0;
5172 
5173 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5174 	case IP_VERSION(9, 0, 1):
5175 	case IP_VERSION(9, 2, 1):
5176 	case IP_VERSION(9, 4, 0):
5177 	case IP_VERSION(9, 2, 2):
5178 	case IP_VERSION(9, 1, 0):
5179 	case IP_VERSION(9, 4, 1):
5180 	case IP_VERSION(9, 3, 0):
5181 	case IP_VERSION(9, 4, 2):
5182 		gfx_v9_0_update_gfx_clock_gating(adev,
5183 						 state == AMD_CG_STATE_GATE);
5184 		break;
5185 	default:
5186 		break;
5187 	}
5188 	return 0;
5189 }
5190 
5191 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5192 {
5193 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5194 	int data;
5195 
5196 	if (amdgpu_sriov_vf(adev))
5197 		*flags = 0;
5198 
5199 	/* AMD_CG_SUPPORT_GFX_MGCG */
5200 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5201 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5202 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5203 
5204 	/* AMD_CG_SUPPORT_GFX_CGCG */
5205 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5206 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5207 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5208 
5209 	/* AMD_CG_SUPPORT_GFX_CGLS */
5210 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5211 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5212 
5213 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
5214 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5215 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5216 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5217 
5218 	/* AMD_CG_SUPPORT_GFX_CP_LS */
5219 	data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5220 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5221 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5222 
5223 	if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5224 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5225 		data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5226 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5227 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5228 
5229 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5230 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5231 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5232 	}
5233 }
5234 
5235 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5236 {
5237 	return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5238 }
5239 
5240 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5241 {
5242 	struct amdgpu_device *adev = ring->adev;
5243 	u64 wptr;
5244 
5245 	/* XXX check if swapping is necessary on BE */
5246 	if (ring->use_doorbell) {
5247 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5248 	} else {
5249 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5250 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5251 	}
5252 
5253 	return wptr;
5254 }
5255 
5256 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5257 {
5258 	struct amdgpu_device *adev = ring->adev;
5259 
5260 	if (ring->use_doorbell) {
5261 		/* XXX check if swapping is necessary on BE */
5262 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5263 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5264 	} else {
5265 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5266 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5267 	}
5268 }
5269 
5270 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5271 {
5272 	struct amdgpu_device *adev = ring->adev;
5273 	u32 ref_and_mask, reg_mem_engine;
5274 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5275 
5276 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
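		/* each compute pipe has its own HDP flush request/done bit */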
5277 		switch (ring->me) {
5278 		case 1:
5279 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5280 			break;
5281 		case 2:
5282 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5283 			break;
5284 		default:
5285 			return;
5286 		}
5287 		reg_mem_engine = 0;
5288 	} else {
5289 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5290 		reg_mem_engine = 1; /* pfp */
5291 	}
5292 
5293 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5294 			      adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5295 			      adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5296 			      ref_and_mask, ref_and_mask, 0x20);
5297 }
5298 
5299 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5300 					struct amdgpu_job *job,
5301 					struct amdgpu_ib *ib,
5302 					uint32_t flags)
5303 {
5304 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5305 	u32 header, control = 0;
5306 
5307 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5308 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5309 	else
5310 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5311 
5312 	control |= ib->length_dw | (vmid << 24);
5313 
5314 	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5315 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5316 
5317 		if (flags & AMDGPU_IB_PREEMPTED)
5318 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5319 
5320 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5321 			gfx_v9_0_ring_emit_de_meta(ring,
5322 						   (!amdgpu_sriov_vf(ring->adev) &&
5323 						   flags & AMDGPU_IB_PREEMPTED) ?
5324 						   true : false,
5325 						   job->gds_size > 0 && job->gds_base != 0);
5326 	}
5327 
5328 	amdgpu_ring_write(ring, header);
5329 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5330 	amdgpu_ring_write(ring,
5331 #ifdef __BIG_ENDIAN
5332 		(2 << 0) |
5333 #endif
5334 		lower_32_bits(ib->gpu_addr));
5335 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5336 	amdgpu_ring_ib_on_emit_cntl(ring);
5337 	amdgpu_ring_write(ring, control);
5338 }
5339 
5340 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5341 				     unsigned offset)
5342 {
5343 	u32 control = ring->ring[offset];
5344 
5345 	control |= INDIRECT_BUFFER_PRE_RESUME(1);
5346 	ring->ring[offset] = control;
5347 }
5348 
5349 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5350 					unsigned offset)
5351 {
5352 	struct amdgpu_device *adev = ring->adev;
5353 	void *ce_payload_cpu_addr;
5354 	uint64_t payload_offset, payload_size;
5355 
5356 	payload_size = sizeof(struct v9_ce_ib_state);
5357 
5358 	if (ring->is_mes_queue) {
5359 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5360 					  gfx[0].gfx_meta_data) +
5361 			offsetof(struct v9_gfx_meta_data, ce_payload);
5362 		ce_payload_cpu_addr =
5363 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5364 	} else {
5365 		payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5366 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5367 	}
5368 
5369 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5370 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5371 	} else {
5372 		memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5373 		       (ring->buf_mask + 1 - offset) << 2);
5374 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5375 		memcpy((void *)&ring->ring[0],
5376 		       ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5377 		       payload_size);
5378 	}
5379 }
5380 
5381 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5382 					unsigned offset)
5383 {
5384 	struct amdgpu_device *adev = ring->adev;
5385 	void *de_payload_cpu_addr;
5386 	uint64_t payload_offset, payload_size;
5387 
5388 	payload_size = sizeof(struct v9_de_ib_state);
5389 
5390 	if (ring->is_mes_queue) {
5391 		payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5392 					  gfx[0].gfx_meta_data) +
5393 			offsetof(struct v9_gfx_meta_data, de_payload);
5394 		de_payload_cpu_addr =
5395 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5396 	} else {
5397 		payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5398 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5399 	}
5400 
5401 	((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5402 		IB_COMPLETION_STATUS_PREEMPTED;
5403 
5404 	if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5405 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5406 	} else {
5407 		memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5408 		       (ring->buf_mask + 1 - offset) << 2);
5409 		payload_size -= (ring->buf_mask + 1 - offset) << 2;
5410 		memcpy((void *)&ring->ring[0],
5411 		       de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5412 		       payload_size);
5413 	}
5414 }
5415 
5416 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5417 					  struct amdgpu_job *job,
5418 					  struct amdgpu_ib *ib,
5419 					  uint32_t flags)
5420 {
5421 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5422 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5423 
5424 	/* Currently there is a high likelihood of a wave ID mismatch
5425 	 * between ME and GDS, leading to a HW deadlock, because ME generates
5426 	 * different wave IDs than the GDS expects. This situation happens
5427 	 * randomly when at least 5 compute pipes use GDS ordered append.
5428 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5429 	 * Those are probably bugs somewhere else in the kernel driver.
5430 	 *
5431 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5432 	 * GDS to 0 for this ring (me/pipe).
5433 	 */
5434 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5435 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5436 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5437 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5438 	}
5439 
5440 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5441 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5442 	amdgpu_ring_write(ring,
5443 #ifdef __BIG_ENDIAN
5444 				(2 << 0) |
5445 #endif
5446 				lower_32_bits(ib->gpu_addr));
5447 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5448 	amdgpu_ring_write(ring, control);
5449 }
5450 
5451 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5452 				     u64 seq, unsigned flags)
5453 {
5454 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5455 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5456 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5457 	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5458 	uint32_t dw2 = 0;
5459 
5460 	/* RELEASE_MEM - flush caches, send int */
5461 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5462 
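	/* select cache actions: TC non-coherent for writeback only, else full TCL1/TC/TC metadata */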
5463 	if (writeback) {
5464 		dw2 = EOP_TC_NC_ACTION_EN;
5465 	} else {
5466 		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5467 				EOP_TC_MD_ACTION_EN;
5468 	}
5469 	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5470 				EVENT_INDEX(5);
5471 	if (exec)
5472 		dw2 |= EOP_EXEC;
5473 
5474 	amdgpu_ring_write(ring, dw2);
5475 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5476 
5477 	/*
5478 	 * the address must be Qword aligned for a 64-bit write, and Dword
5479 	 * aligned when only the low 32 bits are sent (data high is discarded)
5480 	 */
5481 	if (write64bit)
5482 		BUG_ON(addr & 0x7);
5483 	else
5484 		BUG_ON(addr & 0x3);
5485 	amdgpu_ring_write(ring, lower_32_bits(addr));
5486 	amdgpu_ring_write(ring, upper_32_bits(addr));
5487 	amdgpu_ring_write(ring, lower_32_bits(seq));
5488 	amdgpu_ring_write(ring, upper_32_bits(seq));
5489 	amdgpu_ring_write(ring, 0);
5490 }
5491 
5492 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5493 {
5494 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5495 	uint32_t seq = ring->fence_drv.sync_seq;
5496 	uint64_t addr = ring->fence_drv.gpu_addr;
5497 
5498 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5499 			      lower_32_bits(addr), upper_32_bits(addr),
5500 			      seq, 0xffffffff, 4);
5501 }
5502 
5503 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5504 					unsigned vmid, uint64_t pd_addr)
5505 {
5506 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5507 
5508 	/* compute doesn't have PFP */
5509 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5510 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5511 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5512 		amdgpu_ring_write(ring, 0x0);
5513 	}
5514 }
5515 
5516 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5517 {
5518 	return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5519 }
5520 
5521 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5522 {
5523 	u64 wptr;
5524 
5525 	/* XXX check if swapping is necessary on BE */
5526 	if (ring->use_doorbell)
5527 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5528 	else
5529 		BUG();
5530 	return wptr;
5531 }
5532 
5533 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5534 {
5535 	struct amdgpu_device *adev = ring->adev;
5536 
5537 	/* XXX check if swapping is necessary on BE */
5538 	if (ring->use_doorbell) {
5539 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5540 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5541 	} else {
5542 		BUG(); /* only DOORBELL method supported on gfx9 now */
5543 	}
5544 }
5545 
5546 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5547 					 u64 seq, unsigned int flags)
5548 {
5549 	struct amdgpu_device *adev = ring->adev;
5550 
5551 	/* we only allocate 32bit for each seq wb address */
5552 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5553 
5554 	/* write fence seq to the "addr" */
5555 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5556 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5557 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5558 	amdgpu_ring_write(ring, lower_32_bits(addr));
5559 	amdgpu_ring_write(ring, upper_32_bits(addr));
5560 	amdgpu_ring_write(ring, lower_32_bits(seq));
5561 
5562 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5563 		/* set register to trigger INT */
5564 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5565 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5566 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5567 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5568 		amdgpu_ring_write(ring, 0);
5569 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5570 	}
5571 }
5572 
5573 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5574 {
5575 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5576 	amdgpu_ring_write(ring, 0);
5577 }
5578 
5579 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5580 {
5581 	struct amdgpu_device *adev = ring->adev;
5582 	struct v9_ce_ib_state ce_payload = {0};
5583 	uint64_t offset, ce_payload_gpu_addr;
5584 	void *ce_payload_cpu_addr;
5585 	int cnt;
5586 
5587 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5588 
5589 	if (ring->is_mes_queue) {
5590 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5591 				  gfx[0].gfx_meta_data) +
5592 			offsetof(struct v9_gfx_meta_data, ce_payload);
5593 		ce_payload_gpu_addr =
5594 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5595 		ce_payload_cpu_addr =
5596 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5597 	} else {
5598 		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5599 		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5600 		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5601 	}
5602 
5603 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5604 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5605 				 WRITE_DATA_DST_SEL(8) |
5606 				 WR_CONFIRM) |
5607 				 WRITE_DATA_CACHE_POLICY(0));
5608 	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5609 	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5610 
5611 	amdgpu_ring_ib_on_emit_ce(ring);
5612 
5613 	if (resume)
5614 		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5615 					   sizeof(ce_payload) >> 2);
5616 	else
5617 		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5618 					   sizeof(ce_payload) >> 2);
5619 }
5620 
5621 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5622 {
5623 	int i, r = 0;
5624 	struct amdgpu_device *adev = ring->adev;
5625 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5626 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5627 	unsigned long flags;
5628 
5629 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5630 		return -EINVAL;
5631 
5632 	spin_lock_irqsave(&kiq->ring_lock, flags);
5633 
5634 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5635 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5636 		return -ENOMEM;
5637 	}
5638 
5639 	/* assert preemption condition */
5640 	amdgpu_ring_set_preempt_cond_exec(ring, false);
5641 
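	/* emit a trailing fence so we can tell when the preemption request has been processed */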
5642 	ring->trail_seq += 1;
5643 	amdgpu_ring_alloc(ring, 13);
5644 	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5645 				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5646 
5647 	/* assert IB preemption, emit the trailing fence */
5648 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5649 				   ring->trail_fence_gpu_addr,
5650 				   ring->trail_seq);
5651 
5652 	amdgpu_ring_commit(kiq_ring);
5653 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5654 
5655 	/* poll the trailing fence */
5656 	for (i = 0; i < adev->usec_timeout; i++) {
5657 		if (ring->trail_seq ==
5658 			le32_to_cpu(*ring->trail_fence_cpu_addr))
5659 			break;
5660 		udelay(1);
5661 	}
5662 
5663 	if (i >= adev->usec_timeout) {
5664 		r = -EINVAL;
5665 		DRM_WARN("ring %d: timed out trying to preempt IB\n", ring->idx);
5666 	}
5667 
5668 	/* reset CP_VMID_PREEMPT after the trailing fence */
5669 	amdgpu_ring_emit_wreg(ring,
5670 			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5671 			      0x0);
5672 	amdgpu_ring_commit(ring);
5673 
5674 	/* deassert preemption condition */
5675 	amdgpu_ring_set_preempt_cond_exec(ring, true);
5676 	return r;
5677 }
5678 
5679 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5680 {
5681 	struct amdgpu_device *adev = ring->adev;
5682 	struct v9_de_ib_state de_payload = {0};
5683 	uint64_t offset, gds_addr, de_payload_gpu_addr;
5684 	void *de_payload_cpu_addr;
5685 	int cnt;
5686 
5687 	if (ring->is_mes_queue) {
5688 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5689 				  gfx[0].gfx_meta_data) +
5690 			offsetof(struct v9_gfx_meta_data, de_payload);
5691 		de_payload_gpu_addr =
5692 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5693 		de_payload_cpu_addr =
5694 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5695 
5696 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5697 				  gfx[0].gds_backup) +
5698 			offsetof(struct v9_gfx_meta_data, de_payload);
5699 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5700 	} else {
5701 		offset = offsetof(struct v9_gfx_meta_data, de_payload);
5702 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5703 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5704 
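		/* the GDS backup area sits at the page-aligned tail of the CSA */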
5705 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5706 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5707 				 PAGE_SIZE);
5708 	}
5709 
5710 	if (usegds) {
5711 		de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5712 		de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5713 	}
5714 
5715 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5716 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5717 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5718 				 WRITE_DATA_DST_SEL(8) |
5719 				 WR_CONFIRM) |
5720 				 WRITE_DATA_CACHE_POLICY(0));
5721 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5722 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5723 
5724 	amdgpu_ring_ib_on_emit_de(ring);
5725 	if (resume)
5726 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5727 					   sizeof(de_payload) >> 2);
5728 	else
5729 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5730 					   sizeof(de_payload) >> 2);
5731 }
5732 
5733 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5734 				   bool secure)
5735 {
5736 	uint32_t v = secure ? FRAME_TMZ : 0;
5737 
5738 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5739 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5740 }
5741 
5742 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5743 {
5744 	uint32_t dw2 = 0;
5745 
5746 	gfx_v9_0_ring_emit_ce_meta(ring,
5747 				   (!amdgpu_sriov_vf(ring->adev) &&
5748 				   flags & AMDGPU_IB_PREEMPTED) ? true : false);
5749 
5750 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5751 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5752 		/* set load_global_config & load_global_uconfig */
5753 		dw2 |= 0x8001;
5754 		/* set load_cs_sh_regs */
5755 		dw2 |= 0x01000000;
5756 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5757 		dw2 |= 0x10002;
5758 
5759 		/* set load_ce_ram if a preamble is presented */
5760 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5761 			dw2 |= 0x10000000;
5762 	} else {
5763 		/* still load_ce_ram if this is the first time a preamble is presented,
5764 		 * even though no context switch happens.
5765 		 */
5766 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5767 			dw2 |= 0x10000000;
5768 	}
5769 
5770 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5771 	amdgpu_ring_write(ring, dw2);
5772 	amdgpu_ring_write(ring, 0);
5773 }
5774 
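/* Emit a COND_EXEC packet: the following commands are skipped while the
 * value at @addr is zero.  Returns the ring offset of the dword holding the
 * execution count so it can be patched later.
 */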
5775 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5776 						  uint64_t addr)
5777 {
5778 	unsigned ret;
5779 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5780 	amdgpu_ring_write(ring, lower_32_bits(addr));
5781 	amdgpu_ring_write(ring, upper_32_bits(addr));
5782 	/* discard following DWs if *cond_exec_gpu_addr==0 */
5783 	amdgpu_ring_write(ring, 0);
5784 	ret = ring->wptr & ring->buf_mask;
5785 	/* patch dummy value later */
5786 	amdgpu_ring_write(ring, 0);
5787 	return ret;
5788 }
5789 
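/* Emit a COPY_DATA packet that copies the value of @reg into the device
 * writeback buffer at @reg_val_offs; used by the KIQ to read registers
 * through the ring.
 */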
5790 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5791 				    uint32_t reg_val_offs)
5792 {
5793 	struct amdgpu_device *adev = ring->adev;
5794 
5795 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5796 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5797 				(5 << 8) |	/* dst: memory */
5798 				(1 << 20));	/* write confirm */
5799 	amdgpu_ring_write(ring, reg);
5800 	amdgpu_ring_write(ring, 0);
5801 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5802 				reg_val_offs * 4));
5803 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5804 				reg_val_offs * 4));
5805 }
5806 
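/* Emit a WRITE_DATA packet that programs a single register; the engine
 * select and write-confirm bits depend on the ring type.
 */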
5807 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5808 				    uint32_t val)
5809 {
5810 	uint32_t cmd = 0;
5811 
5812 	switch (ring->funcs->type) {
5813 	case AMDGPU_RING_TYPE_GFX:
5814 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5815 		break;
5816 	case AMDGPU_RING_TYPE_KIQ:
5817 		cmd = (1 << 16); /* no inc addr */
5818 		break;
5819 	default:
5820 		cmd = WR_CONFIRM;
5821 		break;
5822 	}
5823 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5824 	amdgpu_ring_write(ring, cmd);
5825 	amdgpu_ring_write(ring, reg);
5826 	amdgpu_ring_write(ring, 0);
5827 	amdgpu_ring_write(ring, val);
5828 }
5829 
5830 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5831 					uint32_t val, uint32_t mask)
5832 {
5833 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5834 }
5835 
5836 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5837 						  uint32_t reg0, uint32_t reg1,
5838 						  uint32_t ref, uint32_t mask)
5839 {
5840 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5841 	struct amdgpu_device *adev = ring->adev;
5842 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5843 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5844 
5845 	if (fw_version_ok)
5846 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5847 				      ref, mask, 0x20);
5848 	else
5849 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5850 							   ref, mask);
5851 }
5852 
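/* Soft recovery: issue an SQ_CMD that kills the waves running under @vmid. */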
5853 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5854 {
5855 	struct amdgpu_device *adev = ring->adev;
5856 	uint32_t value = 0;
5857 
5858 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5859 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5860 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5861 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5862 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5863 }
5864 
5865 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5866 						 enum amdgpu_interrupt_state state)
5867 {
5868 	switch (state) {
5869 	case AMDGPU_IRQ_STATE_DISABLE:
5870 	case AMDGPU_IRQ_STATE_ENABLE:
5871 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5872 			       TIME_STAMP_INT_ENABLE,
5873 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5874 		break;
5875 	default:
5876 		break;
5877 	}
5878 }
5879 
5880 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5881 						     int me, int pipe,
5882 						     enum amdgpu_interrupt_state state)
5883 {
5884 	u32 mec_int_cntl, mec_int_cntl_reg;
5885 
5886 	/*
5887 	 * amdgpu controls only the first MEC. That's why this function only
5888 	 * handles the setting of interrupts for this specific MEC. All other
5889 	 * pipes' interrupts are set by amdkfd.
5890 	 */
5891 
5892 	if (me == 1) {
5893 		switch (pipe) {
5894 		case 0:
5895 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5896 			break;
5897 		case 1:
5898 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5899 			break;
5900 		case 2:
5901 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5902 			break;
5903 		case 3:
5904 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5905 			break;
5906 		default:
5907 			DRM_DEBUG("invalid pipe %d\n", pipe);
5908 			return;
5909 		}
5910 	} else {
5911 		DRM_DEBUG("invalid me %d\n", me);
5912 		return;
5913 	}
5914 
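	/* The TIME_STAMP_INT_ENABLE field layout is identical for all
	 * CP_ME1_PIPEx_INT_CNTL registers, so the PIPE0 definition is
	 * used for every pipe below.
	 */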
5915 	switch (state) {
5916 	case AMDGPU_IRQ_STATE_DISABLE:
5917 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5918 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5919 					     TIME_STAMP_INT_ENABLE, 0);
5920 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5921 		break;
5922 	case AMDGPU_IRQ_STATE_ENABLE:
5923 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5924 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5925 					     TIME_STAMP_INT_ENABLE, 1);
5926 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5927 		break;
5928 	default:
5929 		break;
5930 	}
5931 }
5932 
5933 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5934 					     struct amdgpu_irq_src *source,
5935 					     unsigned type,
5936 					     enum amdgpu_interrupt_state state)
5937 {
5938 	switch (state) {
5939 	case AMDGPU_IRQ_STATE_DISABLE:
5940 	case AMDGPU_IRQ_STATE_ENABLE:
5941 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5942 			       PRIV_REG_INT_ENABLE,
5943 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5944 		break;
5945 	default:
5946 		break;
5947 	}
5948 
5949 	return 0;
5950 }
5951 
5952 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5953 					      struct amdgpu_irq_src *source,
5954 					      unsigned type,
5955 					      enum amdgpu_interrupt_state state)
5956 {
5957 	switch (state) {
5958 	case AMDGPU_IRQ_STATE_DISABLE:
5959 	case AMDGPU_IRQ_STATE_ENABLE:
5960 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5961 			       PRIV_INSTR_INT_ENABLE,
5962 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5963 		break;
5964 	default:
5965 		break;
5966 	}
5967 
5968 	return 0;
5969 }
5970 
5971 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5972 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5973 			CP_ECC_ERROR_INT_ENABLE, 1)
5974 
5975 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5976 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5977 			CP_ECC_ERROR_INT_ENABLE, 0)
5978 
5979 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5980 					      struct amdgpu_irq_src *source,
5981 					      unsigned type,
5982 					      enum amdgpu_interrupt_state state)
5983 {
5984 	switch (state) {
5985 	case AMDGPU_IRQ_STATE_DISABLE:
5986 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5987 				CP_ECC_ERROR_INT_ENABLE, 0);
5988 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5989 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5990 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5991 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5992 		break;
5993 
5994 	case AMDGPU_IRQ_STATE_ENABLE:
5995 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5996 				CP_ECC_ERROR_INT_ENABLE, 1);
5997 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5998 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5999 		ENABLE_ECC_ON_ME_PIPE(1, 2);
6000 		ENABLE_ECC_ON_ME_PIPE(1, 3);
6001 		break;
6002 	default:
6003 		break;
6004 	}
6005 
6006 	return 0;
6007 }
6008 
6009 
6010 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6011 					    struct amdgpu_irq_src *src,
6012 					    unsigned type,
6013 					    enum amdgpu_interrupt_state state)
6014 {
6015 	switch (type) {
6016 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6017 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
6018 		break;
6019 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6020 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6021 		break;
6022 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6023 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6024 		break;
6025 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6026 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6027 		break;
6028 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6029 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6030 		break;
6031 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6032 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6033 		break;
6034 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6035 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6036 		break;
6037 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6038 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6039 		break;
6040 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6041 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6042 		break;
6043 	default:
6044 		break;
6045 	}
6046 	return 0;
6047 }
6048 
6049 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
6050 			    struct amdgpu_irq_src *source,
6051 			    struct amdgpu_iv_entry *entry)
6052 {
6053 	int i;
6054 	u8 me_id, pipe_id, queue_id;
6055 	struct amdgpu_ring *ring;
6056 
6057 	DRM_DEBUG("IH: CP EOP\n");
6058 	me_id = (entry->ring_id & 0x0c) >> 2;
6059 	pipe_id = (entry->ring_id & 0x03) >> 0;
6060 	queue_id = (entry->ring_id & 0x70) >> 4;
6061 
6062 	switch (me_id) {
6063 	case 0:
6064 		if (adev->gfx.num_gfx_rings) {
6065 			if (!adev->gfx.mcbp) {
6066 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6067 			} else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
6068 				/* Fence signals are handled on the software rings */
6069 				for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6070 					amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
6071 			}
6072 		}
6073 		break;
6074 	case 1:
6075 	case 2:
6076 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6077 			ring = &adev->gfx.compute_ring[i];
6078 			/* Per-queue interrupt is supported for MEC starting from VI.
6079 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6080 			 */
6081 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6082 				amdgpu_fence_process(ring);
6083 		}
6084 		break;
6085 	}
6086 	return 0;
6087 }
6088 
6089 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6090 			   struct amdgpu_iv_entry *entry)
6091 {
6092 	u8 me_id, pipe_id, queue_id;
6093 	struct amdgpu_ring *ring;
6094 	int i;
6095 
6096 	me_id = (entry->ring_id & 0x0c) >> 2;
6097 	pipe_id = (entry->ring_id & 0x03) >> 0;
6098 	queue_id = (entry->ring_id & 0x70) >> 4;
6099 
6100 	switch (me_id) {
6101 	case 0:
6102 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6103 		break;
6104 	case 1:
6105 	case 2:
6106 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6107 			ring = &adev->gfx.compute_ring[i];
6108 			if (ring->me == me_id && ring->pipe == pipe_id &&
6109 			    ring->queue == queue_id)
6110 				drm_sched_fault(&ring->sched);
6111 		}
6112 		break;
6113 	}
6114 }
6115 
6116 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6117 				 struct amdgpu_irq_src *source,
6118 				 struct amdgpu_iv_entry *entry)
6119 {
6120 	DRM_ERROR("Illegal register access in command stream\n");
6121 	gfx_v9_0_fault(adev, entry);
6122 	return 0;
6123 }
6124 
6125 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6126 				  struct amdgpu_irq_src *source,
6127 				  struct amdgpu_iv_entry *entry)
6128 {
6129 	DRM_ERROR("Illegal instruction in command stream\n");
6130 	gfx_v9_0_fault(adev, entry);
6131 	return 0;
6132 }
6133 
6134 
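/* EDC counter decode table: for each GFX RAS sub-block the register holding
 * its error counters and the SEC/DED bit-fields within it (entries with a
 * zero DED mask have no DED counter).
 */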
6135 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6136 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6137 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6138 	  SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6139 	},
6140 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6141 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6142 	  SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6143 	},
6144 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6145 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6146 	  0, 0
6147 	},
6148 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6149 	  SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6150 	  0, 0
6151 	},
6152 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6153 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6154 	  SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6155 	},
6156 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6157 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6158 	  0, 0
6159 	},
6160 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6161 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6162 	  SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6163 	},
6164 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6165 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6166 	  SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6167 	},
6168 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6169 	  SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6170 	  0, 0
6171 	},
6172 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6173 	  SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6174 	  0, 0
6175 	},
6176 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6177 	  SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6178 	  0, 0
6179 	},
6180 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6181 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6182 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6183 	},
6184 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6185 	  SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6186 	  0, 0
6187 	},
6188 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6189 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6190 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6191 	},
6192 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6193 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6194 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6195 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6196 	},
6197 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6198 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6199 	  SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6200 	  0, 0
6201 	},
6202 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6203 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6204 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6205 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6206 	},
6207 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6208 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6209 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6210 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6211 	},
6212 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6213 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6214 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6215 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6216 	},
6217 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6218 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6219 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6220 	  SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6221 	},
6222 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6223 	  SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6224 	  0, 0
6225 	},
6226 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6227 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6228 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6229 	},
6230 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6231 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6232 	  0, 0
6233 	},
6234 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6235 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6236 	  0, 0
6237 	},
6238 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6239 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6240 	  0, 0
6241 	},
6242 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6243 	  SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6244 	  0, 0
6245 	},
6246 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6247 	  SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6248 	  0, 0
6249 	},
6250 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6251 	  SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6252 	  0, 0
6253 	},
6254 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6255 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6256 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6257 	},
6258 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6259 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6260 	  SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6261 	},
6262 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6263 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6264 	  SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6265 	},
6266 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6267 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6268 	  SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6269 	},
6270 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6271 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6272 	  SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6273 	},
6274 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6275 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6276 	  0, 0
6277 	},
6278 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6279 	  SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6280 	  0, 0
6281 	},
6282 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6283 	  SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6284 	  0, 0
6285 	},
6286 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6287 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6288 	  0, 0
6289 	},
6290 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6291 	  SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6292 	  0, 0
6293 	},
6294 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6295 	  SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6296 	  0, 0
6297 	},
6298 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6299 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6300 	  0, 0
6301 	},
6302 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6303 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6304 	  0, 0
6305 	},
6306 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6307 	  SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6308 	  0, 0
6309 	},
6310 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6311 	  SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6312 	  0, 0
6313 	},
6314 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6315 	  SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6316 	  0, 0
6317 	},
6318 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6319 	  SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6320 	  0, 0
6321 	},
6322 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6323 	  SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6324 	  0, 0
6325 	},
6326 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6327 	  SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6328 	  0, 0
6329 	},
6330 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6331 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6332 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6333 	},
6334 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6335 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6336 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6337 	},
6338 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6339 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6340 	  0, 0
6341 	},
6342 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6343 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6344 	  0, 0
6345 	},
6346 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6347 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6348 	  0, 0
6349 	},
6350 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6351 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6352 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6353 	},
6354 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6355 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6356 	  SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6357 	},
6358 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6359 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6360 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6361 	},
6362 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6363 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6364 	  SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6365 	},
6366 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6367 	  SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6368 	  0, 0
6369 	},
6370 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6371 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6372 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6373 	},
6374 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6375 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6376 	  SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6377 	},
6378 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6379 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6380 	  SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6381 	},
6382 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6383 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6384 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6385 	},
6386 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6387 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6388 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6389 	},
6390 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6391 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6392 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6393 	},
6394 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6395 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6396 	  SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6397 	},
6398 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6399 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6400 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6401 	},
6402 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6403 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6404 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6405 	},
6406 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6407 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6408 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6409 	},
6410 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6411 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6412 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6413 	},
6414 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6415 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6416 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6417 	},
6418 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6419 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6420 	  SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6421 	},
6422 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6423 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6424 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6425 	},
6426 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6427 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6428 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6429 	},
6430 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6431 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6432 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6433 	},
6434 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6435 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6436 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6437 	},
6438 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6439 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6440 	  0, 0
6441 	},
6442 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6443 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6444 	  0, 0
6445 	},
6446 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6447 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6448 	  0, 0
6449 	},
6450 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6451 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6452 	  0, 0
6453 	},
6454 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6455 	  SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6456 	  0, 0
6457 	},
6458 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6459 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6460 	  SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6461 	},
6462 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6463 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6464 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6465 	},
6466 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6467 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6468 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6469 	},
6470 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6471 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6472 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6473 	},
6474 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6475 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6476 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6477 	},
6478 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6479 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6480 	  0, 0
6481 	},
6482 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6483 	  SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6484 	  0, 0
6485 	},
6486 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6487 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6488 	  0, 0
6489 	},
6490 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6491 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6492 	  0, 0
6493 	},
6494 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6495 	  SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6496 	  0, 0
6497 	},
6498 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6499 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6500 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6501 	},
6502 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6503 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6504 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6505 	},
6506 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6507 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6508 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6509 	},
6510 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6511 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6512 	  SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6513 	},
6514 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6515 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6516 	  SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6517 	},
6518 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6519 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6520 	  0, 0
6521 	},
6522 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6523 	  SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6524 	  0, 0
6525 	},
6526 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6527 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6528 	  0, 0
6529 	},
6530 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6531 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6532 	  0, 0
6533 	},
6534 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6535 	  SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6536 	  0, 0
6537 	},
6538 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6539 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6540 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6541 	},
6542 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6543 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6544 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6545 	},
6546 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6547 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6548 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6549 	},
6550 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6551 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6552 	  0, 0
6553 	},
6554 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6555 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6556 	  0, 0
6557 	},
6558 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6559 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6560 	  0, 0
6561 	},
6562 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6563 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6564 	  0, 0
6565 	},
6566 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6567 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6568 	  0, 0
6569 	},
6570 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6571 	  SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6572 	  0, 0
6573 	}
6574 };
6575 
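/* Inject a RAS error into a GFX sub-block via the RAS TA, after validating
 * that both the hardware and the driver support the requested error type.
 */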
6576 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6577 				     void *inject_if, uint32_t instance_mask)
6578 {
6579 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6580 	int ret;
6581 	struct ta_ras_trigger_error_input block_info = { 0 };
6582 
6583 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6584 		return -EINVAL;
6585 
6586 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6587 		return -EINVAL;
6588 
6589 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6590 		return -EPERM;
6591 
6592 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6593 	      info->head.type)) {
6594 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6595 			ras_gfx_subblocks[info->head.sub_block_index].name,
6596 			info->head.type);
6597 		return -EPERM;
6598 	}
6599 
6600 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6601 	      info->head.type)) {
6602 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6603 			ras_gfx_subblocks[info->head.sub_block_index].name,
6604 			info->head.type);
6605 		return -EPERM;
6606 	}
6607 
6608 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6609 	block_info.sub_block_index =
6610 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6611 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6612 	block_info.address = info->address;
6613 	block_info.value = info->value;
6614 
6615 	mutex_lock(&adev->grbm_idx_mutex);
6616 	ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6617 	mutex_unlock(&adev->grbm_idx_mutex);
6618 
6619 	return ret;
6620 }
6621 
6622 static const char * const vml2_mems[] = {
6623 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6624 	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6625 	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
6626 	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
6627 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6628 	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6629 	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
6630 	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
6631 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6632 	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6633 	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
6634 	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
6635 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6636 	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6637 	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
6638 	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
6639 };
6640 
6641 static const char * const vml2_walker_mems[] = {
6642 	"UTC_VML2_CACHE_PDE0_MEM0",
6643 	"UTC_VML2_CACHE_PDE0_MEM1",
6644 	"UTC_VML2_CACHE_PDE1_MEM0",
6645 	"UTC_VML2_CACHE_PDE1_MEM1",
6646 	"UTC_VML2_CACHE_PDE2_MEM0",
6647 	"UTC_VML2_CACHE_PDE2_MEM1",
6648 	"UTC_VML2_RDIF_LOG_FIFO",
6649 };
6650 
6651 static const char * const atc_l2_cache_2m_mems[] = {
6652 	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6653 	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6654 	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6655 	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6656 };
6657 
6658 static const char * const atc_l2_cache_4k_mems[] = {
6659 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6660 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6661 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6662 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6663 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6664 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6665 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6666 	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6667 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6668 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6669 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6670 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6671 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6672 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6673 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6674 	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6675 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6676 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6677 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6678 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6679 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6680 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6681 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6682 	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6683 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6684 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6685 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6686 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6687 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6688 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6689 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6690 	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6691 };
6692 
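/* Query the UTC (VML2, VML2 walker and ATC L2) EDC counters, report any
 * SEC/DED errors found and accumulate them into @err_data.
 */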
6693 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6694 					 struct ras_err_data *err_data)
6695 {
6696 	uint32_t i, data;
6697 	uint32_t sec_count, ded_count;
6698 
6699 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6700 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6701 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6702 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6703 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6704 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6705 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6706 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6707 
6708 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6709 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6710 		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6711 
6712 		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6713 		if (sec_count) {
6714 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6715 				"SEC %d\n", i, vml2_mems[i], sec_count);
6716 			err_data->ce_count += sec_count;
6717 		}
6718 
6719 		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6720 		if (ded_count) {
6721 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6722 				"DED %d\n", i, vml2_mems[i], ded_count);
6723 			err_data->ue_count += ded_count;
6724 		}
6725 	}
6726 
6727 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6728 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6729 		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6730 
6731 		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6732 						SEC_COUNT);
6733 		if (sec_count) {
6734 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6735 				"SEC %d\n", i, vml2_walker_mems[i], sec_count);
6736 			err_data->ce_count += sec_count;
6737 		}
6738 
6739 		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6740 						DED_COUNT);
6741 		if (ded_count) {
6742 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6743 				"DED %d\n", i, vml2_walker_mems[i], ded_count);
6744 			err_data->ue_count += ded_count;
6745 		}
6746 	}
6747 
6748 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6749 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6750 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6751 
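		/* SEC count is in bits [14:13] of ATC_L2_CACHE_2M_EDC_CNT */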
6752 		sec_count = (data & 0x00006000L) >> 0xd;
6753 		if (sec_count) {
6754 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6755 				"SEC %d\n", i, atc_l2_cache_2m_mems[i],
6756 				sec_count);
6757 			err_data->ce_count += sec_count;
6758 		}
6759 	}
6760 
6761 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6762 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6763 		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6764 
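		/* SEC count is in bits [14:13] and DED count in bits [16:15]
		 * of ATC_L2_CACHE_4K_EDC_CNT
		 */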
6765 		sec_count = (data & 0x00006000L) >> 0xd;
6766 		if (sec_count) {
6767 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6768 				"SEC %d\n", i, atc_l2_cache_4k_mems[i],
6769 				sec_count);
6770 			err_data->ce_count += sec_count;
6771 		}
6772 
6773 		ded_count = (data & 0x00018000L) >> 0xf;
6774 		if (ded_count) {
6775 			dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6776 				"DED %d\n", i, atc_l2_cache_4k_mems[i],
6777 				ded_count);
6778 			err_data->ue_count += ded_count;
6779 		}
6780 	}
6781 
6782 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6783 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6784 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6785 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6786 
6787 	return 0;
6788 }
6789 
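/* Decode one EDC counter register value against gfx_v9_0_ras_fields and add
 * any SEC/DED counts found to @sec_count and @ded_count.
 */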
6790 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6791 	const struct soc15_reg_entry *reg,
6792 	uint32_t se_id, uint32_t inst_id, uint32_t value,
6793 	uint32_t *sec_count, uint32_t *ded_count)
6794 {
6795 	uint32_t i;
6796 	uint32_t sec_cnt, ded_cnt;
6797 
6798 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6799 		if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6800 			gfx_v9_0_ras_fields[i].seg != reg->seg ||
6801 			gfx_v9_0_ras_fields[i].inst != reg->inst)
6802 			continue;
6803 
6804 		sec_cnt = (value &
6805 				gfx_v9_0_ras_fields[i].sec_count_mask) >>
6806 				gfx_v9_0_ras_fields[i].sec_count_shift;
6807 		if (sec_cnt) {
6808 			dev_info(adev->dev, "GFX SubBlock %s, "
6809 				"Instance[%d][%d], SEC %d\n",
6810 				gfx_v9_0_ras_fields[i].name,
6811 				se_id, inst_id,
6812 				sec_cnt);
6813 			*sec_count += sec_cnt;
6814 		}
6815 
6816 		ded_cnt = (value &
6817 				gfx_v9_0_ras_fields[i].ded_count_mask) >>
6818 				gfx_v9_0_ras_fields[i].ded_count_shift;
6819 		if (ded_cnt) {
6820 			dev_info(adev->dev, "GFX SubBlock %s, "
6821 				"Instance[%d][%d], DED %d\n",
6822 				gfx_v9_0_ras_fields[i].name,
6823 				se_id, inst_id,
6824 				ded_cnt);
6825 			*ded_count += ded_cnt;
6826 		}
6827 	}
6828 
6829 	return 0;
6830 }
6831 
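/* Clear all GFX EDC counters: the per-SE counters are read back to clear
 * them, and the indexed UTC counters are reset explicitly.
 */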
6832 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6833 {
6834 	int i, j, k;
6835 
6836 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6837 		return;
6838 
6839 	/* read back registers to clear the counters */
6840 	mutex_lock(&adev->grbm_idx_mutex);
6841 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6842 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6843 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6844 				amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
6845 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6846 			}
6847 		}
6848 	}
6849 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6850 	mutex_unlock(&adev->grbm_idx_mutex);
6851 
6852 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6853 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6854 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6855 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6856 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6857 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6858 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6859 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6860 
6861 	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6862 		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6863 		RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6864 	}
6865 
6866 	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6867 		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6868 		RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6869 	}
6870 
6871 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6872 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6873 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6874 	}
6875 
6876 	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6877 		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6878 		RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6879 	}
6880 
6881 	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6882 	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6883 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6884 	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6885 }
6886 
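/* Collect correctable (SEC) and uncorrectable (DED) error counts from every
 * EDC counter register across all SEs/instances plus the UTC blocks and
 * store the totals in @ras_error_status.
 */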
6887 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6888 					  void *ras_error_status)
6889 {
6890 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6891 	uint32_t sec_count = 0, ded_count = 0;
6892 	uint32_t i, j, k;
6893 	uint32_t reg_value;
6894 
6895 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6896 		return;
6897 
6898 	err_data->ue_count = 0;
6899 	err_data->ce_count = 0;
6900 
6901 	mutex_lock(&adev->grbm_idx_mutex);
6902 
6903 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6904 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6905 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6906 				amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
6907 				reg_value =
6908 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6909 				if (reg_value)
6910 					gfx_v9_0_ras_error_count(adev,
6911 						&gfx_v9_0_edc_counter_regs[i],
6912 						j, k, reg_value,
6913 						&sec_count, &ded_count);
6914 			}
6915 		}
6916 	}
6917 
6918 	err_data->ce_count += sec_count;
6919 	err_data->ue_count += ded_count;
6920 
6921 	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6922 	mutex_unlock(&adev->grbm_idx_mutex);
6923 
6924 	gfx_v9_0_query_utc_edc_status(adev, err_data);
6925 }
6926 
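/* Emit an ACQUIRE_MEM packet that invalidates the shader I$/K$ and the TC
 * caches (with TC writeback) so subsequent operations see coherent memory.
 */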
6927 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6928 {
6929 	const unsigned int cp_coher_cntl =
6930 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6931 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6932 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6933 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6934 			PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6935 
6936 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6937 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6938 	amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6939 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6940 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6941 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6942 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6943 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6944 }
6945 
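/* Program SPI_WCL_PIPE_PERCENT_CS for one compute pipe: clamp its wave
 * budget to the minimum while @enable is set, restore the default otherwise.
 */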
6946 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6947 					uint32_t pipe, bool enable)
6948 {
6949 	struct amdgpu_device *adev = ring->adev;
6950 	uint32_t val;
6951 	uint32_t wcl_cs_reg;
6952 
6953 	/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are all the same */
6954 	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6955 
6956 	switch (pipe) {
6957 	case 0:
6958 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6959 		break;
6960 	case 1:
6961 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6962 		break;
6963 	case 2:
6964 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6965 		break;
6966 	case 3:
6967 		wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6968 		break;
6969 	default:
6970 		DRM_DEBUG("invalid pipe %d\n", pipe);
6971 		return;
6972 	}
6973 
6974 	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6975 }
6976 
6977 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6978 {
6979 	struct amdgpu_device *adev = ring->adev;
6980 	uint32_t val;
6981 	int i;
6982 
6984 	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register that limits
6985 	 * the number of gfx waves. Setting the low 5 bits (0x1f) makes sure gfx
6986 	 * only gets around 25% of the gpu resources.
6987 	 */
6988 	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6989 	amdgpu_ring_emit_wreg(ring,
6990 			      SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6991 			      val);
6992 
6993 	/* Restrict waves for normal/low priority compute queues as well
6994 	 * to get the best QoS for high priority compute jobs.
6995 	 *
6996 	 * amdgpu controls only the 1st ME (CS pipes 0-3).
6997 	 */
6998 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6999 		if (i != ring->pipe)
7000 			gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
7001 	}
7003 }
7004 
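/* Print the register state captured by gfx_v9_ip_dump(): first the core GC
 * registers, then the CP registers of every compute queue.
 */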
7005 static void gfx_v9_ip_print(void *handle, struct drm_printer *p)
7006 {
7007 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7008 	uint32_t i, j, k, reg, index = 0;
7009 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7010 
7011 	if (!adev->gfx.ip_dump_core)
7012 		return;
7013 
7014 	for (i = 0; i < reg_count; i++)
7015 		drm_printf(p, "%-50s \t 0x%08x\n",
7016 			   gc_reg_list_9[i].reg_name,
7017 			   adev->gfx.ip_dump_core[i]);
7018 
7019 	/* print compute queue registers for all instances */
7020 	if (!adev->gfx.ip_dump_compute_queues)
7021 		return;
7022 
7023 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7024 	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7025 		   adev->gfx.mec.num_mec,
7026 		   adev->gfx.mec.num_pipe_per_mec,
7027 		   adev->gfx.mec.num_queue_per_pipe);
7028 
7029 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7030 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7031 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7032 				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7033 				for (reg = 0; reg < reg_count; reg++) {
7034 					drm_printf(p, "%-50s \t 0x%08x\n",
7035 						   gc_cp_reg_list_9[reg].reg_name,
7036 						   adev->gfx.ip_dump_compute_queues[index + reg]);
7037 				}
7038 				index += reg_count;
7039 			}
7040 		}
7041 	}
7042 
7043 }
7045 static void gfx_v9_ip_dump(void *handle)
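/* Capture the core GC registers and, with GFXOFF disabled and each compute
 * queue selected via soc15_grbm_select(), the per-queue CP registers into
 * the ip_dump buffers for later printing.
 */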
7046 {
7047 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
7048 	uint32_t i, j, k, reg, index = 0;
7049 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7050 
7051 	if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
7052 		return;
7053 
7054 	amdgpu_gfx_off_ctrl(adev, false);
7055 	for (i = 0; i < reg_count; i++)
7056 		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7057 	amdgpu_gfx_off_ctrl(adev, true);
7058 
7059 	/* dump compute queue registers for all instances */
7060 	if (!adev->gfx.ip_dump_compute_queues)
7061 		return;
7062 
7063 	reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7064 	amdgpu_gfx_off_ctrl(adev, false);
7065 	mutex_lock(&adev->srbm_mutex);
7066 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7067 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7068 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7069 				/* ME0 is for GFX so start from 1 for CP */
7070 				soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
7071 
7072 				for (reg = 0; reg < reg_count; reg++) {
7073 					adev->gfx.ip_dump_compute_queues[index + reg] =
7074 						RREG32(SOC15_REG_ENTRY_OFFSET(
7075 							gc_cp_reg_list_9[reg]));
7076 				}
7077 				index += reg_count;
7078 			}
7079 		}
7080 	}
7081 	soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7082 	mutex_unlock(&adev->srbm_mutex);
7083 	amdgpu_gfx_off_ctrl(adev, true);
7084 }
7086 
7087 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
7088 	.name = "gfx_v9_0",
7089 	.early_init = gfx_v9_0_early_init,
7090 	.late_init = gfx_v9_0_late_init,
7091 	.sw_init = gfx_v9_0_sw_init,
7092 	.sw_fini = gfx_v9_0_sw_fini,
7093 	.hw_init = gfx_v9_0_hw_init,
7094 	.hw_fini = gfx_v9_0_hw_fini,
7095 	.suspend = gfx_v9_0_suspend,
7096 	.resume = gfx_v9_0_resume,
7097 	.is_idle = gfx_v9_0_is_idle,
7098 	.wait_for_idle = gfx_v9_0_wait_for_idle,
7099 	.soft_reset = gfx_v9_0_soft_reset,
7100 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
7101 	.set_powergating_state = gfx_v9_0_set_powergating_state,
7102 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
7103 	.dump_ip_state = gfx_v9_ip_dump,
7104 	.print_ip_state = gfx_v9_ip_print,
7105 };
7106 
7107 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
7108 	.type = AMDGPU_RING_TYPE_GFX,
7109 	.align_mask = 0xff,
7110 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7111 	.support_64bit_ptrs = true,
7112 	.secure_submission_supported = true,
7113 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
7114 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
7115 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
7116 	.emit_frame_size = /* a total of 242 dwords maximum if 16 IBs */
7117 		5 +  /* COND_EXEC */
7118 		7 +  /* PIPELINE_SYNC */
7119 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7120 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7121 		2 + /* VM_FLUSH */
7122 		8 +  /* FENCE for VM_FLUSH */
7123 		20 + /* GDS switch */
7124 		4 + /* double SWITCH_BUFFER,
7125 		     * the first COND_EXEC jumps to the place just
7126 		     * prior to this double SWITCH_BUFFER */
7127 		5 + /* COND_EXEC */
7128 		7 +	 /*	HDP_flush */
7129 		4 +	 /*	VGT_flush */
7130 		14 + /*	CE_META */
7131 		31 + /*	DE_META */
7132 		3 + /* CNTX_CTRL */
7133 		5 + /* HDP_INVL */
7134 		8 + 8 + /* FENCE x2 */
7135 		2 + /* SWITCH_BUFFER */
7136 		7, /* gfx_v9_0_emit_mem_sync */
7137 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
7138 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7139 	.emit_fence = gfx_v9_0_ring_emit_fence,
7140 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7141 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7142 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7143 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7144 	.test_ring = gfx_v9_0_ring_test_ring,
7145 	.insert_nop = amdgpu_ring_insert_nop,
7146 	.pad_ib = amdgpu_ring_generic_pad_ib,
7147 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
7148 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7149 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7150 	.preempt_ib = gfx_v9_0_ring_preempt_ib,
7151 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7152 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7153 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7154 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7155 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7156 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7157 };
7158 
7159 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
7160 	.type = AMDGPU_RING_TYPE_GFX,
7161 	.align_mask = 0xff,
7162 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7163 	.support_64bit_ptrs = true,
7164 	.secure_submission_supported = true,
7165 	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
7166 	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
7167 	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
7168 	.emit_frame_size = /* a total of 242 dwords maximum if 16 IBs */
7169 		5 +  /* COND_EXEC */
7170 		7 +  /* PIPELINE_SYNC */
7171 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7172 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7173 		2 + /* VM_FLUSH */
7174 		8 +  /* FENCE for VM_FLUSH */
7175 		20 + /* GDS switch */
7176 		4 + /* double SWITCH_BUFFER,
7177 		     * the first COND_EXEC jumps to the place just
7178 		     * prior to this double SWITCH_BUFFER
7179 		     */
7180 		5 + /* COND_EXEC */
7181 		7 +	 /*	HDP_flush */
7182 		4 +	 /*	VGT_flush */
7183 		14 + /*	CE_META */
7184 		31 + /*	DE_META */
7185 		3 + /* CNTX_CTRL */
7186 		5 + /* HDP_INVL */
7187 		8 + 8 + /* FENCE x2 */
7188 		2 + /* SWITCH_BUFFER */
7189 		7, /* gfx_v9_0_emit_mem_sync */
7190 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
7191 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7192 	.emit_fence = gfx_v9_0_ring_emit_fence,
7193 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7194 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7195 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7196 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7197 	.test_ring = gfx_v9_0_ring_test_ring,
7198 	.test_ib = gfx_v9_0_ring_test_ib,
7199 	.insert_nop = amdgpu_sw_ring_insert_nop,
7200 	.pad_ib = amdgpu_ring_generic_pad_ib,
7201 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
7202 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7203 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7204 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7205 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
7206 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7207 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7208 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
7209 	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
7210 	.patch_cntl = gfx_v9_0_ring_patch_cntl,
7211 	.patch_de = gfx_v9_0_ring_patch_de_meta,
7212 	.patch_ce = gfx_v9_0_ring_patch_ce_meta,
7213 };
7214 
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
		7 + /* gfx_v9_0_emit_mem_sync */
		5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
	.emit_wave_limit = gfx_v9_0_emit_wave_limit,
};

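/*
 * Ring callbacks for the kernel interface queue (KIQ). The KIQ is used by
 * the driver itself (e.g. for the emit_rreg/emit_wreg register accessors
 * below) rather than for user command submission.
 */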
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 + /* gfx_v9_0_ring_emit_hdp_flush */
		5 + /* hdp invalidate */
		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

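/* Assign the ring function tables for the KIQ, gfx, software gfx and compute rings. */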
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = amdgpu_gfx_cp_ecc_error_irq,
};

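/* Set up the GFX interrupt sources (EOP, privileged reg/inst faults, CP ECC errors). */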
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

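/* All supported gfx9 IP versions use the common gfx_v9_0_rlc_funcs table. */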
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
	case IP_VERSION(9, 3, 0):
	case IP_VERSION(9, 4, 2):
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

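/*
 * Set up the per-ASIC Global Data Share (GDS) size, the GDS compute
 * max wave ID, and the GWS and OA sizes.
 */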
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC GDS info */
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 2, 1):
	case IP_VERSION(9, 4, 0):
		adev->gds.gds_size = 0x10000;
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	case IP_VERSION(9, 4, 1):
		adev->gds.gds_size = 0x1000;
		break;
	case IP_VERSION(9, 4, 2):
		/* Aldebaran removed all of the internal GDS memory;
		 * only GWS opcodes (e.g. barrier, semaphore) are still
		 * supported in the kernel.
		 */
		adev->gds.gds_size = 0;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(9, 0, 1):
	case IP_VERSION(9, 4, 0):
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case IP_VERSION(9, 2, 1):
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	case IP_VERSION(9, 4, 1):
		adev->gds.gds_compute_max_wave_id = 0xfff;
		break;
	case IP_VERSION(9, 4, 2):
		/* deprecated on Aldebaran, not used at all */
		adev->gds.gds_compute_max_wave_id = 0;
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
}

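/*
 * Mark the CUs given in @bitmap as inactive in GC_USER_SHADER_ARRAY_CONFIG
 * for the currently selected SE/SH, honoring a user-requested CU disable mask.
 */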
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

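/*
 * Return the bitmap of active CUs for the currently selected SE/SH: CUs
 * that are neither fused off nor user-disabled count as active.
 */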
static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

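/*
 * Walk every SE/SH, fill cu_info with the per-SH active CU bitmaps and the
 * always-on (AO) CU mask, and report the total number of active CUs.
 */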
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 4];

	if (!adev || !cu_info)
		return -EINVAL;

	/*
	 * 16 comes from the 4 * 4 bitmap array size, which covers all gfx9 ASICs
	 */
	if (adev->gfx.config.max_shader_engines *
		adev->gfx.config.max_sh_per_se > 16)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
			gfx_v9_0_set_user_cu_inactive_bitmap(
				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

			/*
			 * The bitmap (and ao_cu_bitmap) in the cu_info
			 * structure is a 4x4 array, which fits Vega ASICs
			 * with their 4*2 SE/SH layout.
			 * Arcturus, however, uses an 8*1 SE/SH layout.
			 * To minimize the impact, we fold it into the
			 * existing bitmap array as follows:
			 *    SE4,SH0 --> bitmap[0][1]
			 *    SE5,SH0 --> bitmap[1][1]
			 *    SE6,SH0 --> bitmap[2][1]
			 *    SE7,SH0 --> bitmap[3][1]
			 */
			cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;

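			/*
			 * Count the active CUs in this SH and add them to
			 * the always-on bitmap.
			 */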
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
		}
	}
	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

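/* GFX v9.0 IP block descriptor registered with the amdgpu IP framework. */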
const struct amdgpu_ip_block_version gfx_v9_0_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};