/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

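/*
 * PWR_MISC_CNTL_STATUS is not covered by the GC register headers included
 * above, so its offset and fields are defined locally.  Judging by the masks,
 * RLC_CGPG_EN is bit 0 and GFXOFF_STATUS is the two-bit field at bits [2:1].
 */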
#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

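/*
 * Arcturus uses a wider set of TCP channel steering registers that are not
 * part of the gc_9_0 headers, so their offsets are defined here; they are
 * programmed through golden_settings_gc_9_4_1_arct below.
 */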
#define mmTCP_CHAN_STEER_0_ARCT								0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
#define mmTCP_CHAN_STEER_1_ARCT								0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
#define mmTCP_CHAN_STEER_2_ARCT								0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
#define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
#define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
#define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0

enum ta_ras_gfx_subblock {
	/*CPC*/
	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
	TA_RAS_BLOCK__GFX_CPC_UCODE,
	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
	/* CPF*/
	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
	TA_RAS_BLOCK__GFX_CPF_TAG,
	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
	/* CPG*/
	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
	TA_RAS_BLOCK__GFX_CPG_TAG,
	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
	/* GDS*/
	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
	/* SPI*/
	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
	/* SQ*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
	TA_RAS_BLOCK__GFX_SQ_LDS_D,
	TA_RAS_BLOCK__GFX_SQ_LDS_I,
	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
	/* SQC (3 ranges)*/
	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	/* SQC range 0*/
	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
	/* SQC range 1*/
	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
	/* SQC range 2*/
	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
	/* TA*/
	TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
	/* TCA*/
	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
	/* TCC (5 sub-ranges)*/
	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	/* TCC range 0*/
	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
	/* TCC range 1*/
	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
	/* TCC range 2*/
	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
	/* TCC range 3*/
	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
	/* TCC range 4*/
	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
	/* TCI*/
	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
	/* TCP*/
	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
	/* TD*/
	TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
	/* EA (3 sub-ranges)*/
	TA_RAS_BLOCK__GFX_EA_INDEX_START,
	/* EA range 0*/
	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
	/* EA range 1*/
	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
	/* EA range 2*/
	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
	/* UTC VM L2 bank*/
	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
	/* UTC VM walker*/
	TA_RAS_BLOCK__UTC_VML2_WALKER,
	/* UTC ATC L2 2MB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
	/* UTC ATC L2 4KB cache*/
	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
	TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
	unsigned char *name;
	int ta_subblock;
	int hw_supported_error_type;
	int sw_supported_error_type;
};

#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
		#subblock,                                                     \
		TA_RAS_BLOCK__##subblock,                                      \
		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
	}
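/*
 * For illustration: AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1)
 * expands to { "GFX_CPC_SCRATCH", TA_RAS_BLOCK__GFX_CPC_SCRATCH, 0xe, 0x6 },
 * i.e. flags a-d are packed into hw_supported_error_type and e-h into
 * sw_supported_error_type.  The individual flag meanings are defined by the
 * RAS TA interface and are not spelled out here.
 */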

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
			     0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
			     1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
			     0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
			     0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

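/*
 * Golden register tables: each soc15_reg_golden entry names an IP block,
 * instance, register, a mask of bits to touch and the value to program.
 * soc15_program_register_sequence() applies them as a read-modify-write,
 * roughly clearing the masked bits and ORing in the new value; see the
 * soc15_reg_golden definition in soc15.h for the exact field layout.
 */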
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
};

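/*
 * Offsets of the RLC save/restore-machine index control ADDR/DATA register
 * pairs, expressed relative to instance 0 so they can presumably be indexed
 * by a loop counter when the SRM index controls are programmed.
 */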
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

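/*
 * Expected GB_ADDR_CONFIG values after golden-register init; they match the
 * GB_ADDR_CONFIG entries in the per-ASIC golden tables above (vega10
 * 0x2a114042, vega12 0x24104041, raven 0x24000042, raven2 0x26013041).
 */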
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
					  void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
				     void *inject_if);

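/*
 * Apply the per-ASIC golden register sequences defined above.  Every ASIC
 * except Arcturus additionally gets the common gc_9_x settings.
 */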
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg10,
						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_ARCTURUS:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_4_1_arct,
						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	if (adev->asic_type != CHIP_ARCTURUS)
		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

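/*
 * Emit a PACKET3_WRITE_DATA that writes @val into register @reg: the packet
 * body is the control word (engine select, DST_SEL(0) = register space,
 * optional write confirm), destination address low (the register offset),
 * address high (0) and the data dword.
 */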
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

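/*
 * Emit a PACKET3_WAIT_REG_MEM: the CP polls either a register pair or a
 * memory location (selected by @mem_space) until (value & @mask) == @ref,
 * using compare function 3 ("equal"); @inv is the poll interval field.
 */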
static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
				 /* memory (1) or register (0) */
				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
				 WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

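/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, submit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, and poll until the
 * value lands or adev->usec_timeout expires.
 */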
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

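/*
 * Indirect-buffer test: write 0xDEADBEEF to a writeback slot through a small
 * IB, wait for its fence, and check that the value actually reached memory.
 */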
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

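/* Release every CP/RLC firmware image and the cached RLC register list. */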
static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

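/*
 * Parse the v2.1 RLC firmware header to pick up the three save/restore
 * lists (CNTL, GPM and SRM) along with their versions and sizes.
 */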
static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

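/*
 * Judging by the flag names, me_fw_write_wait and mec_fw_write_wait record
 * whether the CP ME/PFP and MEC firmware are recent enough for write-then-wait
 * register handling used elsewhere in this file; the per-ASIC minimum
 * ucode/feature versions are checked below.
 */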
static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA12:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_VEGA20:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 44) &&
		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
		    (adev->gfx.pfp_feature_version >= 44))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
		    (adev->gfx.mec_feature_version >= 44))
			adev->gfx.mec_fw_write_wait = true;
		break;
	case CHIP_RAVEN:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
		    (adev->gfx.me_feature_version >= 42) &&
		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
		    (adev->gfx.pfp_feature_version >= 42))
			adev->gfx.me_fw_write_wait = true;

		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
		    (adev->gfx.mec_feature_version >= 42))
			adev->gfx.mec_fw_write_wait = true;
		break;
	default:
		break;
	}
}

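/*
 * Vega parts are left untouched here; on Raven (other than Raven2 and
 * Picasso 0x15d8), GFXOFF is disabled unless the RLC firmware passes the
 * version checks below.
 */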
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			break;
		if ((adev->gfx.rlc_fw_version != 106 &&
		     adev->gfx.rlc_fw_version < 531) ||
		    (adev->gfx.rlc_fw_version == 53815) ||
		    (adev->gfx.rlc_feature_version < 1) ||
		    !adev->gfx.rlc.is_rlc_v2_1)
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		break;
	default:
		break;
	}
}

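/*
 * Fetch and validate the PFP, ME and CE microcode for @chip_name, record
 * their ucode/feature versions, and register them with the PSP front-door
 * loader when load_type is AMDGPU_FW_LOAD_PSP.
 */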
static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
	}
	return err;
}

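/*
 * Fetch the RLC microcode.  Picasso AM4 boards and "kicker" Raven parts use
 * dedicated binaries (see the selection logic below); the header is then
 * parsed for the register list and, for v2.1 firmware, the save/restore
 * lists.
 */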
static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL;
	unsigned int i = 0;
	uint16_t version_major;
	uint16_t version_minor;
	uint32_t smu_version;

	/*
	 * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
	 * instead of picasso_rlc.bin.
	 * Judgment method:
	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
	 *          or revision >= 0xD8 && revision <= 0xDF
	 * otherwise it is PCO FP5
	 */
	if (!strcmp(chip_name, "picasso") &&
		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
		(smu_version >= 0x41e2b))
		/*
		 * SMC is loaded by SBIOS on APUs, so the SMU version can be
		 * queried directly.
		 */
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
	else
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2 && version_minor == 1)
		adev->gfx.rlc.is_rlc_v2_1 = true;

	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
	}
	return err;
}

1228 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1229 					  const char *chip_name)
1230 {
1231 	char fw_name[30];
1232 	int err;
1233 	struct amdgpu_firmware_info *info = NULL;
1234 	const struct common_firmware_header *header = NULL;
1235 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1236 
1237 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1238 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1239 	if (err)
1240 		goto out;
1241 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1242 	if (err)
1243 		goto out;
1244 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1245 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1246 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1247 
1248 
1249 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1250 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1251 	if (!err) {
1252 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1253 		if (err)
1254 			goto out;
1255 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1256 			adev->gfx.mec2_fw->data;
1257 		adev->gfx.mec2_fw_version =
1258 			le32_to_cpu(cp_hdr->header.ucode_version);
1259 		adev->gfx.mec2_feature_version =
1260 			le32_to_cpu(cp_hdr->ucode_feature_version);
1261 	} else {
1262 		err = 0;
1263 		adev->gfx.mec2_fw = NULL;
1264 	}
1265 
1266 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1267 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1268 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1269 		info->fw = adev->gfx.mec_fw;
1270 		header = (const struct common_firmware_header *)info->fw->data;
1271 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1272 		adev->firmware.fw_size +=
1273 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1274 
1275 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1276 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1277 		info->fw = adev->gfx.mec_fw;
1278 		adev->firmware.fw_size +=
1279 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1280 
1281 		if (adev->gfx.mec2_fw) {
1282 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1283 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1284 			info->fw = adev->gfx.mec2_fw;
1285 			header = (const struct common_firmware_header *)info->fw->data;
1286 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1287 			adev->firmware.fw_size +=
1288 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1289 
1290 			/* TODO: Determine if MEC2 JT FW loading can be removed
1291 				 for all GFX v9 ASICs and above */
1292 			if (adev->asic_type != CHIP_ARCTURUS) {
1293 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1294 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1295 				info->fw = adev->gfx.mec2_fw;
1296 				adev->firmware.fw_size +=
1297 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1298 					PAGE_SIZE);
1299 			}
1300 		}
1301 	}
1302 
1303 out:
1304 	gfx_v9_0_check_if_need_gfxoff(adev);
1305 	gfx_v9_0_check_fw_write_wait(adev);
1306 	if (err) {
1307 		dev_err(adev->dev,
1308 			"gfx9: Failed to load firmware \"%s\"\n",
1309 			fw_name);
1310 		release_firmware(adev->gfx.mec_fw);
1311 		adev->gfx.mec_fw = NULL;
1312 		release_firmware(adev->gfx.mec2_fw);
1313 		adev->gfx.mec2_fw = NULL;
1314 	}
1315 	return err;
1316 }
1317 
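/*
 * Select the firmware name prefix for the current ASIC and fetch all of
 * the GFX microcode images: CP gfx (PFP/ME/CE) where a graphics pipe
 * exists, RLC, and the MEC compute firmware.  Arcturus has no CP gfx
 * pipe, so that step is skipped there.
 */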
1318 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1319 {
1320 	const char *chip_name;
1321 	int r;
1322 
1323 	DRM_DEBUG("\n");
1324 
1325 	switch (adev->asic_type) {
1326 	case CHIP_VEGA10:
1327 		chip_name = "vega10";
1328 		break;
1329 	case CHIP_VEGA12:
1330 		chip_name = "vega12";
1331 		break;
1332 	case CHIP_VEGA20:
1333 		chip_name = "vega20";
1334 		break;
1335 	case CHIP_RAVEN:
1336 		if (adev->rev_id >= 8)
1337 			chip_name = "raven2";
1338 		else if (adev->pdev->device == 0x15d8)
1339 			chip_name = "picasso";
1340 		else
1341 			chip_name = "raven";
1342 		break;
1343 	case CHIP_ARCTURUS:
1344 		chip_name = "arcturus";
1345 		break;
1346 	default:
1347 		BUG();
1348 	}
1349 
1350 	/* No CPG in Arcturus */
1351 	if (adev->asic_type != CHIP_ARCTURUS) {
1352 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1353 		if (r)
1354 			return r;
1355 	}
1356 
1357 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1358 	if (r)
1359 		return r;
1360 
1361 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1362 	if (r)
1363 		return r;
1364 
1365 	return 0;
1366 }
1367 
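/*
 * Size of the clear state buffer in dwords: begin-clear-state preamble
 * (2), context control (3), a SET_CONTEXT_REG header pair plus
 * reg_count dwords per context extent, end-clear-state preamble (2) and
 * the CLEAR_STATE packet (2).
 */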
1368 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1369 {
1370 	u32 count = 0;
1371 	const struct cs_section_def *sect = NULL;
1372 	const struct cs_extent_def *ext = NULL;
1373 
1374 	/* begin clear state */
1375 	count += 2;
1376 	/* context control state */
1377 	count += 3;
1378 
1379 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1380 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1381 			if (sect->id == SECT_CONTEXT)
1382 				count += 2 + ext->reg_count;
1383 			else
1384 				return 0;
1385 		}
1386 	}
1387 
1388 	/* end clear state */
1389 	count += 2;
1390 	/* clear state */
1391 	count += 2;
1392 
1393 	return count;
1394 }
1395 
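/*
 * Emit the clear state PM4 stream into the indirect buffer; the layout
 * mirrors the dword count computed by gfx_v9_0_get_csb_size() above.
 */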
1396 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1397 				    volatile u32 *buffer)
1398 {
1399 	u32 count = 0, i;
1400 	const struct cs_section_def *sect = NULL;
1401 	const struct cs_extent_def *ext = NULL;
1402 
1403 	if (adev->gfx.rlc.cs_data == NULL)
1404 		return;
1405 	if (buffer == NULL)
1406 		return;
1407 
1408 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1409 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1410 
1411 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1412 	buffer[count++] = cpu_to_le32(0x80000000);
1413 	buffer[count++] = cpu_to_le32(0x80000000);
1414 
1415 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1416 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1417 			if (sect->id == SECT_CONTEXT) {
1418 				buffer[count++] =
1419 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1420 				buffer[count++] = cpu_to_le32(ext->reg_index -
1421 						PACKET3_SET_CONTEXT_REG_START);
1422 				for (i = 0; i < ext->reg_count; i++)
1423 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1424 			} else {
1425 				return;
1426 			}
1427 		}
1428 	}
1429 
1430 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1431 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1432 
1433 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1434 	buffer[count++] = cpu_to_le32(0);
1435 }
1436 
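/*
 * Program the per-SE/SH always-on CU masks used by RLC power gating:
 * the first pg_always_on_cu_num CUs go to RLC_PG_ALWAYS_ON_CU_MASK and
 * the first always_on_cu_num CUs (4 on APUs, 8 on Vega12, 12 otherwise)
 * to RLC_LB_ALWAYS_ACTIVE_CU_MASK.
 */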
1437 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1438 {
1439 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1440 	uint32_t pg_always_on_cu_num = 2;
1441 	uint32_t always_on_cu_num;
1442 	uint32_t i, j, k;
1443 	uint32_t mask, cu_bitmap, counter;
1444 
1445 	if (adev->flags & AMD_IS_APU)
1446 		always_on_cu_num = 4;
1447 	else if (adev->asic_type == CHIP_VEGA12)
1448 		always_on_cu_num = 8;
1449 	else
1450 		always_on_cu_num = 12;
1451 
1452 	mutex_lock(&adev->grbm_idx_mutex);
1453 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1454 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1455 			mask = 1;
1456 			cu_bitmap = 0;
1457 			counter = 0;
1458 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1459 
1460 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1461 				if (cu_info->bitmap[i][j] & mask) {
1462 					if (counter == pg_always_on_cu_num)
1463 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1464 					if (counter < always_on_cu_num)
1465 						cu_bitmap |= mask;
1466 					else
1467 						break;
1468 					counter++;
1469 				}
1470 				mask <<= 1;
1471 			}
1472 
1473 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1474 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1475 		}
1476 	}
1477 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1478 	mutex_unlock(&adev->grbm_idx_mutex);
1479 }
1480 
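/*
 * RLC load-balancing (LBPW) threshold and counter setup used on Raven;
 * gfx_v9_4_init_lbpw() below is the Vega20 variant with different
 * thresholds.
 */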
1481 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1482 {
1483 	uint32_t data;
1484 
1485 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1486 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1487 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1488 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1489 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1490 
1491 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1492 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1493 
1494 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1495 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1496 
1497 	mutex_lock(&adev->grbm_idx_mutex);
1498 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1499 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1500 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1501 
1502 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1503 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1504 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1505 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1506 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1507 
1508 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1509 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1510 	data &= 0x0000FFFF;
1511 	data |= 0x00C00000;
1512 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1513 
1514 	/*
1515 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1516 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1517 	 */
1518 
1519 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved but is
1520 	 * used here as part of the RLC_LB_CNTL configuration */
1521 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1522 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1523 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1524 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1525 	mutex_unlock(&adev->grbm_idx_mutex);
1526 
1527 	gfx_v9_0_init_always_on_cu_mask(adev);
1528 }
1529 
1530 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1531 {
1532 	uint32_t data;
1533 
1534 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1535 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1536 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1537 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1538 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1539 
1540 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1541 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1542 
1543 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1544 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1545 
1546 	mutex_lock(&adev->grbm_idx_mutex);
1547 	/* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1548 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1549 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1550 
1551 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1552 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1553 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1554 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1555 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1556 
1557 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1558 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1559 	data &= 0x0000FFFF;
1560 	data |= 0x00C00000;
1561 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1562 
1563 	/*
1564 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1565 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1566 	 */
1567 
1568 	/* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved but is
1569 	 * used here as part of the RLC_LB_CNTL configuration */
1570 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1571 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1572 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1573 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1574 	mutex_unlock(&adev->grbm_idx_mutex);
1575 
1576 	gfx_v9_0_init_always_on_cu_mask(adev);
1577 }
1578 
1579 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1580 {
1581 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1582 }
1583 
1584 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1585 {
1586 	return 5;
1587 }
1588 
1589 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1590 {
1591 	const struct cs_section_def *cs_data;
1592 	int r;
1593 
1594 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1595 
1596 	cs_data = adev->gfx.rlc.cs_data;
1597 
1598 	if (cs_data) {
1599 		/* init clear state block */
1600 		r = amdgpu_gfx_rlc_init_csb(adev);
1601 		if (r)
1602 			return r;
1603 	}
1604 
1605 	if (adev->asic_type == CHIP_RAVEN) {
1606 		/* TODO: double check the cp_table_size for RV */
1607 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1608 		r = amdgpu_gfx_rlc_init_cpt(adev);
1609 		if (r)
1610 			return r;
1611 	}
1612 
1613 	switch (adev->asic_type) {
1614 	case CHIP_RAVEN:
1615 		gfx_v9_0_init_lbpw(adev);
1616 		break;
1617 	case CHIP_VEGA20:
1618 		gfx_v9_4_init_lbpw(adev);
1619 		break;
1620 	default:
1621 		break;
1622 	}
1623 
1624 	return 0;
1625 }
1626 
1627 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1628 {
1629 	int r;
1630 
1631 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1632 	if (unlikely(r != 0))
1633 		return r;
1634 
1635 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1636 			AMDGPU_GEM_DOMAIN_VRAM);
1637 	if (!r)
1638 		adev->gfx.rlc.clear_state_gpu_addr =
1639 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1640 
1641 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1642 
1643 	return r;
1644 }
1645 
1646 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1647 {
1648 	int r;
1649 
1650 	if (!adev->gfx.rlc.clear_state_obj)
1651 		return;
1652 
1653 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1654 	if (likely(r == 0)) {
1655 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1656 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1657 	}
1658 }
1659 
1660 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1661 {
1662 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1663 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1664 }
1665 
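/*
 * Allocate the MEC HPD EOP buffer in VRAM (one GFX9_MEC_HPD_SIZE slot
 * per enabled compute ring) and stage a copy of the MEC firmware image
 * in a GTT buffer object.
 */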
1666 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1667 {
1668 	int r;
1669 	u32 *hpd;
1670 	const __le32 *fw_data;
1671 	unsigned fw_size;
1672 	u32 *fw;
1673 	size_t mec_hpd_size;
1674 
1675 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1676 
1677 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1678 
1679 	/* take ownership of the relevant compute queues */
1680 	amdgpu_gfx_compute_queue_acquire(adev);
1681 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1682 
1683 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1684 				      AMDGPU_GEM_DOMAIN_VRAM,
1685 				      &adev->gfx.mec.hpd_eop_obj,
1686 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1687 				      (void **)&hpd);
1688 	if (r) {
1689 		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1690 		gfx_v9_0_mec_fini(adev);
1691 		return r;
1692 	}
1693 
1694 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1695 
1696 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1697 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1698 
1699 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1700 
1701 	fw_data = (const __le32 *)
1702 		(adev->gfx.mec_fw->data +
1703 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1704 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1705 
1706 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1707 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1708 				      &adev->gfx.mec.mec_fw_obj,
1709 				      &adev->gfx.mec.mec_fw_gpu_addr,
1710 				      (void **)&fw);
1711 	if (r) {
1712 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1713 		gfx_v9_0_mec_fini(adev);
1714 		return r;
1715 	}
1716 
1717 	memcpy(fw, fw_data, fw_size);
1718 
1719 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1720 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1721 
1722 	return 0;
1723 }
1724 
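/*
 * Indirect SQ register read for wave debugging: program SQ_IND_INDEX
 * with the SIMD/wave/register address and read the value back through
 * SQ_IND_DATA.
 */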
1725 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1726 {
1727 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1728 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1729 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1730 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1731 		(SQ_IND_INDEX__FORCE_READ_MASK));
1732 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1733 }
1734 
1735 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1736 			   uint32_t wave, uint32_t thread,
1737 			   uint32_t regno, uint32_t num, uint32_t *out)
1738 {
1739 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1740 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1741 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1742 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1743 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1744 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1745 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1746 	while (num--)
1747 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1748 }
1749 
1750 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1751 {
1752 	/* type 1 wave data */
1753 	dst[(*no_fields)++] = 1;
1754 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1755 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1756 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1757 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1758 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1759 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1760 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1761 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1762 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1763 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1764 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1765 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1766 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1767 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1768 }
1769 
1770 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1771 				     uint32_t wave, uint32_t start,
1772 				     uint32_t size, uint32_t *dst)
1773 {
1774 	wave_read_regs(
1775 		adev, simd, wave, 0,
1776 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1777 }
1778 
1779 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1780 				     uint32_t wave, uint32_t thread,
1781 				     uint32_t start, uint32_t size,
1782 				     uint32_t *dst)
1783 {
1784 	wave_read_regs(
1785 		adev, simd, wave, thread,
1786 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1787 }
1788 
1789 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1790 				  u32 me, u32 pipe, u32 q, u32 vm)
1791 {
1792 	soc15_grbm_select(adev, me, pipe, q, vm);
1793 }
1794 
1795 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1796 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1797 	.select_se_sh = &gfx_v9_0_select_se_sh,
1798 	.read_wave_data = &gfx_v9_0_read_wave_data,
1799 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1800 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1801 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1802 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1803 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
1804 };
1805 
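/*
 * Per-ASIC gfx configuration: fixed context/FIFO sizes plus the golden
 * GB_ADDR_CONFIG value (read back and patched on Vega20/Arcturus), then
 * decode the individual GB_ADDR_CONFIG fields into adev->gfx.config.
 */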
1806 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1807 {
1808 	u32 gb_addr_config;
1809 	int err;
1810 
1811 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1812 
1813 	switch (adev->asic_type) {
1814 	case CHIP_VEGA10:
1815 		adev->gfx.config.max_hw_contexts = 8;
1816 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1817 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1818 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1819 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1820 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1821 		break;
1822 	case CHIP_VEGA12:
1823 		adev->gfx.config.max_hw_contexts = 8;
1824 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1825 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1826 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1827 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1828 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1829 		DRM_INFO("fix gfx.config for vega12\n");
1830 		break;
1831 	case CHIP_VEGA20:
1832 		adev->gfx.config.max_hw_contexts = 8;
1833 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1834 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1835 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1836 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1837 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1838 		gb_addr_config &= ~0xf3e777ff;
1839 		gb_addr_config |= 0x22014042;
1840 		/* check vbios table if gpu info is not available */
1841 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1842 		if (err)
1843 			return err;
1844 		break;
1845 	case CHIP_RAVEN:
1846 		adev->gfx.config.max_hw_contexts = 8;
1847 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1851 		if (adev->rev_id >= 8)
1852 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1853 		else
1854 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1855 		break;
1856 	case CHIP_ARCTURUS:
1857 		adev->gfx.config.max_hw_contexts = 8;
1858 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1859 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1860 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1861 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1862 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1863 		gb_addr_config &= ~0xf3e777ff;
1864 		gb_addr_config |= 0x22014042;
1865 		break;
1866 	default:
1867 		BUG();
1868 		break;
1869 	}
1870 
1871 	adev->gfx.config.gb_addr_config = gb_addr_config;
1872 
1873 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1874 			REG_GET_FIELD(
1875 					adev->gfx.config.gb_addr_config,
1876 					GB_ADDR_CONFIG,
1877 					NUM_PIPES);
1878 
1879 	adev->gfx.config.max_tile_pipes =
1880 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1881 
1882 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1883 			REG_GET_FIELD(
1884 					adev->gfx.config.gb_addr_config,
1885 					GB_ADDR_CONFIG,
1886 					NUM_BANKS);
1887 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1888 			REG_GET_FIELD(
1889 					adev->gfx.config.gb_addr_config,
1890 					GB_ADDR_CONFIG,
1891 					MAX_COMPRESSED_FRAGS);
1892 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1893 			REG_GET_FIELD(
1894 					adev->gfx.config.gb_addr_config,
1895 					GB_ADDR_CONFIG,
1896 					NUM_RB_PER_SE);
1897 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1898 			REG_GET_FIELD(
1899 					adev->gfx.config.gb_addr_config,
1900 					GB_ADDR_CONFIG,
1901 					NUM_SHADER_ENGINES);
1902 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1903 			REG_GET_FIELD(
1904 					adev->gfx.config.gb_addr_config,
1905 					GB_ADDR_CONFIG,
1906 					PIPE_INTERLEAVE_SIZE));
1907 
1908 	return 0;
1909 }
1910 
1911 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1912 				   struct amdgpu_ngg_buf *ngg_buf,
1913 				   int size_se,
1914 				   int default_size_se)
1915 {
1916 	int r;
1917 
1918 	if (size_se < 0) {
1919 		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1920 		return -EINVAL;
1921 	}
1922 	size_se = size_se ? size_se : default_size_se;
1923 
1924 	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1925 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1926 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1927 				    &ngg_buf->bo,
1928 				    &ngg_buf->gpu_addr,
1929 				    NULL);
1930 	if (r) {
1931 		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1932 		return r;
1933 	}
1934 	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1935 
1936 	return r;
1937 }
1938 
1939 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1940 {
1941 	int i;
1942 
1943 	for (i = 0; i < NGG_BUF_MAX; i++)
1944 		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1945 				      &adev->gfx.ngg.buf[i].gpu_addr,
1946 				      NULL);
1947 
1948 	memset(&adev->gfx.ngg.buf[0], 0,
1949 			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1950 
1951 	adev->gfx.ngg.init = false;
1952 
1953 	return 0;
1954 }
1955 
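/*
 * Reserve a small GDS window for NGG and allocate the per-SE NGG
 * buffers (primitive, position, control sideband and, optionally, the
 * parameter cache) in VRAM.
 */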
1956 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1957 {
1958 	int r;
1959 
1960 	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
1961 		return 0;
1962 
1963 	/* GDS reserve memory: 64 bytes alignment */
1964 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1965 	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1966 	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1967 	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1968 
1969 	/* Primitive Buffer */
1970 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1971 				    amdgpu_prim_buf_per_se,
1972 				    64 * 1024);
1973 	if (r) {
1974 		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1975 		goto err;
1976 	}
1977 
1978 	/* Position Buffer */
1979 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1980 				    amdgpu_pos_buf_per_se,
1981 				    256 * 1024);
1982 	if (r) {
1983 		dev_err(adev->dev, "Failed to create Position Buffer\n");
1984 		goto err;
1985 	}
1986 
1987 	/* Control Sideband */
1988 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1989 				    amdgpu_cntl_sb_buf_per_se,
1990 				    256);
1991 	if (r) {
1992 		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1993 		goto err;
1994 	}
1995 
1996 	/* Parameter Cache, not created by default */
1997 	if (amdgpu_param_buf_per_se <= 0)
1998 		goto out;
1999 
2000 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2001 				    amdgpu_param_buf_per_se,
2002 				    512 * 1024);
2003 	if (r) {
2004 		dev_err(adev->dev, "Failed to create Parameter Cache\n");
2005 		goto err;
2006 	}
2007 
2008 out:
2009 	adev->gfx.ngg.init = true;
2010 	return 0;
2011 err:
2012 	gfx_v9_0_ngg_fini(adev);
2013 	return r;
2014 }
2015 
2016 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2017 {
2018 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2019 	int r;
2020 	u32 data, base;
2021 
2022 	if (!amdgpu_ngg)
2023 		return 0;
2024 
2025 	/* Program buffer size */
2026 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2027 			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2028 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2029 			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
2030 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2031 
2032 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2033 			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2034 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2035 			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2036 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2037 
2038 	/* Program buffer base address */
2039 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2040 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2041 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2042 
2043 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2044 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2045 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2046 
2047 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2048 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2049 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2050 
2051 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2052 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2053 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2054 
2055 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2056 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2057 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2058 
2059 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2060 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2061 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2062 
2063 	/* Clear GDS reserved memory */
2064 	r = amdgpu_ring_alloc(ring, 17);
2065 	if (r) {
2066 		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2067 			  ring->name, r);
2068 		return r;
2069 	}
2070 
2071 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2072 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2073 			           (adev->gds.gds_size +
2074 				    adev->gfx.ngg.gds_reserve_size));
2075 
2076 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2077 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2078 				PACKET3_DMA_DATA_DST_SEL(1) |
2079 				PACKET3_DMA_DATA_SRC_SEL(2)));
2080 	amdgpu_ring_write(ring, 0);
2081 	amdgpu_ring_write(ring, 0);
2082 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2083 	amdgpu_ring_write(ring, 0);
2084 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2085 				adev->gfx.ngg.gds_reserve_size);
2086 
2087 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2088 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2089 
2090 	amdgpu_ring_commit(ring);
2091 
2092 	return 0;
2093 }
2094 
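/*
 * Set up one compute ring: map it onto a MEC/pipe/queue slot, assign
 * its doorbell and HPD EOP slot, and hook it up to the matching
 * per-pipe EOP interrupt source.
 */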
2095 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2096 				      int mec, int pipe, int queue)
2097 {
2098 	int r;
2099 	unsigned irq_type;
2100 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2101 
2104 	/* mec0 is me1 */
2105 	ring->me = mec + 1;
2106 	ring->pipe = pipe;
2107 	ring->queue = queue;
2108 
2109 	ring->ring_obj = NULL;
2110 	ring->use_doorbell = true;
2111 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2112 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2113 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2114 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2115 
2116 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2117 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2118 		+ ring->pipe;
2119 
2120 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2121 	r = amdgpu_ring_init(adev, ring, 1024,
2122 			     &adev->gfx.eop_irq, irq_type);
2123 	if (r)
2124 		return r;
2125 
2126 
2127 	return 0;
2128 }
2129 
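/*
 * sw_init: register the GFX interrupt sources, load microcode, create
 * the RLC and MEC objects, then initialize the gfx ring, the compute
 * rings (spread across MEC pipes), the KIQ and the per-queue MQDs.
 */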
2130 static int gfx_v9_0_sw_init(void *handle)
2131 {
2132 	int i, j, k, r, ring_id;
2133 	struct amdgpu_ring *ring;
2134 	struct amdgpu_kiq *kiq;
2135 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2136 
2137 	switch (adev->asic_type) {
2138 	case CHIP_VEGA10:
2139 	case CHIP_VEGA12:
2140 	case CHIP_VEGA20:
2141 	case CHIP_RAVEN:
2142 	case CHIP_ARCTURUS:
2143 		adev->gfx.mec.num_mec = 2;
2144 		break;
2145 	default:
2146 		adev->gfx.mec.num_mec = 1;
2147 		break;
2148 	}
2149 
2150 	adev->gfx.mec.num_pipe_per_mec = 4;
2151 	adev->gfx.mec.num_queue_per_pipe = 8;
2152 
2153 	/* EOP Event */
2154 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2155 	if (r)
2156 		return r;
2157 
2158 	/* Privileged reg */
2159 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2160 			      &adev->gfx.priv_reg_irq);
2161 	if (r)
2162 		return r;
2163 
2164 	/* Privileged inst */
2165 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2166 			      &adev->gfx.priv_inst_irq);
2167 	if (r)
2168 		return r;
2169 
2170 	/* ECC error */
2171 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2172 			      &adev->gfx.cp_ecc_error_irq);
2173 	if (r)
2174 		return r;
2175 
2176 	/* FUE error */
2177 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2178 			      &adev->gfx.cp_ecc_error_irq);
2179 	if (r)
2180 		return r;
2181 
2182 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2183 
2184 	gfx_v9_0_scratch_init(adev);
2185 
2186 	r = gfx_v9_0_init_microcode(adev);
2187 	if (r) {
2188 		DRM_ERROR("Failed to load gfx firmware!\n");
2189 		return r;
2190 	}
2191 
2192 	r = adev->gfx.rlc.funcs->init(adev);
2193 	if (r) {
2194 		DRM_ERROR("Failed to init rlc BOs!\n");
2195 		return r;
2196 	}
2197 
2198 	r = gfx_v9_0_mec_init(adev);
2199 	if (r) {
2200 		DRM_ERROR("Failed to init MEC BOs!\n");
2201 		return r;
2202 	}
2203 
2204 	/* set up the gfx ring */
2205 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2206 		ring = &adev->gfx.gfx_ring[i];
2207 		ring->ring_obj = NULL;
2208 		if (!i)
2209 			sprintf(ring->name, "gfx");
2210 		else
2211 			sprintf(ring->name, "gfx_%d", i);
2212 		ring->use_doorbell = true;
2213 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2214 		r = amdgpu_ring_init(adev, ring, 1024,
2215 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2216 		if (r)
2217 			return r;
2218 	}
2219 
2220 	/* set up the compute queues - allocate horizontally across pipes */
2221 	ring_id = 0;
2222 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2223 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2224 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2225 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2226 					continue;
2227 
2228 				r = gfx_v9_0_compute_ring_init(adev,
2229 							       ring_id,
2230 							       i, k, j);
2231 				if (r)
2232 					return r;
2233 
2234 				ring_id++;
2235 			}
2236 		}
2237 	}
2238 
2239 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2240 	if (r) {
2241 		DRM_ERROR("Failed to init KIQ BOs!\n");
2242 		return r;
2243 	}
2244 
2245 	kiq = &adev->gfx.kiq;
2246 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2247 	if (r)
2248 		return r;
2249 
2250 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
2251 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2252 	if (r)
2253 		return r;
2254 
2255 	adev->gfx.ce_ram_size = 0x8000;
2256 
2257 	r = gfx_v9_0_gpu_early_init(adev);
2258 	if (r)
2259 		return r;
2260 
2261 	r = gfx_v9_0_ngg_init(adev);
2262 	if (r)
2263 		return r;
2264 
2265 	return 0;
2266 }
2267 
2268 
2269 static int gfx_v9_0_sw_fini(void *handle)
2270 {
2271 	int i;
2272 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2273 
2274 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2275 			adev->gfx.ras_if) {
2276 		struct ras_common_if *ras_if = adev->gfx.ras_if;
2277 		struct ras_ih_if ih_info = {
2278 			.head = *ras_if,
2279 		};
2280 
2281 		amdgpu_ras_debugfs_remove(adev, ras_if);
2282 		amdgpu_ras_sysfs_remove(adev, ras_if);
2283 		amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
2284 		amdgpu_ras_feature_enable(adev, ras_if, 0);
2285 		kfree(ras_if);
2286 	}
2287 
2288 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2289 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2290 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2291 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2292 
2293 	amdgpu_gfx_mqd_sw_fini(adev);
2294 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2295 	amdgpu_gfx_kiq_fini(adev);
2296 
2297 	gfx_v9_0_mec_fini(adev);
2298 	gfx_v9_0_ngg_fini(adev);
2299 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2300 	if (adev->asic_type == CHIP_RAVEN) {
2301 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2302 				&adev->gfx.rlc.cp_table_gpu_addr,
2303 				(void **)&adev->gfx.rlc.cp_table_ptr);
2304 	}
2305 	gfx_v9_0_free_microcode(adev);
2306 
2307 	return 0;
2308 }
2309 
2310 
2311 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2312 {
2313 	/* TODO */
2314 }
2315 
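/*
 * Steer subsequent register accesses to a specific shader engine /
 * shader array / instance via GRBM_GFX_INDEX; 0xffffffff selects
 * broadcast mode for that field.
 */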
2316 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2317 {
2318 	u32 data;
2319 
2320 	if (instance == 0xffffffff)
2321 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2322 	else
2323 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2324 
2325 	if (se_num == 0xffffffff)
2326 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2327 	else
2328 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2329 
2330 	if (sh_num == 0xffffffff)
2331 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2332 	else
2333 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2334 
2335 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2336 }
2337 
2338 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2339 {
2340 	u32 data, mask;
2341 
2342 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2343 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2344 
2345 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2346 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2347 
2348 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2349 					 adev->gfx.config.max_sh_per_se);
2350 
2351 	return (~data) & mask;
2352 }
2353 
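/*
 * Query the active render backends for each SE/SH and cache the
 * aggregate enable mask and RB count in adev->gfx.config.
 */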
2354 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2355 {
2356 	int i, j;
2357 	u32 data;
2358 	u32 active_rbs = 0;
2359 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2360 					adev->gfx.config.max_sh_per_se;
2361 
2362 	mutex_lock(&adev->grbm_idx_mutex);
2363 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2364 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2365 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2366 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2367 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2368 					       rb_bitmap_width_per_sh);
2369 		}
2370 	}
2371 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2372 	mutex_unlock(&adev->grbm_idx_mutex);
2373 
2374 	adev->gfx.config.backend_enable_mask = active_rbs;
2375 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2376 }
2377 
2378 #define DEFAULT_SH_MEM_BASES	(0x6000)
2379 #define FIRST_COMPUTE_VMID	(8)
2380 #define LAST_COMPUTE_VMID	(16)
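/*
 * Give the compute VMIDs (8..15) the fixed 64-bit aperture layout
 * described below via SH_MEM_CONFIG/SH_MEM_BASES and clear their
 * GDS/GWS/OA allocations.
 */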
2381 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2382 {
2383 	int i;
2384 	uint32_t sh_mem_config;
2385 	uint32_t sh_mem_bases;
2386 
2387 	/*
2388 	 * Configure apertures:
2389 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2390 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2391 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2392 	 */
2393 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2394 
2395 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2396 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2397 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2398 
2399 	mutex_lock(&adev->srbm_mutex);
2400 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2401 		soc15_grbm_select(adev, 0, 0, 0, i);
2402 		/* CP and shaders */
2403 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2404 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2405 	}
2406 	soc15_grbm_select(adev, 0, 0, 0, 0);
2407 	mutex_unlock(&adev->srbm_mutex);
2408 
2409 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2410 	   access. These should be enabled by FW for target VMIDs. */
2411 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2412 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2413 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2414 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2415 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2416 	}
2417 }
2418 
2419 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2420 {
2421 	int vmid;
2422 
2423 	/*
2424 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2425 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2426 	 * the driver can enable them for graphics. VMID0 should maintain
2427 	 * access so that HWS firmware can save/restore entries.
2428 	 */
2429 	for (vmid = 1; vmid < 16; vmid++) {
2430 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2431 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2432 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2433 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2434 	}
2435 }
2436 
2437 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2438 {
2439 	u32 tmp;
2440 	int i;
2441 
2442 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2443 
2444 	gfx_v9_0_tiling_mode_table_init(adev);
2445 
2446 	gfx_v9_0_setup_rb(adev);
2447 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2448 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2449 
2450 	/* XXX SH_MEM regs */
2451 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2452 	mutex_lock(&adev->srbm_mutex);
2453 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2454 		soc15_grbm_select(adev, 0, 0, 0, i);
2455 		/* CP and shaders */
2456 		if (i == 0) {
2457 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2458 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2459 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2460 					    !!amdgpu_noretry);
2461 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2462 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2463 		} else {
2464 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2465 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2466 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2467 					    !!amdgpu_noretry);
2468 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2469 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2470 				(adev->gmc.private_aperture_start >> 48));
2471 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2472 				(adev->gmc.shared_aperture_start >> 48));
2473 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2474 		}
2475 	}
2476 	soc15_grbm_select(adev, 0, 0, 0, 0);
2477 
2478 	mutex_unlock(&adev->srbm_mutex);
2479 
2480 	gfx_v9_0_init_compute_vmid(adev);
2481 	gfx_v9_0_init_gds_vmid(adev);
2482 }
2483 
2484 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2485 {
2486 	u32 i, j, k;
2487 	u32 mask;
2488 
2489 	mutex_lock(&adev->grbm_idx_mutex);
2490 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2491 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2492 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2493 			for (k = 0; k < adev->usec_timeout; k++) {
2494 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2495 					break;
2496 				udelay(1);
2497 			}
2498 			if (k == adev->usec_timeout) {
2499 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2500 						      0xffffffff, 0xffffffff);
2501 				mutex_unlock(&adev->grbm_idx_mutex);
2502 				DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2503 					 i, j);
2504 				return;
2505 			}
2506 		}
2507 	}
2508 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2509 	mutex_unlock(&adev->grbm_idx_mutex);
2510 
2511 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2512 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2513 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2514 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2515 	for (k = 0; k < adev->usec_timeout; k++) {
2516 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2517 			break;
2518 		udelay(1);
2519 	}
2520 }
2521 
2522 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2523 					       bool enable)
2524 {
2525 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2526 
2527 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2528 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2529 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2530 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2531 
2532 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2533 }
2534 
2535 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2536 {
2537 	/* csib */
2538 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2539 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2540 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2541 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2542 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2543 			adev->gfx.rlc.clear_state_size);
2544 }
2545 
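/*
 * Walk the indirect part of the RLC register list format blob: record
 * the start offset of each indirect block and collect the set of unique
 * indirect register offsets it references.
 */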
2546 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2547 				int indirect_offset,
2548 				int list_size,
2549 				int *unique_indirect_regs,
2550 				int unique_indirect_reg_count,
2551 				int *indirect_start_offsets,
2552 				int *indirect_start_offsets_count,
2553 				int max_start_offsets_count)
2554 {
2555 	int idx;
2556 
2557 	for (; indirect_offset < list_size; indirect_offset++) {
2558 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2559 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2560 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2561 
2562 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2563 			indirect_offset += 2;
2564 
2565 			/* look for the matching index */
2566 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2567 				if (unique_indirect_regs[idx] ==
2568 					register_list_format[indirect_offset] ||
2569 					!unique_indirect_regs[idx])
2570 					break;
2571 			}
2572 
2573 			BUG_ON(idx >= unique_indirect_reg_count);
2574 
2575 			if (!unique_indirect_regs[idx])
2576 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2577 
2578 			indirect_offset++;
2579 		}
2580 	}
2581 }
2582 
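/*
 * Program the RLC save/restore machine from the lists parsed out of the
 * RLC firmware: the register_restore table goes into SRM ARAM, the
 * register list format and starting offsets into RLC GPM scratch, and
 * the unique indirect registers into the SRM index control registers.
 */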
2583 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2584 {
2585 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2586 	int unique_indirect_reg_count = 0;
2587 
2588 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2589 	int indirect_start_offsets_count = 0;
2590 
2591 	int list_size = 0;
2592 	int i = 0, j = 0;
2593 	u32 tmp = 0;
2594 
2595 	u32 *register_list_format =
2596 		kmemdup(adev->gfx.rlc.register_list_format,
2597 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2598 	if (!register_list_format)
2599 		return -ENOMEM;
2600 
2601 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2602 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2603 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2604 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2605 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2606 				    unique_indirect_regs,
2607 				    unique_indirect_reg_count,
2608 				    indirect_start_offsets,
2609 				    &indirect_start_offsets_count,
2610 				    ARRAY_SIZE(indirect_start_offsets));
2611 
2612 	/* enable auto inc in case it is disabled */
2613 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2614 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2615 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2616 
2617 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2618 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2619 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2620 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2621 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2622 			adev->gfx.rlc.register_restore[i]);
2623 
2624 	/* load indirect register */
2625 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2626 		adev->gfx.rlc.reg_list_format_start);
2627 
2628 	/* direct register portion */
2629 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2630 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2631 			register_list_format[i]);
2632 
2633 	/* indirect register portion */
2634 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2635 		if (register_list_format[i] == 0xFFFFFFFF) {
2636 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2637 			continue;
2638 		}
2639 
2640 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2641 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2642 
2643 		for (j = 0; j < unique_indirect_reg_count; j++) {
2644 			if (register_list_format[i] == unique_indirect_regs[j]) {
2645 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2646 				break;
2647 			}
2648 		}
2649 
2650 		BUG_ON(j >= unique_indirect_reg_count);
2651 
2652 		i++;
2653 	}
2654 
2655 	/* set save/restore list size */
2656 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2657 	list_size = list_size >> 1;
2658 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2659 		adev->gfx.rlc.reg_restore_list_size);
2660 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2661 
2662 	/* write the starting offsets to RLC scratch ram */
2663 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2664 		adev->gfx.rlc.starting_offsets_start);
2665 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2666 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2667 		       indirect_start_offsets[i]);
2668 
2669 	/* load unique indirect regs*/
2670 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2671 		if (unique_indirect_regs[i] != 0) {
2672 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2673 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2674 			       unique_indirect_regs[i] & 0x3FFFF);
2675 
2676 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2677 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2678 			       unique_indirect_regs[i] >> 20);
2679 		}
2680 	}
2681 
2682 	kfree(register_list_format);
2683 	return 0;
2684 }
2685 
2686 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2687 {
2688 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2689 }
2690 
2691 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2692 					     bool enable)
2693 {
2694 	uint32_t data = 0;
2695 	uint32_t default_data = 0;
2696 
2697 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2698 	if (enable == true) {
2699 		/* enable GFXIP control over CGPG */
2700 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2701 		if(default_data != data)
2702 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2703 
2704 		/* update status */
2705 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2706 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2707 		if(default_data != data)
2708 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2709 	} else {
2710 		/* restore GFXIP control over CGPG */
2711 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2712 		if(default_data != data)
2713 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2714 	}
2715 }
2716 
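/*
 * Static GFX power gating setup: idle poll count, RLC power up/down
 * delays and the GRBM register save idle threshold, then hand CGPG
 * control to the GFXIP block.
 */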
2717 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2718 {
2719 	uint32_t data = 0;
2720 
2721 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2722 			      AMD_PG_SUPPORT_GFX_SMG |
2723 			      AMD_PG_SUPPORT_GFX_DMG)) {
2724 		/* init IDLE_POLL_COUNT = 60 */
2725 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2726 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2727 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2728 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2729 
2730 		/* init RLC PG Delay */
2731 		data = 0;
2732 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2733 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2734 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2735 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2736 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2737 
2738 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2739 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2740 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2741 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2742 
2743 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2744 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2745 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2746 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2747 
2748 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2749 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2750 
2751 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2752 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2753 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2754 
2755 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2756 	}
2757 }
2758 
2759 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2760 						bool enable)
2761 {
2762 	uint32_t data = 0;
2763 	uint32_t default_data = 0;
2764 
2765 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2766 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2767 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2768 			     enable ? 1 : 0);
2769 	if (default_data != data)
2770 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2771 }
2772 
2773 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2774 						bool enable)
2775 {
2776 	uint32_t data = 0;
2777 	uint32_t default_data = 0;
2778 
2779 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2780 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2781 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2782 			     enable ? 1 : 0);
2783 	if(default_data != data)
2784 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2785 }
2786 
2787 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2788 					bool enable)
2789 {
2790 	uint32_t data = 0;
2791 	uint32_t default_data = 0;
2792 
2793 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2794 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2795 			     CP_PG_DISABLE,
2796 			     enable ? 0 : 1);
2797 	if(default_data != data)
2798 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2799 }
2800 
2801 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2802 						bool enable)
2803 {
2804 	uint32_t data, default_data;
2805 
2806 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2807 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2808 			     GFX_POWER_GATING_ENABLE,
2809 			     enable ? 1 : 0);
2810 	if(default_data != data)
2811 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2812 }
2813 
2814 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2815 						bool enable)
2816 {
2817 	uint32_t data, default_data;
2818 
2819 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2820 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2821 			     GFX_PIPELINE_PG_ENABLE,
2822 			     enable ? 1 : 0);
2823 	if(default_data != data)
2824 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2825 
2826 	if (!enable)
2827 		/* read any GFX register to wake up GFX */
2828 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2829 }
2830 
2831 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2832 						       bool enable)
2833 {
2834 	uint32_t data, default_data;
2835 
2836 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2837 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2838 			     STATIC_PER_CU_PG_ENABLE,
2839 			     enable ? 1 : 0);
2840 	if(default_data != data)
2841 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2842 }
2843 
2844 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2845 						bool enable)
2846 {
2847 	uint32_t data, default_data;
2848 
2849 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2850 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2851 			     DYN_PER_CU_PG_ENABLE,
2852 			     enable ? 1 : 0);
2853 	if (default_data != data)
2854 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2855 }
2856 
2857 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2858 {
2859 	gfx_v9_0_init_csb(adev);
2860 
2861 	/*
2862 	 * The RLC save/restore list is available since RLC v2_1
2863 	 * and is required by the gfxoff feature.
2864 	 */
2865 	if (adev->gfx.rlc.is_rlc_v2_1) {
2866 		gfx_v9_1_init_rlc_save_restore_list(adev);
2867 		gfx_v9_0_enable_save_restore_machine(adev);
2868 	}
2869 
2870 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2871 			      AMD_PG_SUPPORT_GFX_SMG |
2872 			      AMD_PG_SUPPORT_GFX_DMG |
2873 			      AMD_PG_SUPPORT_CP |
2874 			      AMD_PG_SUPPORT_GDS |
2875 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2876 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2877 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2878 		gfx_v9_0_init_gfx_power_gating(adev);
2879 	}
2880 }
2881 
2882 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2883 {
2884 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2885 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2886 	gfx_v9_0_wait_for_rlc_serdes(adev);
2887 }
2888 
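/*
 * Pulse SOFT_RESET_RLC in GRBM_SOFT_RESET to reset the RLC, with a 50us
 * delay after asserting and after de-asserting the reset bit.
 */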
2889 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2890 {
2891 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2892 	udelay(50);
2893 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2894 	udelay(50);
2895 }
2896 
2897 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2898 {
2899 #ifdef AMDGPU_RLC_DEBUG_RETRY
2900 	u32 rlc_ucode_ver;
2901 #endif
2902 
2903 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2904 	udelay(50);
2905 
2906 	/* APUs (e.g. carrizo) enable the CP interrupt only after the CP has been initialized */
2907 	if (!(adev->flags & AMD_IS_APU)) {
2908 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2909 		udelay(50);
2910 	}
2911 
2912 #ifdef AMDGPU_RLC_DEBUG_RETRY
2913 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2914 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2915 	if (rlc_ucode_ver == 0x108) {
2916 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i \n",
2917 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2918 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2919 		 * default is 0x9C4 to create a 100us interval */
2920 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2921 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2922 		 * to disable the page fault retry interrupts, default is
2923 		 * 0x100 (256) */
2924 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2925 	}
2926 #endif
2927 }
2928 
2929 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2930 {
2931 	const struct rlc_firmware_header_v2_0 *hdr;
2932 	const __le32 *fw_data;
2933 	unsigned i, fw_size;
2934 
2935 	if (!adev->gfx.rlc_fw)
2936 		return -EINVAL;
2937 
2938 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2939 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2940 
2941 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2942 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2943 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2944 
2945 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2946 			RLCG_UCODE_LOADING_START_ADDRESS);
2947 	for (i = 0; i < fw_size; i++)
2948 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2949 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2950 
2951 	return 0;
2952 }
2953 
2954 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2955 {
2956 	int r;
2957 
2958 	if (amdgpu_sriov_vf(adev)) {
2959 		gfx_v9_0_init_csb(adev);
2960 		return 0;
2961 	}
2962 
2963 	adev->gfx.rlc.funcs->stop(adev);
2964 
2965 	/* disable CG */
2966 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2967 
2968 	gfx_v9_0_init_pg(adev);
2969 
2970 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2971 		/* legacy rlc firmware loading */
2972 		r = gfx_v9_0_rlc_load_microcode(adev);
2973 		if (r)
2974 			return r;
2975 	}
2976 
2977 	switch (adev->asic_type) {
2978 	case CHIP_RAVEN:
2979 		if (amdgpu_lbpw == 0)
2980 			gfx_v9_0_enable_lbpw(adev, false);
2981 		else
2982 			gfx_v9_0_enable_lbpw(adev, true);
2983 		break;
2984 	case CHIP_VEGA20:
2985 		if (amdgpu_lbpw > 0)
2986 			gfx_v9_0_enable_lbpw(adev, true);
2987 		else
2988 			gfx_v9_0_enable_lbpw(adev, false);
2989 		break;
2990 	default:
2991 		break;
2992 	}
2993 
2994 	adev->gfx.rlc.funcs->start(adev);
2995 
2996 	return 0;
2997 }
2998 
2999 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3000 {
3001 	int i;
3002 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3003 
3004 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3005 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3006 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3007 	if (!enable) {
3008 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3009 			adev->gfx.gfx_ring[i].sched.ready = false;
3010 	}
3011 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3012 	udelay(50);
3013 }
3014 
3015 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3016 {
3017 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3018 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3019 	const struct gfx_firmware_header_v1_0 *me_hdr;
3020 	const __le32 *fw_data;
3021 	unsigned i, fw_size;
3022 
3023 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3024 		return -EINVAL;
3025 
3026 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3027 		adev->gfx.pfp_fw->data;
3028 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3029 		adev->gfx.ce_fw->data;
3030 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3031 		adev->gfx.me_fw->data;
3032 
3033 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3034 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3035 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3036 
3037 	gfx_v9_0_cp_gfx_enable(adev, false);
3038 
3039 	/* PFP */
3040 	fw_data = (const __le32 *)
3041 		(adev->gfx.pfp_fw->data +
3042 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3043 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3044 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3045 	for (i = 0; i < fw_size; i++)
3046 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3047 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3048 
3049 	/* CE */
3050 	fw_data = (const __le32 *)
3051 		(adev->gfx.ce_fw->data +
3052 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3053 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3054 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3055 	for (i = 0; i < fw_size; i++)
3056 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3057 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3058 
3059 	/* ME */
3060 	fw_data = (const __le32 *)
3061 		(adev->gfx.me_fw->data +
3062 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3063 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3064 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3065 	for (i = 0; i < fw_size; i++)
3066 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3067 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3068 
3069 	return 0;
3070 }
3071 
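/*
 * Initialize the gfx CP and emit the initial clear-state (CSB) sequence
 * from gfx9_cs_data on the gfx ring.
 */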
3072 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3073 {
3074 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3075 	const struct cs_section_def *sect = NULL;
3076 	const struct cs_extent_def *ext = NULL;
3077 	int r, i, tmp;
3078 
3079 	/* init the CP */
3080 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3081 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3082 
3083 	gfx_v9_0_cp_gfx_enable(adev, true);
3084 
3085 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3086 	if (r) {
3087 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3088 		return r;
3089 	}
3090 
3091 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3092 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3093 
3094 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3095 	amdgpu_ring_write(ring, 0x80000000);
3096 	amdgpu_ring_write(ring, 0x80000000);
3097 
3098 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3099 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3100 			if (sect->id == SECT_CONTEXT) {
3101 				amdgpu_ring_write(ring,
3102 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3103 					       ext->reg_count));
3104 				amdgpu_ring_write(ring,
3105 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3106 				for (i = 0; i < ext->reg_count; i++)
3107 					amdgpu_ring_write(ring, ext->extent[i]);
3108 			}
3109 		}
3110 	}
3111 
3112 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3113 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3114 
3115 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3116 	amdgpu_ring_write(ring, 0);
3117 
3118 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3119 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3120 	amdgpu_ring_write(ring, 0x8000);
3121 	amdgpu_ring_write(ring, 0x8000);
3122 
3123 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3124 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3125 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3126 	amdgpu_ring_write(ring, tmp);
3127 	amdgpu_ring_write(ring, 0);
3128 
3129 	amdgpu_ring_commit(ring);
3130 
3131 	return 0;
3132 }
3133 
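/*
 * Program the gfx ring buffer (RB0): size, rptr/wptr write-back addresses,
 * ring base and doorbell, then prime it via gfx_v9_0_cp_gfx_start().
 */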
3134 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3135 {
3136 	struct amdgpu_ring *ring;
3137 	u32 tmp;
3138 	u32 rb_bufsz;
3139 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3140 
3141 	/* Set the write pointer delay */
3142 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3143 
3144 	/* set the RB to use vmid 0 */
3145 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3146 
3147 	/* Set ring buffer size */
3148 	ring = &adev->gfx.gfx_ring[0];
3149 	rb_bufsz = order_base_2(ring->ring_size / 8);
3150 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3151 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3152 #ifdef __BIG_ENDIAN
3153 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3154 #endif
3155 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3156 
3157 	/* Initialize the ring buffer's write pointers */
3158 	ring->wptr = 0;
3159 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3160 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3161 
3162 	/* set the wb address whether it's enabled or not */
3163 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3164 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3165 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3166 
3167 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3168 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3169 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3170 
3171 	mdelay(1);
3172 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3173 
3174 	rb_addr = ring->gpu_addr >> 8;
3175 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3176 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3177 
3178 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3179 	if (ring->use_doorbell) {
3180 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3181 				    DOORBELL_OFFSET, ring->doorbell_index);
3182 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3183 				    DOORBELL_EN, 1);
3184 	} else {
3185 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3186 	}
3187 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3188 
3189 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3190 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3191 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3192 
3193 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3194 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3195 
3196 
3197 	/* start the ring */
3198 	gfx_v9_0_cp_gfx_start(adev);
3199 	ring->sched.ready = true;
3200 
3201 	return 0;
3202 }
3203 
3204 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3205 {
3206 	int i;
3207 
3208 	if (enable) {
3209 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3210 	} else {
3211 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3212 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3213 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3214 			adev->gfx.compute_ring[i].sched.ready = false;
3215 		adev->gfx.kiq.ring.sched.ready = false;
3216 	}
3217 	udelay(50);
3218 }
3219 
3220 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3221 {
3222 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3223 	const __le32 *fw_data;
3224 	unsigned i;
3225 	u32 tmp;
3226 
3227 	if (!adev->gfx.mec_fw)
3228 		return -EINVAL;
3229 
3230 	gfx_v9_0_cp_compute_enable(adev, false);
3231 
3232 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3233 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3234 
3235 	fw_data = (const __le32 *)
3236 		(adev->gfx.mec_fw->data +
3237 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3238 	tmp = 0;
3239 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3240 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3241 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3242 
3243 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3244 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3245 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3246 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3247 
3248 	/* MEC1 */
3249 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3250 			 mec_hdr->jt_offset);
3251 	for (i = 0; i < mec_hdr->jt_size; i++)
3252 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3253 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3254 
3255 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3256 			adev->gfx.mec_fw_version);
3257 	/* TODO: Loading MEC2 firmware is only necessary if MEC2 needs to run different microcode than MEC1. */
3258 
3259 	return 0;
3260 }
3261 
3262 /* KIQ functions */
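/*
 * The low byte of RLC_CP_SCHEDULERS is programmed with
 * (me << 5) | (pipe << 3) | queue to identify the KIQ, then written a
 * second time with bit 7 (0x80) also set.
 */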
3263 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3264 {
3265 	uint32_t tmp;
3266 	struct amdgpu_device *adev = ring->adev;
3267 
3268 	/* tell RLC which is KIQ queue */
3269 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3270 	tmp &= 0xffffff00;
3271 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3272 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3273 	tmp |= 0x80;
3274 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3275 }
3276 
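/*
 * Build a bitmask of all usable compute queues, then have the KIQ emit a
 * SET_RESOURCES packet followed by one MAP_QUEUES packet per compute ring.
 */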
3277 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3278 {
3279 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3280 	uint64_t queue_mask = 0;
3281 	int r, i;
3282 
3283 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3284 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3285 			continue;
3286 
3287 		/* This situation may be hit in the future if a new HW
3288 		 * generation exposes more than 64 queues. If so, the
3289 		 * definition of queue_mask needs updating */
3290 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3291 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3292 			break;
3293 		}
3294 
3295 		queue_mask |= (1ull << i);
3296 	}
3297 
3298 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3299 	if (r) {
3300 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3301 		return r;
3302 	}
3303 
3304 	/* set resources */
3305 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3306 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3307 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
3308 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
3309 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
3310 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
3311 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
3312 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
3313 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
3314 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3315 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3316 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3317 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3318 
3319 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3320 		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
3321 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3322 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3323 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3324 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3325 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3326 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3327 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3328 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3329 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3330 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3331 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3332 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3333 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3334 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3335 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3336 	}
3337 
3338 	r = amdgpu_ring_test_helper(kiq_ring);
3339 	if (r)
3340 		DRM_ERROR("KCQ enable failed\n");
3341 
3342 	return r;
3343 }
3344 
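/*
 * Fill in the Memory Queue Descriptor (MQD) for a compute queue: EOP
 * buffer, doorbell, ring base/size and the rptr/wptr write-back addresses.
 * The contents are committed to hardware later, either by
 * gfx_v9_0_kiq_init_register() or through a KIQ MAP_QUEUES packet.
 */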
3345 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3346 {
3347 	struct amdgpu_device *adev = ring->adev;
3348 	struct v9_mqd *mqd = ring->mqd_ptr;
3349 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3350 	uint32_t tmp;
3351 
3352 	mqd->header = 0xC0310800;
3353 	mqd->compute_pipelinestat_enable = 0x00000001;
3354 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3355 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3356 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3357 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3358 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3359 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3360 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3361 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3362 	mqd->compute_misc_reserved = 0x00000003;
3363 
3364 	mqd->dynamic_cu_mask_addr_lo =
3365 		lower_32_bits(ring->mqd_gpu_addr
3366 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3367 	mqd->dynamic_cu_mask_addr_hi =
3368 		upper_32_bits(ring->mqd_gpu_addr
3369 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3370 
3371 	eop_base_addr = ring->eop_gpu_addr >> 8;
3372 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3373 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3374 
3375 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3376 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3377 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3378 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3379 
3380 	mqd->cp_hqd_eop_control = tmp;
3381 
3382 	/* enable doorbell? */
3383 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3384 
3385 	if (ring->use_doorbell) {
3386 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3387 				    DOORBELL_OFFSET, ring->doorbell_index);
3388 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3389 				    DOORBELL_EN, 1);
3390 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3391 				    DOORBELL_SOURCE, 0);
3392 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3393 				    DOORBELL_HIT, 0);
3394 	} else {
3395 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3396 					 DOORBELL_EN, 0);
3397 	}
3398 
3399 	mqd->cp_hqd_pq_doorbell_control = tmp;
3400 
3401 	/* disable the queue if it's active */
3402 	ring->wptr = 0;
3403 	mqd->cp_hqd_dequeue_request = 0;
3404 	mqd->cp_hqd_pq_rptr = 0;
3405 	mqd->cp_hqd_pq_wptr_lo = 0;
3406 	mqd->cp_hqd_pq_wptr_hi = 0;
3407 
3408 	/* set the pointer to the MQD */
3409 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3410 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3411 
3412 	/* set MQD vmid to 0 */
3413 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3414 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3415 	mqd->cp_mqd_control = tmp;
3416 
3417 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3418 	hqd_gpu_addr = ring->gpu_addr >> 8;
3419 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3420 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3421 
3422 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3423 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3424 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3425 			    (order_base_2(ring->ring_size / 4) - 1));
3426 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3427 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3428 #ifdef __BIG_ENDIAN
3429 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3430 #endif
3431 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3432 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3433 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3434 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3435 	mqd->cp_hqd_pq_control = tmp;
3436 
3437 	/* set the wb address whether it's enabled or not */
3438 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3439 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3440 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3441 		upper_32_bits(wb_gpu_addr) & 0xffff;
3442 
3443 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3444 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3445 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3446 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3447 
3448 	tmp = 0;
3449 	/* enable the doorbell if requested */
3450 	if (ring->use_doorbell) {
3451 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3452 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3453 				DOORBELL_OFFSET, ring->doorbell_index);
3454 
3455 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3456 					 DOORBELL_EN, 1);
3457 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3458 					 DOORBELL_SOURCE, 0);
3459 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3460 					 DOORBELL_HIT, 0);
3461 	}
3462 
3463 	mqd->cp_hqd_pq_doorbell_control = tmp;
3464 
3465 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3466 	ring->wptr = 0;
3467 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3468 
3469 	/* set the vmid for the queue */
3470 	mqd->cp_hqd_vmid = 0;
3471 
3472 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3473 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3474 	mqd->cp_hqd_persistent_state = tmp;
3475 
3476 	/* set MIN_IB_AVAIL_SIZE */
3477 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3478 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3479 	mqd->cp_hqd_ib_control = tmp;
3480 
3481 	/* activate the queue */
3482 	mqd->cp_hqd_active = 1;
3483 
3484 	return 0;
3485 }
3486 
3487 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3488 {
3489 	struct amdgpu_device *adev = ring->adev;
3490 	struct v9_mqd *mqd = ring->mqd_ptr;
3491 	int j;
3492 
3493 	/* disable wptr polling */
3494 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3495 
3496 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3497 	       mqd->cp_hqd_eop_base_addr_lo);
3498 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3499 	       mqd->cp_hqd_eop_base_addr_hi);
3500 
3501 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3502 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3503 	       mqd->cp_hqd_eop_control);
3504 
3505 	/* enable doorbell? */
3506 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3507 	       mqd->cp_hqd_pq_doorbell_control);
3508 
3509 	/* disable the queue if it's active */
3510 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3511 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3512 		for (j = 0; j < adev->usec_timeout; j++) {
3513 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3514 				break;
3515 			udelay(1);
3516 		}
3517 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3518 		       mqd->cp_hqd_dequeue_request);
3519 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3520 		       mqd->cp_hqd_pq_rptr);
3521 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3522 		       mqd->cp_hqd_pq_wptr_lo);
3523 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3524 		       mqd->cp_hqd_pq_wptr_hi);
3525 	}
3526 
3527 	/* set the pointer to the MQD */
3528 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3529 	       mqd->cp_mqd_base_addr_lo);
3530 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3531 	       mqd->cp_mqd_base_addr_hi);
3532 
3533 	/* set MQD vmid to 0 */
3534 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3535 	       mqd->cp_mqd_control);
3536 
3537 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3538 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3539 	       mqd->cp_hqd_pq_base_lo);
3540 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3541 	       mqd->cp_hqd_pq_base_hi);
3542 
3543 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3544 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3545 	       mqd->cp_hqd_pq_control);
3546 
3547 	/* set the wb address whether it's enabled or not */
3548 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3549 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3550 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3551 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3552 
3553 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3554 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3555 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3556 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3557 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3558 
3559 	/* enable the doorbell if requested */
3560 	if (ring->use_doorbell) {
3561 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3562 					(adev->doorbell_index.kiq * 2) << 2);
3563 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3564 					(adev->doorbell_index.userqueue_end * 2) << 2);
3565 	}
3566 
3567 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3568 	       mqd->cp_hqd_pq_doorbell_control);
3569 
3570 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3571 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3572 	       mqd->cp_hqd_pq_wptr_lo);
3573 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3574 	       mqd->cp_hqd_pq_wptr_hi);
3575 
3576 	/* set the vmid for the queue */
3577 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3578 
3579 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3580 	       mqd->cp_hqd_persistent_state);
3581 
3582 	/* activate the queue */
3583 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3584 	       mqd->cp_hqd_active);
3585 
3586 	if (ring->use_doorbell)
3587 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3588 
3589 	return 0;
3590 }
3591 
3592 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3593 {
3594 	struct amdgpu_device *adev = ring->adev;
3595 	int j;
3596 
3597 	/* disable the queue if it's active */
3598 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3599 
3600 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3601 
3602 		for (j = 0; j < adev->usec_timeout; j++) {
3603 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3604 				break;
3605 			udelay(1);
3606 		}
3607 
3608 		if (j == adev->usec_timeout) {
3609 			DRM_DEBUG("KIQ dequeue request failed.\n");
3610 
3611 			/* Manual disable if dequeue request times out */
3612 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3613 		}
3614 
3615 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0);
3617 	}
3618 
3619 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3620 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3621 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3622 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3623 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3624 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3625 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3626 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3627 
3628 	return 0;
3629 }
3630 
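/*
 * On GPU reset the MQD is restored from the saved backup and only the
 * registers are reprogrammed; on first init it is built from scratch by
 * gfx_v9_0_mqd_init() and then backed up.
 */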
3631 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3632 {
3633 	struct amdgpu_device *adev = ring->adev;
3634 	struct v9_mqd *mqd = ring->mqd_ptr;
3635 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3636 
3637 	gfx_v9_0_kiq_setting(ring);
3638 
3639 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3640 		/* reset MQD to a clean status */
3641 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3642 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3643 
3644 		/* reset ring buffer */
3645 		ring->wptr = 0;
3646 		amdgpu_ring_clear_ring(ring);
3647 
3648 		mutex_lock(&adev->srbm_mutex);
3649 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3650 		gfx_v9_0_kiq_init_register(ring);
3651 		soc15_grbm_select(adev, 0, 0, 0, 0);
3652 		mutex_unlock(&adev->srbm_mutex);
3653 	} else {
3654 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3655 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3656 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3657 		mutex_lock(&adev->srbm_mutex);
3658 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3659 		gfx_v9_0_mqd_init(ring);
3660 		gfx_v9_0_kiq_init_register(ring);
3661 		soc15_grbm_select(adev, 0, 0, 0, 0);
3662 		mutex_unlock(&adev->srbm_mutex);
3663 
3664 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3665 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3666 	}
3667 
3668 	return 0;
3669 }
3670 
3671 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3672 {
3673 	struct amdgpu_device *adev = ring->adev;
3674 	struct v9_mqd *mqd = ring->mqd_ptr;
3675 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3676 
3677 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3678 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3679 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3680 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3681 		mutex_lock(&adev->srbm_mutex);
3682 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3683 		gfx_v9_0_mqd_init(ring);
3684 		soc15_grbm_select(adev, 0, 0, 0, 0);
3685 		mutex_unlock(&adev->srbm_mutex);
3686 
3687 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3688 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3689 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3690 		/* reset MQD to a clean status */
3691 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3692 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3693 
3694 		/* reset ring buffer */
3695 		ring->wptr = 0;
3696 		amdgpu_ring_clear_ring(ring);
3697 	} else {
3698 		amdgpu_ring_clear_ring(ring);
3699 	}
3700 
3701 	return 0;
3702 }
3703 
3704 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3705 {
3706 	struct amdgpu_ring *ring;
3707 	int r;
3708 
3709 	ring = &adev->gfx.kiq.ring;
3710 
3711 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3712 	if (unlikely(r != 0))
3713 		return r;
3714 
3715 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3716 	if (unlikely(r != 0))
3717 		return r;
3718 
3719 	gfx_v9_0_kiq_init_queue(ring);
3720 	amdgpu_bo_kunmap(ring->mqd_obj);
3721 	ring->mqd_ptr = NULL;
3722 	amdgpu_bo_unreserve(ring->mqd_obj);
3723 	ring->sched.ready = true;
3724 	return 0;
3725 }
3726 
3727 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3728 {
3729 	struct amdgpu_ring *ring = NULL;
3730 	int r = 0, i;
3731 
3732 	gfx_v9_0_cp_compute_enable(adev, true);
3733 
3734 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3735 		ring = &adev->gfx.compute_ring[i];
3736 
3737 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3738 		if (unlikely(r != 0))
3739 			goto done;
3740 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3741 		if (!r) {
3742 			r = gfx_v9_0_kcq_init_queue(ring);
3743 			amdgpu_bo_kunmap(ring->mqd_obj);
3744 			ring->mqd_ptr = NULL;
3745 		}
3746 		amdgpu_bo_unreserve(ring->mqd_obj);
3747 		if (r)
3748 			goto done;
3749 	}
3750 
3751 	r = gfx_v9_0_kiq_kcq_enable(adev);
3752 done:
3753 	return r;
3754 }
3755 
3756 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3757 {
3758 	int r, i;
3759 	struct amdgpu_ring *ring;
3760 
3761 	if (!(adev->flags & AMD_IS_APU))
3762 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3763 
3764 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3765 		if (adev->asic_type != CHIP_ARCTURUS) {
3766 			/* legacy firmware loading */
3767 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3768 			if (r)
3769 				return r;
3770 		}
3771 
3772 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3773 		if (r)
3774 			return r;
3775 	}
3776 
3777 	r = gfx_v9_0_kiq_resume(adev);
3778 	if (r)
3779 		return r;
3780 
3781 	if (adev->asic_type != CHIP_ARCTURUS) {
3782 		r = gfx_v9_0_cp_gfx_resume(adev);
3783 		if (r)
3784 			return r;
3785 	}
3786 
3787 	r = gfx_v9_0_kcq_resume(adev);
3788 	if (r)
3789 		return r;
3790 
3791 	if (adev->asic_type != CHIP_ARCTURUS) {
3792 		ring = &adev->gfx.gfx_ring[0];
3793 		r = amdgpu_ring_test_helper(ring);
3794 		if (r)
3795 			return r;
3796 	}
3797 
3798 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3799 		ring = &adev->gfx.compute_ring[i];
3800 		amdgpu_ring_test_helper(ring);
3801 	}
3802 
3803 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3804 
3805 	return 0;
3806 }
3807 
3808 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3809 {
3810 	if (adev->asic_type != CHIP_ARCTURUS)
3811 		gfx_v9_0_cp_gfx_enable(adev, enable);
3812 	gfx_v9_0_cp_compute_enable(adev, enable);
3813 }
3814 
3815 static int gfx_v9_0_hw_init(void *handle)
3816 {
3817 	int r;
3818 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3819 
3820 	if (!amdgpu_sriov_vf(adev))
3821 		gfx_v9_0_init_golden_registers(adev);
3822 
3823 	gfx_v9_0_constants_init(adev);
3824 
3825 	r = gfx_v9_0_csb_vram_pin(adev);
3826 	if (r)
3827 		return r;
3828 
3829 	r = adev->gfx.rlc.funcs->resume(adev);
3830 	if (r)
3831 		return r;
3832 
3833 	r = gfx_v9_0_cp_resume(adev);
3834 	if (r)
3835 		return r;
3836 
3837 	if (adev->asic_type != CHIP_ARCTURUS) {
3838 		r = gfx_v9_0_ngg_en(adev);
3839 		if (r)
3840 			return r;
3841 	}
3842 
3843 	return r;
3844 }
3845 
3846 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3847 {
3848 	int r, i;
3849 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3850 
3851 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3852 	if (r)
3853 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3854 
3855 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3856 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3857 
3858 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3859 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3860 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3861 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3862 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3863 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3864 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3865 		amdgpu_ring_write(kiq_ring, 0);
3866 		amdgpu_ring_write(kiq_ring, 0);
3867 		amdgpu_ring_write(kiq_ring, 0);
3868 	}
3869 	r = amdgpu_ring_test_helper(kiq_ring);
3870 	if (r)
3871 		DRM_ERROR("KCQ disable failed\n");
3872 
3873 	return r;
3874 }
3875 
3876 static int gfx_v9_0_hw_fini(void *handle)
3877 {
3878 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3879 
3880 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3881 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3882 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3883 
3884 	/* disable the KCQs to prevent the CPC from touching memory that is no longer valid */
3885 	gfx_v9_0_kcq_disable(adev);
3886 
3887 	if (amdgpu_sriov_vf(adev)) {
3888 		gfx_v9_0_cp_gfx_enable(adev, false);
3889 		/* Polling must be disabled for SR-IOV once hw_fini is done; otherwise
3890 		 * the CPC engine may keep fetching the WB address, which becomes
3891 		 * invalid after the software teardown, and trigger DMAR read errors
3892 		 * on the hypervisor side.
3893 		 */
3894 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3895 		return 0;
3896 	}
3897 
3898 	/* Use the deinitialization sequence from CAIL when unbinding the device
3899 	 * from the driver, otherwise the KIQ hangs when it is bound back.
3900 	 */
3901 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3902 		mutex_lock(&adev->srbm_mutex);
3903 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3904 				adev->gfx.kiq.ring.pipe,
3905 				adev->gfx.kiq.ring.queue, 0);
3906 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3907 		soc15_grbm_select(adev, 0, 0, 0, 0);
3908 		mutex_unlock(&adev->srbm_mutex);
3909 	}
3910 
3911 	gfx_v9_0_cp_enable(adev, false);
3912 	adev->gfx.rlc.funcs->stop(adev);
3913 
3914 	gfx_v9_0_csb_vram_unpin(adev);
3915 
3916 	return 0;
3917 }
3918 
3919 static int gfx_v9_0_suspend(void *handle)
3920 {
3921 	return gfx_v9_0_hw_fini(handle);
3922 }
3923 
3924 static int gfx_v9_0_resume(void *handle)
3925 {
3926 	return gfx_v9_0_hw_init(handle);
3927 }
3928 
3929 static bool gfx_v9_0_is_idle(void *handle)
3930 {
3931 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3932 
3933 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3934 				GRBM_STATUS, GUI_ACTIVE))
3935 		return false;
3936 	else
3937 		return true;
3938 }
3939 
3940 static int gfx_v9_0_wait_for_idle(void *handle)
3941 {
3942 	unsigned i;
3943 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3944 
3945 	for (i = 0; i < adev->usec_timeout; i++) {
3946 		if (gfx_v9_0_is_idle(handle))
3947 			return 0;
3948 		udelay(1);
3949 	}
3950 	return -ETIMEDOUT;
3951 }
3952 
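/*
 * Check GRBM_STATUS/GRBM_STATUS2 to decide which blocks (CP, GFX, RLC)
 * need a reset, halt the RLC and CP, then pulse the corresponding bits in
 * GRBM_SOFT_RESET.
 */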
3953 static int gfx_v9_0_soft_reset(void *handle)
3954 {
3955 	u32 grbm_soft_reset = 0;
3956 	u32 tmp;
3957 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3958 
3959 	/* GRBM_STATUS */
3960 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3961 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3962 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3963 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3964 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3965 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3966 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3967 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3968 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3969 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3970 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3971 	}
3972 
3973 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3974 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3975 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3976 	}
3977 
3978 	/* GRBM_STATUS2 */
3979 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3980 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3981 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3982 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3983 
3984 
3985 	if (grbm_soft_reset) {
3986 		/* stop the rlc */
3987 		adev->gfx.rlc.funcs->stop(adev);
3988 
3989 		if (adev->asic_type != CHIP_ARCTURUS)
3990 			/* Disable GFX parsing/prefetching */
3991 			gfx_v9_0_cp_gfx_enable(adev, false);
3992 
3993 		/* Disable MEC parsing/prefetching */
3994 		gfx_v9_0_cp_compute_enable(adev, false);
3995 
3996 		if (grbm_soft_reset) {
3997 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3998 			tmp |= grbm_soft_reset;
3999 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4000 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4001 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4002 
4003 			udelay(50);
4004 
4005 			tmp &= ~grbm_soft_reset;
4006 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4007 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4008 		}
4009 
4010 		/* Wait a little for things to settle down */
4011 		udelay(50);
4012 	}
4013 	return 0;
4014 }
4015 
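/*
 * Latch the GPU clock counter and read it back as a 64-bit LSB/MSB pair
 * under gpu_clock_mutex.
 */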
4016 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4017 {
4018 	uint64_t clock;
4019 
4020 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4021 	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4022 	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4023 		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4024 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4025 	return clock;
4026 }
4027 
4028 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4029 					  uint32_t vmid,
4030 					  uint32_t gds_base, uint32_t gds_size,
4031 					  uint32_t gws_base, uint32_t gws_size,
4032 					  uint32_t oa_base, uint32_t oa_size)
4033 {
4034 	struct amdgpu_device *adev = ring->adev;
4035 
4036 	/* GDS Base */
4037 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4038 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4039 				   gds_base);
4040 
4041 	/* GDS Size */
4042 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4043 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4044 				   gds_size);
4045 
4046 	/* GWS */
4047 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4048 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4049 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4050 
4051 	/* OA */
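	/* (1 << (oa_base + oa_size)) - (1 << oa_base) is a mask covering
	 * oa_size consecutive OA bits starting at oa_base.
	 */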
4052 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4053 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4054 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4055 }
4056 
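/*
 * Raw shader machine code used by gfx_v9_0_do_edc_gpr_workarounds() below
 * to initialize the VGPRs and SGPRs.
 */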
4057 static const u32 vgpr_init_compute_shader[] =
4058 {
4059 	0xb07c0000, 0xbe8000ff,
4060 	0x000000f8, 0xbf110800,
4061 	0x7e000280, 0x7e020280,
4062 	0x7e040280, 0x7e060280,
4063 	0x7e080280, 0x7e0a0280,
4064 	0x7e0c0280, 0x7e0e0280,
4065 	0x80808800, 0xbe803200,
4066 	0xbf84fff5, 0xbf9c0000,
4067 	0xd28c0001, 0x0001007f,
4068 	0xd28d0001, 0x0002027e,
4069 	0x10020288, 0xb8810904,
4070 	0xb7814000, 0xd1196a01,
4071 	0x00000301, 0xbe800087,
4072 	0xbefc00c1, 0xd89c4000,
4073 	0x00020201, 0xd89cc080,
4074 	0x00040401, 0x320202ff,
4075 	0x00000800, 0x80808100,
4076 	0xbf84fff8, 0x7e020280,
4077 	0xbf810000, 0x00000000,
4078 };
4079 
4080 static const u32 sgpr_init_compute_shader[] =
4081 {
4082 	0xb07c0000, 0xbe8000ff,
4083 	0x0000005f, 0xbee50080,
4084 	0xbe812c65, 0xbe822c65,
4085 	0xbe832c65, 0xbe842c65,
4086 	0xbe852c65, 0xb77c0005,
4087 	0x80808500, 0xbf84fff8,
4088 	0xbe800080, 0xbf810000,
4089 };
4090 
4091 static const struct soc15_reg_entry vgpr_init_regs[] = {
4092    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4093    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4094    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4095    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4096    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4097    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4098    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4099    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4100    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
4101    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4102 };
4103 
4104 static const struct soc15_reg_entry sgpr_init_regs[] = {
4105    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4106    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4107    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4108    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4109    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4110    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4111    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4112    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4113    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4114    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4115 };
4116 
4117 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4118    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4119    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4120    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4121    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4122    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4123    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4124    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4125    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4126    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4127    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4128    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4129    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4130    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4131    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4132    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4133    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4134    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4135    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4136    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4137    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4138    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4139    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4140    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4141    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4142    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4143    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4144    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4145    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4146    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4147    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4148    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4149    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4150 };
4151 
4152 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4153 {
4154 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4155 	int i, r;
4156 
4157 	r = amdgpu_ring_alloc(ring, 7);
4158 	if (r) {
4159 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4160 			ring->name, r);
4161 		return r;
4162 	}
4163 
4164 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4165 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4166 
4167 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4168 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4169 				PACKET3_DMA_DATA_DST_SEL(1) |
4170 				PACKET3_DMA_DATA_SRC_SEL(2) |
4171 				PACKET3_DMA_DATA_ENGINE(0)));
4172 	amdgpu_ring_write(ring, 0);
4173 	amdgpu_ring_write(ring, 0);
4174 	amdgpu_ring_write(ring, 0);
4175 	amdgpu_ring_write(ring, 0);
4176 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4177 				adev->gds.gds_size);
4178 
4179 	amdgpu_ring_commit(ring);
4180 
4181 	for (i = 0; i < adev->usec_timeout; i++) {
4182 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4183 			break;
4184 		udelay(1);
4185 	}
4186 
4187 	if (i >= adev->usec_timeout)
4188 		r = -ETIMEDOUT;
4189 
4190 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4191 
4192 	return r;
4193 }
4194 
4195 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4196 {
4197 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4198 	struct amdgpu_ib ib;
4199 	struct dma_fence *f = NULL;
4200 	int r, i, j, k;
4201 	unsigned total_size, vgpr_offset, sgpr_offset;
4202 	u64 gpu_addr;
4203 
4204 	/* only supported when RAS is enabled */
4205 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4206 		return 0;
4207 
4208 	/* bail if the compute ring is not ready */
4209 	if (!ring->sched.ready)
4210 		return 0;
4211 
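	/* IB size: each SET_SH_REG register write takes 3 dwords (header,
	 * offset, value), plus 4 dwords for the shader address, 5 for
	 * DISPATCH_DIRECT and 2 for the CS partial flush event; the dword
	 * count is converted to bytes.
	 */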
4212 	total_size =
4213 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4214 	total_size +=
4215 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4216 	total_size = ALIGN(total_size, 256);
4217 	vgpr_offset = total_size;
4218 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4219 	sgpr_offset = total_size;
4220 	total_size += sizeof(sgpr_init_compute_shader);
4221 
4222 	/* allocate an indirect buffer to put the commands in */
4223 	memset(&ib, 0, sizeof(ib));
4224 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4225 	if (r) {
4226 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4227 		return r;
4228 	}
4229 
4230 	/* load the compute shaders */
4231 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4232 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4233 
4234 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4235 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4236 
4237 	/* init the ib length to 0 */
4238 	ib.length_dw = 0;
4239 
4240 	/* VGPR */
4241 	/* write the register state for the compute dispatch */
4242 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4243 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4244 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4245 								- PACKET3_SET_SH_REG_START;
4246 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4247 	}
4248 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4249 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4250 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4251 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4252 							- PACKET3_SET_SH_REG_START;
4253 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4254 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4255 
4256 	/* write dispatch packet */
4257 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4258 	ib.ptr[ib.length_dw++] = 128; /* x */
4259 	ib.ptr[ib.length_dw++] = 1; /* y */
4260 	ib.ptr[ib.length_dw++] = 1; /* z */
4261 	ib.ptr[ib.length_dw++] =
4262 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4263 
4264 	/* write CS partial flush packet */
4265 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4266 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4267 
4268 	/* SGPR */
4269 	/* write the register state for the compute dispatch */
4270 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4271 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4272 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4273 								- PACKET3_SET_SH_REG_START;
4274 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4275 	}
4276 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4277 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4278 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4279 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4280 							- PACKET3_SET_SH_REG_START;
4281 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4282 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4283 
4284 	/* write dispatch packet */
4285 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4286 	ib.ptr[ib.length_dw++] = 128; /* x */
4287 	ib.ptr[ib.length_dw++] = 1; /* y */
4288 	ib.ptr[ib.length_dw++] = 1; /* z */
4289 	ib.ptr[ib.length_dw++] =
4290 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4291 
4292 	/* write CS partial flush packet */
4293 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4294 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4295 
4296 	/* schedule the IB on the ring */
4297 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4298 	if (r) {
4299 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4300 		goto fail;
4301 	}
4302 
4303 	/* wait for the GPU to finish processing the IB */
4304 	r = dma_fence_wait(f, false);
4305 	if (r) {
4306 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4307 		goto fail;
4308 	}
4309 
4310 	/* read back registers to clear the counters */
4311 	mutex_lock(&adev->grbm_idx_mutex);
4312 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4313 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4314 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4315 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4316 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4317 			}
4318 		}
4319 	}
4320 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4321 	mutex_unlock(&adev->grbm_idx_mutex);
4322 
4323 fail:
4324 	amdgpu_ib_free(adev, &ib, NULL);
4325 	dma_fence_put(f);
4326 
4327 	return r;
4328 }
4329 
4330 static int gfx_v9_0_early_init(void *handle)
4331 {
4332 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4333 
4334 	if (adev->asic_type == CHIP_ARCTURUS)
4335 		adev->gfx.num_gfx_rings = 0;
4336 	else
4337 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4338 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4339 	gfx_v9_0_set_ring_funcs(adev);
4340 	gfx_v9_0_set_irq_funcs(adev);
4341 	gfx_v9_0_set_gds_init(adev);
4342 	gfx_v9_0_set_rlc_funcs(adev);
4343 
4344 	return 0;
4345 }
4346 
4347 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4348 		struct ras_err_data *err_data,
4349 		struct amdgpu_iv_entry *entry);
4350 
4351 static int gfx_v9_0_ecc_late_init(void *handle)
4352 {
4353 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4354 	struct ras_common_if **ras_if = &adev->gfx.ras_if;
4355 	struct ras_ih_if ih_info = {
4356 		.cb = gfx_v9_0_process_ras_data_cb,
4357 	};
4358 	struct ras_fs_if fs_info = {
4359 		.sysfs_name = "gfx_err_count",
4360 		.debugfs_name = "gfx_err_inject",
4361 	};
4362 	struct ras_common_if ras_block = {
4363 		.block = AMDGPU_RAS_BLOCK__GFX,
4364 		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4365 		.sub_block_index = 0,
4366 		.name = "gfx",
4367 	};
4368 	int r;
4369 
4370 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4371 		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4372 		return 0;
4373 	}
4374 
4375 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
4376 	if (r)
4377 		return r;
4378 
4379 	/* requires IBs so do in late init after IB pool is initialized */
4380 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4381 	if (r)
4382 		return r;
4383 
4384 	/* handle resume path. */
4385 	if (*ras_if) {
4386 		/* Resend the RAS TA enable cmd during resume;
4387 		 * be prepared to handle failure.
4388 		 */
4389 		ih_info.head = **ras_if;
4390 		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4391 		if (r) {
4392 			if (r == -EAGAIN) {
4393 				/* request a gpu reset. will run again. */
4394 				amdgpu_ras_request_reset_on_boot(adev,
4395 						AMDGPU_RAS_BLOCK__GFX);
4396 				return 0;
4397 			}
4398 			/* failed to enable RAS, clean up everything. */
4399 			goto irq;
4400 		}
4401 		/* enable successfully. continue. */
4402 		/* enabled successfully, continue. */
4403 	}
4404 
4405 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4406 	if (!*ras_if)
4407 		return -ENOMEM;
4408 
4409 	**ras_if = ras_block;
4410 
4411 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4412 	if (r) {
4413 		if (r == -EAGAIN) {
4414 			amdgpu_ras_request_reset_on_boot(adev,
4415 					AMDGPU_RAS_BLOCK__GFX);
4416 			r = 0;
4417 		}
4418 		goto feature;
4419 	}
4420 
4421 	ih_info.head = **ras_if;
4422 	fs_info.head = **ras_if;
4423 
4424 	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4425 	if (r)
4426 		goto interrupt;
4427 
4428 	amdgpu_ras_debugfs_create(adev, &fs_info);
4429 
4430 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
4431 	if (r)
4432 		goto sysfs;
4433 resume:
4434 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4435 	if (r)
4436 		goto irq;
4437 
4438 	return 0;
4439 irq:
4440 	amdgpu_ras_sysfs_remove(adev, *ras_if);
4441 sysfs:
4442 	amdgpu_ras_debugfs_remove(adev, *ras_if);
4443 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4444 interrupt:
4445 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
4446 feature:
4447 	kfree(*ras_if);
4448 	*ras_if = NULL;
4449 	return r;
4450 }
4451 
4452 static int gfx_v9_0_late_init(void *handle)
4453 {
4454 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4455 	int r;
4456 
4457 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4458 	if (r)
4459 		return r;
4460 
4461 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4462 	if (r)
4463 		return r;
4464 
4465 	r = gfx_v9_0_ecc_late_init(handle);
4466 	if (r)
4467 		return r;
4468 
4469 	return 0;
4470 }
4471 
4472 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4473 {
4474 	uint32_t rlc_setting;
4475 
4476 	/* if RLC is not enabled, do nothing */
4477 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4478 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4479 		return false;
4480 
4481 	return true;
4482 }
4483 
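/*
 * Request RLC safe mode: write the CMD bit plus a MESSAGE value of 1 to
 * RLC_SAFE_MODE, then poll until the RLC clears the CMD bit to acknowledge
 * the request (bounded by adev->usec_timeout).
 */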
4484 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4485 {
4486 	uint32_t data;
4487 	unsigned i;
4488 
4489 	data = RLC_SAFE_MODE__CMD_MASK;
4490 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4491 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4492 
4493 	/* wait for RLC_SAFE_MODE */
4494 	for (i = 0; i < adev->usec_timeout; i++) {
4495 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4496 			break;
4497 		udelay(1);
4498 	}
4499 }
4500 
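/* Exit RLC safe mode: send the CMD bit with a MESSAGE value of 0, no ack polling. */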
4501 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4502 {
4503 	uint32_t data;
4504 
4505 	data = RLC_SAFE_MODE__CMD_MASK;
4506 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4507 }
4508 
4509 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4510 						bool enable)
4511 {
4512 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4513 
4514 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4515 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4516 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4517 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4518 	} else {
4519 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4520 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4521 	}
4522 
4523 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4524 }
4525 
4526 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4527 						bool enable)
4528 {
4529 	/* TODO: double check if we need to perform under safe mode */
4530 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4531 
4532 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4533 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4534 	else
4535 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4536 
4537 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4538 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4539 	else
4540 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4541 
4542 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4543 }
4544 
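/*
 * Enable or disable medium grain clock gating (MGCG) and the RLC/CP memory
 * light sleep (MGLS) that depends on it, by adjusting the
 * RLC_CGTT_MGCG_OVERRIDE and *_MEM_SLP_CNTL registers under RLC safe mode.
 */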
4545 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4546 						      bool enable)
4547 {
4548 	uint32_t data, def;
4549 
4550 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4551 
4552 	/* It is disabled by HW by default */
4553 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4554 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4555 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4556 
4557 		if (adev->asic_type != CHIP_VEGA12)
4558 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4559 
4560 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4561 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4562 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4563 
4564 		/* only for Vega10 & Raven1 */
4565 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4566 
4567 		if (def != data)
4568 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4569 
4570 		/* MGLS is a global flag to control all MGLS in GFX */
4571 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4572 			/* 2 - RLC memory Light sleep */
4573 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4574 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4575 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4576 				if (def != data)
4577 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4578 			}
4579 			/* 3 - CP memory Light sleep */
4580 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4581 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4582 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4583 				if (def != data)
4584 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4585 			}
4586 		}
4587 	} else {
4588 		/* 1 - MGCG_OVERRIDE */
4589 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4590 
4591 		if (adev->asic_type != CHIP_VEGA12)
4592 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4593 
4594 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4595 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4596 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4597 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4598 
4599 		if (def != data)
4600 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4601 
4602 		/* 2 - disable MGLS in RLC */
4603 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4604 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4605 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4606 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4607 		}
4608 
4609 		/* 3 - disable MGLS in CP */
4610 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4611 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4612 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4613 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4614 		}
4615 	}
4616 
4617 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4618 }
4619 
4620 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4621 					   bool enable)
4622 {
4623 	uint32_t data, def;
4624 
4625 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4626 
4627 	/* Enable 3D CGCG/CGLS */
4628 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4629 		/* write cmd to clear cgcg/cgls ov */
4630 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4631 		/* unset CGCG override */
4632 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4633 		/* update CGCG and CGLS override bits */
4634 		if (def != data)
4635 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4636 
4637 		/* enable 3D CGCG FSM (0x0000363f) */
4638 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4639 
4640 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4641 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4642 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4643 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4644 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4645 		if (def != data)
4646 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4647 
4648 		/* set IDLE_POLL_COUNT(0x00900100) */
4649 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4650 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4651 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4652 		if (def != data)
4653 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4654 	} else {
4655 		/* Disable CGCG/CGLS */
4656 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4657 		/* disable cgcg, cgls should be disabled */
4658 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4659 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4660 		/* disable cgcg and cgls in FSM */
4661 		if (def != data)
4662 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4663 	}
4664 
4665 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4666 }
4667 
4668 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4669 						      bool enable)
4670 {
4671 	uint32_t def, data;
4672 
4673 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4674 
4675 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4676 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4677 		/* unset CGCG override */
4678 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4679 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4680 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4681 		else
4682 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4683 		/* update CGCG and CGLS override bits */
4684 		if (def != data)
4685 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4686 
4687 		/* enable cgcg FSM(0x0000363F) */
4688 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4689 
4690 		data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4691 			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4692 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4693 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4694 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4695 		if (def != data)
4696 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4697 
4698 		/* set IDLE_POLL_COUNT(0x00900100) */
4699 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4700 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4701 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4702 		if (def != data)
4703 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4704 	} else {
4705 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4706 		/* reset CGCG/CGLS bits */
4707 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4708 		/* disable cgcg and cgls in FSM */
4709 		if (def != data)
4710 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4711 	}
4712 
4713 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4714 }
4715 
4716 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4717 					    bool enable)
4718 {
4719 	if (enable) {
4720 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4721 		 * ===  MGCG + MGLS ===
4722 		 */
4723 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4724 		/* ===  CGCG /CGLS for GFX 3D Only === */
4725 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4726 		/* ===  CGCG + CGLS === */
4727 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4728 	} else {
4729 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4730 		 * ===  CGCG + CGLS ===
4731 		 */
4732 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4733 		/* ===  CGCG /CGLS for GFX 3D Only === */
4734 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4735 		/* ===  MGCG + MGLS === */
4736 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4737 	}
4738 	return 0;
4739 }
4740 
4741 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4742 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4743 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4744 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4745 	.init = gfx_v9_0_rlc_init,
4746 	.get_csb_size = gfx_v9_0_get_csb_size,
4747 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4748 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4749 	.resume = gfx_v9_0_rlc_resume,
4750 	.stop = gfx_v9_0_rlc_stop,
4751 	.reset = gfx_v9_0_rlc_reset,
4752 	.start = gfx_v9_0_rlc_start
4753 };
4754 
4755 static int gfx_v9_0_set_powergating_state(void *handle,
4756 					  enum amd_powergating_state state)
4757 {
4758 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4759 	bool enable = (state == AMD_PG_STATE_GATE);
4760 
4761 	switch (adev->asic_type) {
4762 	case CHIP_RAVEN:
4763 		if (!enable) {
4764 			amdgpu_gfx_off_ctrl(adev, false);
4765 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4766 		}
4767 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4768 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4769 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4770 		} else {
4771 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4772 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4773 		}
4774 
4775 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4776 			gfx_v9_0_enable_cp_power_gating(adev, true);
4777 		else
4778 			gfx_v9_0_enable_cp_power_gating(adev, false);
4779 
4780 		/* update gfx cgpg state */
4781 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4782 
4783 		/* update mgcg state */
4784 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4785 
4786 		if (enable)
4787 			amdgpu_gfx_off_ctrl(adev, true);
4788 		break;
4789 	case CHIP_VEGA12:
4790 		if (!enable) {
4791 			amdgpu_gfx_off_ctrl(adev, false);
4792 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4793 		} else {
4794 			amdgpu_gfx_off_ctrl(adev, true);
4795 		}
4796 		break;
4797 	default:
4798 		break;
4799 	}
4800 
4801 	return 0;
4802 }
4803 
4804 static int gfx_v9_0_set_clockgating_state(void *handle,
4805 					  enum amd_clockgating_state state)
4806 {
4807 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4808 
4809 	if (amdgpu_sriov_vf(adev))
4810 		return 0;
4811 
4812 	switch (adev->asic_type) {
4813 	case CHIP_VEGA10:
4814 	case CHIP_VEGA12:
4815 	case CHIP_VEGA20:
4816 	case CHIP_RAVEN:
4817 		gfx_v9_0_update_gfx_clock_gating(adev,
4818 						 state == AMD_CG_STATE_GATE);
4819 		break;
4820 	default:
4821 		break;
4822 	}
4823 	return 0;
4824 }
4825 
4826 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4827 {
4828 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4829 	int data;
4830 
4831 	if (amdgpu_sriov_vf(adev))
4832 		*flags = 0;
4833 
4834 	/* AMD_CG_SUPPORT_GFX_MGCG */
4835 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4836 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4837 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4838 
4839 	/* AMD_CG_SUPPORT_GFX_CGCG */
4840 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4841 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4842 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4843 
4844 	/* AMD_CG_SUPPORT_GFX_CGLS */
4845 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4846 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4847 
4848 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4849 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4850 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4851 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4852 
4853 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4854 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4855 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4856 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4857 
4858 	if (adev->asic_type != CHIP_ARCTURUS) {
4859 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4860 		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4861 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4862 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4863 
4864 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4865 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4866 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4867 	}
4868 }
4869 
4870 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4871 {
4872 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4873 }
4874 
4875 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4876 {
4877 	struct amdgpu_device *adev = ring->adev;
4878 	u64 wptr;
4879 
4880 	/* XXX check if swapping is necessary on BE */
4881 	if (ring->use_doorbell) {
4882 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4883 	} else {
4884 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4885 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4886 	}
4887 
4888 	return wptr;
4889 }
4890 
4891 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4892 {
4893 	struct amdgpu_device *adev = ring->adev;
4894 
4895 	if (ring->use_doorbell) {
4896 		/* XXX check if swapping is necessary on BE */
4897 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4898 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4899 	} else {
4900 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4901 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4902 	}
4903 }
4904 
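/*
 * Emit an HDP flush on the ring: pick the per-client request bit in the
 * NBIO HDP flush registers (by ME/pipe for compute rings, CP0 for gfx) and
 * use gfx_v9_0_wait_reg_mem() to trigger the flush and wait for the done
 * bit.
 */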
4905 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4906 {
4907 	struct amdgpu_device *adev = ring->adev;
4908 	u32 ref_and_mask, reg_mem_engine;
4909 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4910 
4911 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4912 		switch (ring->me) {
4913 		case 1:
4914 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4915 			break;
4916 		case 2:
4917 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4918 			break;
4919 		default:
4920 			return;
4921 		}
4922 		reg_mem_engine = 0;
4923 	} else {
4924 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4925 		reg_mem_engine = 1; /* pfp */
4926 	}
4927 
4928 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4929 			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4930 			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4931 			      ref_and_mask, ref_and_mask, 0x20);
4932 }
4933 
4934 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4935 					struct amdgpu_job *job,
4936 					struct amdgpu_ib *ib,
4937 					uint32_t flags)
4938 {
4939 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4940 	u32 header, control = 0;
4941 
4942 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4943 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4944 	else
4945 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4946 
4947 	control |= ib->length_dw | (vmid << 24);
4948 
4949 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4950 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4951 
4952 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4953 			gfx_v9_0_ring_emit_de_meta(ring);
4954 	}
4955 
4956 	amdgpu_ring_write(ring, header);
4957 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4958 	amdgpu_ring_write(ring,
4959 #ifdef __BIG_ENDIAN
4960 		(2 << 0) |
4961 #endif
4962 		lower_32_bits(ib->gpu_addr));
4963 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4964 	amdgpu_ring_write(ring, control);
4965 }
4966 
4967 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4968 					  struct amdgpu_job *job,
4969 					  struct amdgpu_ib *ib,
4970 					  uint32_t flags)
4971 {
4972 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4973 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4974 
4975 	/* Currently, there is a high possibility to get wave ID mismatch
4976 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4977 	 * different wave IDs than the GDS expects. This situation happens
4978 	 * randomly when at least 5 compute pipes use GDS ordered append.
4979 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4980 	 * Those are probably bugs somewhere else in the kernel driver.
4981 	 *
4982 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4983 	 * GDS to 0 for this ring (me/pipe).
4984 	 */
4985 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4986 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4987 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4988 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4989 	}
4990 
4991 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4992 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4993 	amdgpu_ring_write(ring,
4994 #ifdef __BIG_ENDIAN
4995 				(2 << 0) |
4996 #endif
4997 				lower_32_bits(ib->gpu_addr));
4998 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4999 	amdgpu_ring_write(ring, control);
5000 }
5001 
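/*
 * Emit a fence using a RELEASE_MEM packet: flush/invalidate the TC caches
 * (writeback only when AMDGPU_FENCE_FLAG_TC_WB_ONLY is set), write the 32-
 * or 64-bit sequence number to 'addr' and optionally raise an interrupt.
 */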
5002 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5003 				     u64 seq, unsigned flags)
5004 {
5005 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5006 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5007 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5008 
5009 	/* RELEASE_MEM - flush caches, send int */
5010 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5011 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5012 					       EOP_TC_NC_ACTION_EN) :
5013 					      (EOP_TCL1_ACTION_EN |
5014 					       EOP_TC_ACTION_EN |
5015 					       EOP_TC_WB_ACTION_EN |
5016 					       EOP_TC_MD_ACTION_EN)) |
5017 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5018 				 EVENT_INDEX(5)));
5019 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5020 
5021 	/*
5022 	 * The address should be Qword aligned for a 64-bit write, and Dword
5023 	 * aligned if only the low 32 bits are written (data high is discarded).
5024 	 */
5025 	if (write64bit)
5026 		BUG_ON(addr & 0x7);
5027 	else
5028 		BUG_ON(addr & 0x3);
5029 	amdgpu_ring_write(ring, lower_32_bits(addr));
5030 	amdgpu_ring_write(ring, upper_32_bits(addr));
5031 	amdgpu_ring_write(ring, lower_32_bits(seq));
5032 	amdgpu_ring_write(ring, upper_32_bits(seq));
5033 	amdgpu_ring_write(ring, 0);
5034 }
5035 
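/*
 * Emit a WAIT_REG_MEM that stalls the ring until its latest synced fence
 * value is visible at the fence GPU address.
 */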
5036 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5037 {
5038 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5039 	uint32_t seq = ring->fence_drv.sync_seq;
5040 	uint64_t addr = ring->fence_drv.gpu_addr;
5041 
5042 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5043 			      lower_32_bits(addr), upper_32_bits(addr),
5044 			      seq, 0xffffffff, 4);
5045 }
5046 
5047 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5048 					unsigned vmid, uint64_t pd_addr)
5049 {
5050 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5051 
5052 	/* compute doesn't have PFP */
5053 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5054 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5055 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5056 		amdgpu_ring_write(ring, 0x0);
5057 	}
5058 }
5059 
5060 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5061 {
5062 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5063 }
5064 
5065 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5066 {
5067 	u64 wptr;
5068 
5069 	/* XXX check if swapping is necessary on BE */
5070 	if (ring->use_doorbell)
5071 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5072 	else
5073 		BUG();
5074 	return wptr;
5075 }
5076 
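/*
 * Adjust a pipe's wave launch budget through its SPI_WCL_PIPE_PERCENT_*
 * register: full VALUE when acquiring, minimal (1) when releasing.
 */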
5077 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5078 					   bool acquire)
5079 {
5080 	struct amdgpu_device *adev = ring->adev;
5081 	int pipe_num, tmp, reg;
5082 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5083 
5084 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5085 
5086 	/* first me only has 2 entries, GFX and HP3D */
5087 	if (ring->me > 0)
5088 		pipe_num -= 2;
5089 
5090 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5091 	tmp = RREG32(reg);
5092 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5093 	WREG32(reg, tmp);
5094 }
5095 
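/*
 * Track exclusive pipe reservations in pipe_reserve_bitmap.  When no pipe
 * is reserved, every gfx/compute ring gets its full pipe percent back;
 * otherwise all pipes without a reservation are throttled.
 */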
5096 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5097 					    struct amdgpu_ring *ring,
5098 					    bool acquire)
5099 {
5100 	int i, pipe;
5101 	bool reserve;
5102 	struct amdgpu_ring *iring;
5103 
5104 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
5105 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5106 	if (acquire)
5107 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5108 	else
5109 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5110 
5111 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5112 		/* Clear all reservations - everyone reacquires all resources */
5113 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5114 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5115 						       true);
5116 
5117 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5118 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5119 						       true);
5120 	} else {
5121 		/* Lower all pipes without a current reservation */
5122 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5123 			iring = &adev->gfx.gfx_ring[i];
5124 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5125 							   iring->me,
5126 							   iring->pipe,
5127 							   0);
5128 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5129 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5130 		}
5131 
5132 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5133 			iring = &adev->gfx.compute_ring[i];
5134 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5135 							   iring->me,
5136 							   iring->pipe,
5137 							   0);
5138 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5139 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5140 		}
5141 	}
5142 
5143 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5144 }
5145 
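/*
 * Program the HQD priority for this ring: select the queue via GRBM, then
 * write CP_HQD_PIPE_PRIORITY/CP_HQD_QUEUE_PRIORITY (raised while acquiring,
 * 0 when releasing).
 */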
5146 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5147 				      struct amdgpu_ring *ring,
5148 				      bool acquire)
5149 {
5150 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5151 	uint32_t queue_priority = acquire ? 0xf : 0x0;
5152 
5153 	mutex_lock(&adev->srbm_mutex);
5154 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5155 
5156 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5157 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5158 
5159 	soc15_grbm_select(adev, 0, 0, 0, 0);
5160 	mutex_unlock(&adev->srbm_mutex);
5161 }
5162 
5163 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5164 					       enum drm_sched_priority priority)
5165 {
5166 	struct amdgpu_device *adev = ring->adev;
5167 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5168 
5169 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5170 		return;
5171 
5172 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5173 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5174 }
5175 
5176 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5177 {
5178 	struct amdgpu_device *adev = ring->adev;
5179 
5180 	/* XXX check if swapping is necessary on BE */
5181 	if (ring->use_doorbell) {
5182 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5183 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5184 	} else {
5185 		BUG(); /* only DOORBELL method supported on gfx9 now */
5186 	}
5187 }
5188 
5189 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5190 					 u64 seq, unsigned int flags)
5191 {
5192 	struct amdgpu_device *adev = ring->adev;
5193 
5194 	/* we only allocate 32bit for each seq wb address */
5195 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5196 
5197 	/* write fence seq to the "addr" */
5198 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5199 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5200 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5201 	amdgpu_ring_write(ring, lower_32_bits(addr));
5202 	amdgpu_ring_write(ring, upper_32_bits(addr));
5203 	amdgpu_ring_write(ring, lower_32_bits(seq));
5204 
5205 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5206 		/* set register to trigger INT */
5207 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5208 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5209 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5210 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5211 		amdgpu_ring_write(ring, 0);
5212 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5213 	}
5214 }
5215 
5216 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5217 {
5218 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5219 	amdgpu_ring_write(ring, 0);
5220 }
5221 
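/*
 * Write a zeroed v9_ce_ib_state payload into the ce_payload slot of the
 * ring's context save area (CSA) with a WRITE_DATA packet.
 */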
5222 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5223 {
5224 	struct v9_ce_ib_state ce_payload = {0};
5225 	uint64_t csa_addr;
5226 	int cnt;
5227 
5228 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5229 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5230 
5231 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5232 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5233 				 WRITE_DATA_DST_SEL(8) |
5234 				 WR_CONFIRM) |
5235 				 WRITE_DATA_CACHE_POLICY(0));
5236 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5237 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5238 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5239 }
5240 
5241 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5242 {
5243 	struct v9_de_ib_state de_payload = {0};
5244 	uint64_t csa_addr, gds_addr;
5245 	int cnt;
5246 
5247 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5248 	gds_addr = csa_addr + 4096;
5249 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5250 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5251 
5252 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5253 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5254 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5255 				 WRITE_DATA_DST_SEL(8) |
5256 				 WR_CONFIRM) |
5257 				 WRITE_DATA_CACHE_POLICY(0));
5258 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5259 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5260 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5261 }
5262 
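/* Emit a FRAME_CONTROL packet: command 0 marks frame start, 1 marks frame end. */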
5263 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5264 {
5265 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5266 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5267 }
5268 
5269 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5270 {
5271 	uint32_t dw2 = 0;
5272 
5273 	if (amdgpu_sriov_vf(ring->adev))
5274 		gfx_v9_0_ring_emit_ce_meta(ring);
5275 
5276 	gfx_v9_0_ring_emit_tmz(ring, true);
5277 
5278 	dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
5279 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5280 		/* set load_global_config & load_global_uconfig */
5281 		dw2 |= 0x8001;
5282 		/* set load_cs_sh_regs */
5283 		dw2 |= 0x01000000;
5284 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5285 		dw2 |= 0x10002;
5286 
5287 		/* set load_ce_ram if preamble presented */
5288 		/* set load_ce_ram if a preamble is present */
5289 			dw2 |= 0x10000000;
5290 	} else {
5291 		/* still load_ce_ram the first time a preamble is presented,
5292 		 * even though no context switch happens.
5293 		 */
5294 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5295 			dw2 |= 0x10000000;
5296 	}
5297 
5298 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5299 	amdgpu_ring_write(ring, dw2);
5300 	amdgpu_ring_write(ring, 0);
5301 }
5302 
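/*
 * Emit a COND_EXEC packet with a dummy DW count and return its offset in
 * the ring buffer so gfx_v9_0_ring_emit_patch_cond_exec() can patch in the
 * real count later; the packet skips the following DWs when
 * *cond_exe_gpu_addr reads back as zero.
 */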
5303 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5304 {
5305 	unsigned ret;

5306 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5307 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5308 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5309 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5310 	ret = ring->wptr & ring->buf_mask;
5311 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5312 	return ret;
5313 }
5314 
5315 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5316 {
5317 	unsigned cur;

5318 	BUG_ON(offset > ring->buf_mask);
5319 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5320 
5321 	cur = (ring->wptr & ring->buf_mask) - 1;
5322 	if (likely(cur > offset))
5323 		ring->ring[offset] = cur - offset;
5324 	else
5325 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5326 }
5327 
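/*
 * Emit a COPY_DATA packet that copies the given register into the
 * writeback buffer slot at adev->virt.reg_val_offs, so a register read
 * issued through the ring can be fetched from memory afterwards.
 */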
5328 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5329 {
5330 	struct amdgpu_device *adev = ring->adev;
5331 
5332 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5333 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5334 				(5 << 8) |	/* dst: memory */
5335 				(1 << 20));	/* write confirm */
5336 	amdgpu_ring_write(ring, reg);
5337 	amdgpu_ring_write(ring, 0);
5338 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5339 				adev->virt.reg_val_offs * 4));
5340 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5341 				adev->virt.reg_val_offs * 4));
5342 }
5343 
5344 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5345 				    uint32_t val)
5346 {
5347 	uint32_t cmd = 0;
5348 
5349 	switch (ring->funcs->type) {
5350 	case AMDGPU_RING_TYPE_GFX:
5351 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5352 		break;
5353 	case AMDGPU_RING_TYPE_KIQ:
5354 		cmd = (1 << 16); /* no inc addr */
5355 		break;
5356 	default:
5357 		cmd = WR_CONFIRM;
5358 		break;
5359 	}
5360 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5361 	amdgpu_ring_write(ring, cmd);
5362 	amdgpu_ring_write(ring, reg);
5363 	amdgpu_ring_write(ring, 0);
5364 	amdgpu_ring_write(ring, val);
5365 }
5366 
5367 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5368 					uint32_t val, uint32_t mask)
5369 {
5370 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5371 }
5372 
5373 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5374 						  uint32_t reg0, uint32_t reg1,
5375 						  uint32_t ref, uint32_t mask)
5376 {
5377 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5378 	struct amdgpu_device *adev = ring->adev;
5379 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5380 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5381 
5382 	if (fw_version_ok)
5383 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5384 				      ref, mask, 0x20);
5385 	else
5386 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5387 							   ref, mask);
5388 }
5389 
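/*
 * Soft recovery: issue an SQ_CMD targeting the given VMID, asking the SQ
 * to abort that VMID's in-flight waves instead of requiring a full GPU
 * reset.
 */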
5390 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5391 {
5392 	struct amdgpu_device *adev = ring->adev;
5393 	uint32_t value = 0;
5394 
5395 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5396 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5397 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5398 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5399 	WREG32(mmSQ_CMD, value);
5400 }
5401 
5402 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5403 						 enum amdgpu_interrupt_state state)
5404 {
5405 	switch (state) {
5406 	case AMDGPU_IRQ_STATE_DISABLE:
5407 	case AMDGPU_IRQ_STATE_ENABLE:
5408 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5409 			       TIME_STAMP_INT_ENABLE,
5410 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5411 		break;
5412 	default:
5413 		break;
5414 	}
5415 }
5416 
5417 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5418 						     int me, int pipe,
5419 						     enum amdgpu_interrupt_state state)
5420 {
5421 	u32 mec_int_cntl, mec_int_cntl_reg;
5422 
5423 	/*
5424 	 * amdgpu controls only the first MEC. That's why this function only
5425 	 * handles the setting of interrupts for this specific MEC. All other
5426 	 * pipes' interrupts are set by amdkfd.
5427 	 */
5428 
5429 	if (me == 1) {
5430 		switch (pipe) {
5431 		case 0:
5432 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5433 			break;
5434 		case 1:
5435 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5436 			break;
5437 		case 2:
5438 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5439 			break;
5440 		case 3:
5441 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5442 			break;
5443 		default:
5444 			DRM_DEBUG("invalid pipe %d\n", pipe);
5445 			return;
5446 		}
5447 	} else {
5448 		DRM_DEBUG("invalid me %d\n", me);
5449 		return;
5450 	}
5451 
5452 	switch (state) {
5453 	case AMDGPU_IRQ_STATE_DISABLE:
5454 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5455 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5456 					     TIME_STAMP_INT_ENABLE, 0);
5457 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5458 		break;
5459 	case AMDGPU_IRQ_STATE_ENABLE:
5460 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5461 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5462 					     TIME_STAMP_INT_ENABLE, 1);
5463 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5464 		break;
5465 	default:
5466 		break;
5467 	}
5468 }
5469 
5470 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5471 					     struct amdgpu_irq_src *source,
5472 					     unsigned type,
5473 					     enum amdgpu_interrupt_state state)
5474 {
5475 	switch (state) {
5476 	case AMDGPU_IRQ_STATE_DISABLE:
5477 	case AMDGPU_IRQ_STATE_ENABLE:
5478 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5479 			       PRIV_REG_INT_ENABLE,
5480 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5481 		break;
5482 	default:
5483 		break;
5484 	}
5485 
5486 	return 0;
5487 }
5488 
5489 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5490 					      struct amdgpu_irq_src *source,
5491 					      unsigned type,
5492 					      enum amdgpu_interrupt_state state)
5493 {
5494 	switch (state) {
5495 	case AMDGPU_IRQ_STATE_DISABLE:
5496 	case AMDGPU_IRQ_STATE_ENABLE:
5497 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5498 			       PRIV_INSTR_INT_ENABLE,
5499 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
5500 	default:
5501 		break;
5502 	}
5503 
5504 	return 0;
5505 }
5506 
5507 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5508 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5509 			CP_ECC_ERROR_INT_ENABLE, 1)
5510 
5511 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5512 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5513 			CP_ECC_ERROR_INT_ENABLE, 0)
5514 
5515 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5516 					      struct amdgpu_irq_src *source,
5517 					      unsigned type,
5518 					      enum amdgpu_interrupt_state state)
5519 {
5520 	switch (state) {
5521 	case AMDGPU_IRQ_STATE_DISABLE:
5522 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5523 				CP_ECC_ERROR_INT_ENABLE, 0);
5524 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5525 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5526 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5527 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5528 		break;
5529 
5530 	case AMDGPU_IRQ_STATE_ENABLE:
5531 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5532 				CP_ECC_ERROR_INT_ENABLE, 1);
5533 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5534 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5535 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5536 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5537 		break;
5538 	default:
5539 		break;
5540 	}
5541 
5542 	return 0;
5543 }
5544 
5545 
5546 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5547 					    struct amdgpu_irq_src *src,
5548 					    unsigned type,
5549 					    enum amdgpu_interrupt_state state)
5550 {
5551 	switch (type) {
5552 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5553 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5554 		break;
5555 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5556 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5557 		break;
5558 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5559 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5560 		break;
5561 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5562 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5563 		break;
5564 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5565 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5566 		break;
5567 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5568 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5569 		break;
5570 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5571 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5572 		break;
5573 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5574 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5575 		break;
5576 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5577 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5578 		break;
5579 	default:
5580 		break;
5581 	}
5582 	return 0;
5583 }
5584 
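/*
 * CP EOP interrupt handler: decode me/pipe/queue from the IV ring_id and
 * run fence processing on the matching gfx or compute ring.
 */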
5585 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5586 			    struct amdgpu_irq_src *source,
5587 			    struct amdgpu_iv_entry *entry)
5588 {
5589 	int i;
5590 	u8 me_id, pipe_id, queue_id;
5591 	struct amdgpu_ring *ring;
5592 
5593 	DRM_DEBUG("IH: CP EOP\n");
5594 	me_id = (entry->ring_id & 0x0c) >> 2;
5595 	pipe_id = (entry->ring_id & 0x03) >> 0;
5596 	queue_id = (entry->ring_id & 0x70) >> 4;
5597 
5598 	switch (me_id) {
5599 	case 0:
5600 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5601 		break;
5602 	case 1:
5603 	case 2:
5604 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5605 			ring = &adev->gfx.compute_ring[i];
5606 			/* Per-queue interrupt is supported for MEC starting from VI.
5607 			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5608 			 */
5609 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5610 				amdgpu_fence_process(ring);
5611 		}
5612 		break;
5613 	}
5614 	return 0;
5615 }
5616 
5617 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5618 			   struct amdgpu_iv_entry *entry)
5619 {
5620 	u8 me_id, pipe_id, queue_id;
5621 	struct amdgpu_ring *ring;
5622 	int i;
5623 
5624 	me_id = (entry->ring_id & 0x0c) >> 2;
5625 	pipe_id = (entry->ring_id & 0x03) >> 0;
5626 	queue_id = (entry->ring_id & 0x70) >> 4;
5627 
5628 	switch (me_id) {
5629 	case 0:
5630 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5631 		break;
5632 	case 1:
5633 	case 2:
5634 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5635 			ring = &adev->gfx.compute_ring[i];
5636 			if (ring->me == me_id && ring->pipe == pipe_id &&
5637 			    ring->queue == queue_id)
5638 				drm_sched_fault(&ring->sched);
5639 		}
5640 		break;
5641 	}
5642 }
5643 
5644 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5645 				 struct amdgpu_irq_src *source,
5646 				 struct amdgpu_iv_entry *entry)
5647 {
5648 	DRM_ERROR("Illegal register access in command stream\n");
5649 	gfx_v9_0_fault(adev, entry);
5650 	return 0;
5651 }
5652 
5653 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5654 				  struct amdgpu_irq_src *source,
5655 				  struct amdgpu_iv_entry *entry)
5656 {
5657 	DRM_ERROR("Illegal instruction in command stream\n");
5658 	gfx_v9_0_fault(adev, entry);
5659 	return 0;
5660 }
5661 
5662 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5663 		struct ras_err_data *err_data,
5664 		struct amdgpu_iv_entry *entry)
5665 {
5666 	/* TODO: a UE (uncorrectable error) will trigger an interrupt. */
5667 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5668 	if (adev->gfx.funcs->query_ras_error_count)
5669 		adev->gfx.funcs->query_ras_error_count(adev, err_data);
5670 	amdgpu_ras_reset_gpu(adev, 0);
5671 	return AMDGPU_RAS_SUCCESS;
5672 }
5673 
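/*
 * Table of GFX EDC/ECC error counter registers: each entry gives the SOC15
 * register location, whether the counter is instanced per shader engine,
 * how many instances to walk, and the SEC/DED count field masks.
 */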
5674 static const struct {
5675 	const char *name;
5676 	uint32_t ip;
5677 	uint32_t inst;
5678 	uint32_t seg;
5679 	uint32_t reg_offset;
5680 	uint32_t per_se_instance;
5681 	int32_t num_instance;
5682 	uint32_t sec_count_mask;
5683 	uint32_t ded_count_mask;
5684 } gfx_ras_edc_regs[] = {
5685 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5686 	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5687 	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5688 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5689 	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5690 	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5691 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5692 	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5693 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5694 	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5695 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5696 	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5697 	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5698 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5699 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5700 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5701 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5702 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5703 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5704 	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5705 	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5706 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5707 	  REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5708 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5709 	  REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5710 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5711 	  REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5712 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5713 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5714 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5715 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5716 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5717 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5718 	  0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5719 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5720 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5721 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5722 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5723 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5724 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5725 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5726 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5727 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5728 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5729 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5730 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5731 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5732 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5733 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5734 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5735 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5736 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5737 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5738 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5739 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5740 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5741 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5742 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5743 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5744 	  REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5745 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5746 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5747 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5748 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5749 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5750 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5751 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5752 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5753 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5754 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5755 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5756 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5757 	  REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5758 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5759 	  REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5760 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5761 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5762 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5763 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5764 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5765 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5766 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5767 	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5768 	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5769 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5770 	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5771 	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5772 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5773 	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5774 	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5775 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5776 	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5777 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5778 	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5779 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5780 	  REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5781 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5782 	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5783 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5784 	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5785 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5786 	  REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5787 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5788 	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5789 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5790 	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5791 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5792 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5793 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5794 	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5795 	  0 },
5796 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5797 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5798 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5799 	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5800 	  0 },
5801 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5802 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5803 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5804 	  REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5805 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5806 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5807 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5808 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5809 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5810 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5811 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5812 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5813 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5814 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5815 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5816 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5817 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5818 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5819 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5820 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5821 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5822 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5823 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5824 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5825 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5826 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5827 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5828 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5829 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5830 	  REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5831 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5832 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5833 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5834 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5835 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5836 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5837 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5838 	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5839 	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5840 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5841 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5842 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5843 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5844 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5845 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5846 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5847 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5848 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5849 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5850 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5851 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5852 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5853 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5854 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5855 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5856 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5857 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5858 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5859 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5860 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5861 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5862 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5863 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5864 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5865 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5866 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5867 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5868 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5869 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5870 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5871 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5872 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5873 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5874 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5875 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5876 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5877 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5878 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5879 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5880 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5881 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5882 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5883 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5884 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5885 	  0 },
5886 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5887 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5888 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5889 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5890 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5891 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5892 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5893 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5894 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5895 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5896 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5897 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5898 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5899 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5900 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5901 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5902 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5903 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5904 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5905 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5906 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5907 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5908 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5909 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5910 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5911 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5912 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5913 	  0 },
5914 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5915 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5916 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5917 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
5918 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5919 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
5920 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM",
5921 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5922 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
5923 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5924 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5925 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
5926 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5927 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5928 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
5929 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5930 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5931 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
5932 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5933 	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5934 	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
5935 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5936 	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5937 	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
5938 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5939 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
5940 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5941 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
5942 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5943 	  REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
5944 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5945 	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
5946 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5947 	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
5948 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5949 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5950 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
5951 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5952 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5953 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
5954 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5955 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5956 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
5957 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5958 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
5959 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5960 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
5961 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5962 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
5963 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5964 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
5965 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5966 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
5967 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5968 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
5969 };
5970 
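/*
 * Inject a RAS error into a GFX sub-block through the PSP RAS TA.
 * Only supported on VEGA20; the requested sub-block and error type are
 * validated against the hardware- and driver-supported masks in
 * ras_gfx_subblocks[] before the trigger is issued to the PSP.
 */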
5971 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5972 				     void *inject_if)
5973 {
5974 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5975 	int ret;
5976 	struct ta_ras_trigger_error_input block_info = { 0 };
5977 
5978 	if (adev->asic_type != CHIP_VEGA20)
5979 		return -EINVAL;
5980 
5981 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
5982 		return -EPERM;
5983 
5984 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
5985 	      info->head.type)) {
5986 		DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
5987 			ras_gfx_subblocks[info->head.sub_block_index].name,
5988 			info->head.type);
5989 		return -EPERM;
5990 	}
5991 
5992 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
5993 	      info->head.type)) {
5994 		DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
5995 			ras_gfx_subblocks[info->head.sub_block_index].name,
5996 			info->head.type);
5997 		return -EPERM;
5998 	}
5999 
6000 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6001 	block_info.sub_block_index =
6002 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6003 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6004 	block_info.address = info->address;
6005 	block_info.value = info->value;
6006 
6007 	mutex_lock(&adev->grbm_idx_mutex);
6008 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6009 	mutex_unlock(&adev->grbm_idx_mutex);
6010 
6011 	return ret;
6012 }
6013 
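/*
 * Walk the gfx_ras_edc_regs[] table and read each EDC counter register,
 * iterating over shader engines and instances where the entry requires it.
 * Non-zero SEC fields are accumulated as correctable errors and non-zero
 * DED fields as uncorrectable errors in the supplied ras_err_data.
 */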
6014 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6015 					  void *ras_error_status)
6016 {
6017 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6018 	uint32_t sec_count, ded_count;
6019 	uint32_t i;
6020 	uint32_t reg_value;
6021 	uint32_t se_id, instance_id;
6022 
6023 	if (adev->asic_type != CHIP_VEGA20)
6024 		return -EINVAL;
6025 
6026 	err_data->ue_count = 0;
6027 	err_data->ce_count = 0;
6028 
6029 	mutex_lock(&adev->grbm_idx_mutex);
6030 	for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6031 		for (instance_id = 0; instance_id < 256; instance_id++) {
6032 			for (i = 0; i < ARRAY_SIZE(gfx_ras_edc_regs); i++) {
6035 				if (se_id != 0 &&
6036 				    !gfx_ras_edc_regs[i].per_se_instance)
6037 					continue;
6038 				if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6039 					continue;
6040 
6041 				gfx_v9_0_select_se_sh(adev, se_id, 0,
6042 						      instance_id);
6043 
6044 				reg_value = RREG32(
6045 					adev->reg_offset[gfx_ras_edc_regs[i].ip]
6046 							[gfx_ras_edc_regs[i].inst]
6047 							[gfx_ras_edc_regs[i].seg] +
6048 					gfx_ras_edc_regs[i].reg_offset);
6049 				sec_count = reg_value &
6050 					    gfx_ras_edc_regs[i].sec_count_mask;
6051 				ded_count = reg_value &
6052 					    gfx_ras_edc_regs[i].ded_count_mask;
6053 				if (sec_count) {
6054 					DRM_INFO(
6055 						"Instance[%d][%d]: SubBlock %s, SEC %d\n",
6056 						se_id, instance_id,
6057 						gfx_ras_edc_regs[i].name,
6058 						sec_count);
6059 					err_data->ce_count++;
6060 				}
6061 
6062 				if (ded_count) {
6063 					DRM_INFO(
6064 						"Instance[%d][%d]: SubBlock %s, DED %d\n",
6065 						se_id, instance_id,
6066 						gfx_ras_edc_regs[i].name,
6067 						ded_count);
6068 					err_data->ue_count++;
6069 				}
6070 			}
6071 		}
6072 	}
6073 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6074 	mutex_unlock(&adev->grbm_idx_mutex);
6075 
6076 	return 0;
6077 }
6078 
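/*
 * CP ECC error interrupt handler: forward the IV entry to the common
 * RAS interrupt dispatcher if a GFX RAS context has been registered.
 */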
6079 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6080 				  struct amdgpu_irq_src *source,
6081 				  struct amdgpu_iv_entry *entry)
6082 {
6083 	struct ras_common_if *ras_if = adev->gfx.ras_if;
6084 	struct ras_dispatch_if ih_data = {
6085 		.entry = entry,
6086 	};
6087 
6088 	if (!ras_if)
6089 		return 0;
6090 
6091 	ih_data.head = *ras_if;
6092 
6093 	DRM_ERROR("CP ECC ERROR IRQ\n");
6094 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6095 	return 0;
6096 }
6097 
6098 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6099 	.name = "gfx_v9_0",
6100 	.early_init = gfx_v9_0_early_init,
6101 	.late_init = gfx_v9_0_late_init,
6102 	.sw_init = gfx_v9_0_sw_init,
6103 	.sw_fini = gfx_v9_0_sw_fini,
6104 	.hw_init = gfx_v9_0_hw_init,
6105 	.hw_fini = gfx_v9_0_hw_fini,
6106 	.suspend = gfx_v9_0_suspend,
6107 	.resume = gfx_v9_0_resume,
6108 	.is_idle = gfx_v9_0_is_idle,
6109 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6110 	.soft_reset = gfx_v9_0_soft_reset,
6111 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6112 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6113 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6114 };
6115 
6116 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6117 	.type = AMDGPU_RING_TYPE_GFX,
6118 	.align_mask = 0xff,
6119 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6120 	.support_64bit_ptrs = true,
6121 	.vmhub = AMDGPU_GFXHUB_0,
6122 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6123 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6124 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6125 	.emit_frame_size = /* 242 maximum in total if 16 IBs */
6126 		5 +  /* COND_EXEC */
6127 		7 +  /* PIPELINE_SYNC */
6128 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6129 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6130 		2 + /* VM_FLUSH */
6131 		8 +  /* FENCE for VM_FLUSH */
6132 		20 + /* GDS switch */
6133 		4 + /* double SWITCH_BUFFER,
6134 		       the first COND_EXEC jumps to the place just
6135 		       prior to this double SWITCH_BUFFER */
6136 		5 + /* COND_EXEC */
6137 		7 +	 /*	HDP_flush */
6138 		4 +	 /*	VGT_flush */
6139 		14 + /*	CE_META */
6140 		31 + /*	DE_META */
6141 		3 + /* CNTX_CTRL */
6142 		5 + /* HDP_INVL */
6143 		8 + 8 + /* FENCE x2 */
6144 		2, /* SWITCH_BUFFER */
6145 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6146 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6147 	.emit_fence = gfx_v9_0_ring_emit_fence,
6148 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6149 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6150 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6151 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6152 	.test_ring = gfx_v9_0_ring_test_ring,
6153 	.test_ib = gfx_v9_0_ring_test_ib,
6154 	.insert_nop = amdgpu_ring_insert_nop,
6155 	.pad_ib = amdgpu_ring_generic_pad_ib,
6156 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6157 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6158 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6159 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6160 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6161 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6162 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6163 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6164 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6165 };
6166 
6167 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6168 	.type = AMDGPU_RING_TYPE_COMPUTE,
6169 	.align_mask = 0xff,
6170 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6171 	.support_64bit_ptrs = true,
6172 	.vmhub = AMDGPU_GFXHUB_0,
6173 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6174 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6175 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6176 	.emit_frame_size =
6177 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6178 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6179 		5 + /* hdp invalidate */
6180 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6181 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6182 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6183 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6184 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6185 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6186 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6187 	.emit_fence = gfx_v9_0_ring_emit_fence,
6188 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6189 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6190 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6191 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6192 	.test_ring = gfx_v9_0_ring_test_ring,
6193 	.test_ib = gfx_v9_0_ring_test_ib,
6194 	.insert_nop = amdgpu_ring_insert_nop,
6195 	.pad_ib = amdgpu_ring_generic_pad_ib,
6196 	.set_priority = gfx_v9_0_ring_set_priority_compute,
6197 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6198 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6199 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6200 };
6201 
6202 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6203 	.type = AMDGPU_RING_TYPE_KIQ,
6204 	.align_mask = 0xff,
6205 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6206 	.support_64bit_ptrs = true,
6207 	.vmhub = AMDGPU_GFXHUB_0,
6208 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6209 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6210 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6211 	.emit_frame_size =
6212 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6213 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6214 		5 + /* hdp invalidate */
6215 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6216 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6217 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6218 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6219 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6220 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6221 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6222 	.test_ring = gfx_v9_0_ring_test_ring,
6223 	.insert_nop = amdgpu_ring_insert_nop,
6224 	.pad_ib = amdgpu_ring_generic_pad_ib,
6225 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6226 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6227 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6228 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6229 };
6230 
6231 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6232 {
6233 	int i;
6234 
6235 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6236 
6237 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6238 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6239 
6240 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6241 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6242 }
6243 
6244 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6245 	.set = gfx_v9_0_set_eop_interrupt_state,
6246 	.process = gfx_v9_0_eop_irq,
6247 };
6248 
6249 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6250 	.set = gfx_v9_0_set_priv_reg_fault_state,
6251 	.process = gfx_v9_0_priv_reg_irq,
6252 };
6253 
6254 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6255 	.set = gfx_v9_0_set_priv_inst_fault_state,
6256 	.process = gfx_v9_0_priv_inst_irq,
6257 };
6258 
6259 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6260 	.set = gfx_v9_0_set_cp_ecc_error_state,
6261 	.process = gfx_v9_0_cp_ecc_error_irq,
6262 };
6263 
6265 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6266 {
6267 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6268 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6269 
6270 	adev->gfx.priv_reg_irq.num_types = 1;
6271 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6272 
6273 	adev->gfx.priv_inst_irq.num_types = 1;
6274 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6275 
6276 	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6277 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6278 }
6279 
6280 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6281 {
6282 	switch (adev->asic_type) {
6283 	case CHIP_VEGA10:
6284 	case CHIP_VEGA12:
6285 	case CHIP_VEGA20:
6286 	case CHIP_RAVEN:
6287 	case CHIP_ARCTURUS:
6288 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6289 		break;
6290 	default:
6291 		break;
6292 	}
6293 }
6294 
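/*
 * Per-ASIC GDS defaults: total GDS size, the maximum compute wave id
 * allowed to use GDS, and the GWS/OA resource counts.
 */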
6295 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6296 {
6297 	/* init asic gds info */
6298 	switch (adev->asic_type) {
6299 	case CHIP_VEGA10:
6300 	case CHIP_VEGA12:
6301 	case CHIP_VEGA20:
6302 		adev->gds.gds_size = 0x10000;
6303 		break;
6304 	case CHIP_RAVEN:
6305 	case CHIP_ARCTURUS:
6306 		adev->gds.gds_size = 0x1000;
6307 		break;
6308 	default:
6309 		adev->gds.gds_size = 0x10000;
6310 		break;
6311 	}
6312 
6313 	switch (adev->asic_type) {
6314 	case CHIP_VEGA10:
6315 	case CHIP_VEGA20:
6316 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6317 		break;
6318 	case CHIP_VEGA12:
6319 		adev->gds.gds_compute_max_wave_id = 0x27f;
6320 		break;
6321 	case CHIP_RAVEN:
6322 		if (adev->rev_id >= 0x8)
6323 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6324 		else
6325 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6326 		break;
6327 	case CHIP_ARCTURUS:
6328 		adev->gds.gds_compute_max_wave_id = 0xfff;
6329 		break;
6330 	default:
6331 		/* this really depends on the chip */
6332 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6333 		break;
6334 	}
6335 
6336 	adev->gds.gws_size = 64;
6337 	adev->gds.oa_size = 16;
6338 }
6339 
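/*
 * Program the user-requested inactive CU bitmap for the currently
 * selected SE/SH via GC_USER_SHADER_ARRAY_CONFIG.
 */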
6340 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6341 						 u32 bitmap)
6342 {
6343 	u32 data;
6344 
6345 	if (!bitmap)
6346 		return;
6347 
6348 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6349 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6350 
6351 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6352 }
6353 
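/*
 * Return the active CU bitmap for the currently selected SE/SH by
 * combining the CC_GC_SHADER_ARRAY_CONFIG and GC_USER_SHADER_ARRAY_CONFIG
 * inactive CU masks.
 */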
6354 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6355 {
6356 	u32 data, mask;
6357 
6358 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6359 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6360 
6361 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6362 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6363 
6364 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6365 
6366 	return (~data) & mask;
6367 }
6368 
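/*
 * Populate cu_info with the per-SE/SH active CU bitmaps, the total number
 * of active CUs and the always-on (AO) CU mask, walking every shader
 * engine and shader array under grbm_idx_mutex.
 */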
6369 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6370 				 struct amdgpu_cu_info *cu_info)
6371 {
6372 	int i, j, k, counter, active_cu_number = 0;
6373 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6374 	unsigned disable_masks[4 * 4];
6375 
6376 	if (!adev || !cu_info)
6377 		return -EINVAL;
6378 
6379 	/*
6380 	 * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6381 	 */
6382 	if (adev->gfx.config.max_shader_engines *
6383 		adev->gfx.config.max_sh_per_se > 16)
6384 		return -EINVAL;
6385 
6386 	amdgpu_gfx_parse_disable_cu(disable_masks,
6387 				    adev->gfx.config.max_shader_engines,
6388 				    adev->gfx.config.max_sh_per_se);
6389 
6390 	mutex_lock(&adev->grbm_idx_mutex);
6391 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6392 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6393 			mask = 1;
6394 			ao_bitmap = 0;
6395 			counter = 0;
6396 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6397 			gfx_v9_0_set_user_cu_inactive_bitmap(
6398 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6399 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6400 
6401 			/*
6402 			 * The bitmap (and ao_cu_bitmap) in the cu_info structure
6403 			 * is a 4x4 array, which suits Vega ASICs with their
6404 			 * 4*2 SE/SH layout.
6405 			 * Arcturus, however, uses an 8*1 SE/SH layout.
6406 			 * To minimize the impact, we fold it into the current
6407 			 * bitmap array as below:
6408 			 *    SE4,SH0 --> bitmap[0][1]
6409 			 *    SE5,SH0 --> bitmap[1][1]
6410 			 *    SE6,SH0 --> bitmap[2][1]
6411 			 *    SE7,SH0 --> bitmap[3][1]
6412 			 */
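			/*
			 * The index expression below implements that folding,
			 * e.g. SE5/SH0 (i = 5, j = 0): bitmap[5 % 4][0 + 5 / 4]
			 * == bitmap[1][1].
			 */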
6413 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6414 
6415 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6416 				if (bitmap & mask) {
6417 					if (counter < adev->gfx.config.max_cu_per_sh)
6418 						ao_bitmap |= mask;
6419 					counter++;
6420 				}
6421 				mask <<= 1;
6422 			}
6423 			active_cu_number += counter;
6424 			if (i < 2 && j < 2)
6425 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6426 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6427 		}
6428 	}
6429 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6430 	mutex_unlock(&adev->grbm_idx_mutex);
6431 
6432 	cu_info->number = active_cu_number;
6433 	cu_info->ao_cu_mask = ao_cu_mask;
6434 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6435 
6436 	return 0;
6437 }
6438 
6439 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6440 {
6441 	.type = AMD_IP_BLOCK_TYPE_GFX,
6442 	.major = 9,
6443 	.minor = 0,
6444 	.rev = 0,
6445 	.funcs = &gfx_v9_0_ip_funcs,
6446 };
6447