xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision d8a46257c22906db38590eb7986c07770bee002c)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 #include "vega10_enum.h"
40 #include "hdp/hdp_4_0_offset.h"
41 
42 #include "soc15.h"
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55 
56 #define mmPWR_MISC_CNTL_STATUS					0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
62 
63 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
65 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
66 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
69 
70 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
76 
77 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
79 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
83 
84 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
85 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
86 MODULE_FIRMWARE("amdgpu/raven_me.bin");
87 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
88 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
89 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
90 
91 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
98 
99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
106 
107 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
108 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
110 
111 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
112 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
113 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
114 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
115 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
116 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
117 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
118 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
119 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
120 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
121 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
122 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
123 
124 enum ta_ras_gfx_subblock {
125 	/*CPC*/
126 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
127 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
128 	TA_RAS_BLOCK__GFX_CPC_UCODE,
129 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
130 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
131 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
132 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
133 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
134 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
135 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
136 	/* CPF*/
137 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
138 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
139 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
140 	TA_RAS_BLOCK__GFX_CPF_TAG,
141 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
142 	/* CPG*/
143 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
144 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
145 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
146 	TA_RAS_BLOCK__GFX_CPG_TAG,
147 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
148 	/* GDS*/
149 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
150 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
151 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
152 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
153 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
154 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
155 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
156 	/* SPI*/
157 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
158 	/* SQ*/
159 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
160 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
161 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
162 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
163 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
164 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
165 	/* SQC (3 ranges)*/
166 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
167 	/* SQC range 0*/
168 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
169 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
170 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
171 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
172 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
173 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
174 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
175 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
176 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
177 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
178 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
179 	/* SQC range 1*/
180 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
181 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
182 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
183 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
184 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
185 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
186 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
187 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
188 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
189 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
190 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
191 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
192 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
193 	/* SQC range 2*/
194 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
195 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
196 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
197 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
198 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
199 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
201 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
202 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
203 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
204 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
205 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
206 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
207 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
208 	/* TA*/
209 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
210 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
211 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
212 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
213 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
214 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
215 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
216 	/* TCA*/
217 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
218 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
219 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
220 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
221 	/* TCC (5 sub-ranges)*/
222 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
223 	/* TCC range 0*/
224 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
225 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
226 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
227 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
228 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
229 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
230 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
231 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
232 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
233 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
234 	/* TCC range 1*/
235 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
236 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
237 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
238 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
239 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
240 	/* TCC range 2*/
241 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
242 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
243 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
244 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
245 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
246 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
247 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
248 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
249 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
250 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
251 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
252 	/* TCC range 3*/
253 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
254 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
255 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
256 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
257 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
258 	/* TCC range 4*/
259 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
260 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
261 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
262 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
263 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
264 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
265 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
266 	/* TCI*/
267 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
268 	/* TCP*/
269 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
270 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
271 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
272 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
273 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
274 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
275 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
276 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
277 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
278 	/* TD*/
279 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
280 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
281 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
282 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
283 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
284 	/* EA (3 sub-ranges)*/
285 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
286 	/* EA range 0*/
287 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
288 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
289 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
290 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
291 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
292 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
293 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
294 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
295 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
296 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
297 	/* EA range 1*/
298 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
299 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
300 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
301 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
302 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
303 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
304 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
305 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
306 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
307 	/* EA range 2*/
308 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
309 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
310 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
311 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
312 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
313 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
314 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
315 	/* UTC VM L2 bank*/
316 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
317 	/* UTC VM walker*/
318 	TA_RAS_BLOCK__UTC_VML2_WALKER,
319 	/* UTC ATC L2 2MB cache*/
320 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
321 	/* UTC ATC L2 4KB cache*/
322 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
323 	TA_RAS_BLOCK__GFX_MAX
324 };
325 
326 struct ras_gfx_subblock {
327 	unsigned char *name;
328 	int ta_subblock;
329 	int hw_supported_error_type;
330 	int sw_supported_error_type;
331 };
332 
333 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
334 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
335 		#subblock,                                                     \
336 		TA_RAS_BLOCK__##subblock,                                      \
337 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
338 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
339 	}
340 
341 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
342 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
343 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
344 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
345 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
346 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
347 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
348 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
349 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
350 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
351 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
352 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
353 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
354 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
355 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
356 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
357 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
358 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
359 			     0),
360 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
361 			     0),
362 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
363 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
364 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
369 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
370 			     0, 0),
371 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
372 			     0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
374 			     0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
376 			     0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
378 			     0, 0),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
380 			     0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
382 			     1),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
384 			     0, 0, 0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
386 			     0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
388 			     0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
390 			     0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
392 			     0),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
394 			     0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
396 			     0, 0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
398 			     0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
402 			     0, 0, 0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
406 			     0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
408 			     0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
412 			     0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
414 			     0, 0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
418 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
420 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
422 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
424 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
426 			     1),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
428 			     1),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
430 			     1),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
432 			     0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
434 			     0),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
436 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
438 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
440 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
442 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
444 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
447 			     0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
450 			     0),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
452 			     0, 0),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
454 			     0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
456 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
457 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
460 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
462 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
464 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
489 };
490 
491 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
492 {
493 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
494 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
495 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
496 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
497 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
498 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
499 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
500 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
501 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
502 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
503 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
504 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
505 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
506 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
507 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
508 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
509 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
510 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
511 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
512 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
513 };
514 
515 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
516 {
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
523 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
524 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
525 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
526 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
535 };
536 
537 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
538 {
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
545 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
546 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
547 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
548 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
550 };
551 
552 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
553 {
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
560 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
561 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
562 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
563 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
578 };
579 
580 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
581 {
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
588 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
589 };
590 
591 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
592 {
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
599 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
600 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
601 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
602 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
612 };
613 
614 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
615 {
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
619 };
620 
621 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
622 {
623 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
624 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
625 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
638 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
639 };
640 
641 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
642 {
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
645 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
646 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
647 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
648 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
656 };
657 
658 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
659 {
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
665 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
666 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
667 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
668 };
669 
670 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
671 {
672 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
673 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
674 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
675 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
676 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
677 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
678 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
679 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
680 };
681 
682 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
683 {
684 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
685 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
686 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
687 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
688 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
689 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
690 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
691 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
692 };
693 
694 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
695 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
696 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
697 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
698 
699 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
700 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
701 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
702 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
703 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
704                                  struct amdgpu_cu_info *cu_info);
705 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
706 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
707 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
708 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
709 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
710 					  void *ras_error_status);
711 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
712 				     void *inject_if);
713 
714 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
715 {
716 	switch (adev->asic_type) {
717 	case CHIP_VEGA10:
718 		soc15_program_register_sequence(adev,
719 						golden_settings_gc_9_0,
720 						ARRAY_SIZE(golden_settings_gc_9_0));
721 		soc15_program_register_sequence(adev,
722 						golden_settings_gc_9_0_vg10,
723 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
724 		break;
725 	case CHIP_VEGA12:
726 		soc15_program_register_sequence(adev,
727 						golden_settings_gc_9_2_1,
728 						ARRAY_SIZE(golden_settings_gc_9_2_1));
729 		soc15_program_register_sequence(adev,
730 						golden_settings_gc_9_2_1_vg12,
731 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
732 		break;
733 	case CHIP_VEGA20:
734 		soc15_program_register_sequence(adev,
735 						golden_settings_gc_9_0,
736 						ARRAY_SIZE(golden_settings_gc_9_0));
737 		soc15_program_register_sequence(adev,
738 						golden_settings_gc_9_0_vg20,
739 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
740 		break;
741 	case CHIP_ARCTURUS:
742 		soc15_program_register_sequence(adev,
743 						golden_settings_gc_9_4_1_arct,
744 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
745 		break;
746 	case CHIP_RAVEN:
747 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
748 						ARRAY_SIZE(golden_settings_gc_9_1));
749 		if (adev->rev_id >= 8)
750 			soc15_program_register_sequence(adev,
751 							golden_settings_gc_9_1_rv2,
752 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
753 		else
754 			soc15_program_register_sequence(adev,
755 							golden_settings_gc_9_1_rv1,
756 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
757 		break;
758 	default:
759 		break;
760 	}
761 
762 	if (adev->asic_type != CHIP_ARCTURUS)
763 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
764 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
765 }
766 
767 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
768 {
769 	adev->gfx.scratch.num_reg = 8;
770 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
771 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
772 }
773 
774 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
775 				       bool wc, uint32_t reg, uint32_t val)
776 {
777 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
778 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
779 				WRITE_DATA_DST_SEL(0) |
780 				(wc ? WR_CONFIRM : 0));
781 	amdgpu_ring_write(ring, reg);
782 	amdgpu_ring_write(ring, 0);
783 	amdgpu_ring_write(ring, val);
784 }
785 
786 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
787 				  int mem_space, int opt, uint32_t addr0,
788 				  uint32_t addr1, uint32_t ref, uint32_t mask,
789 				  uint32_t inv)
790 {
791 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
792 	amdgpu_ring_write(ring,
793 				 /* memory (1) or register (0) */
794 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
795 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
796 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
797 				 WAIT_REG_MEM_ENGINE(eng_sel)));
798 
799 	if (mem_space)
800 		BUG_ON(addr0 & 0x3); /* Dword align */
801 	amdgpu_ring_write(ring, addr0);
802 	amdgpu_ring_write(ring, addr1);
803 	amdgpu_ring_write(ring, ref);
804 	amdgpu_ring_write(ring, mask);
805 	amdgpu_ring_write(ring, inv); /* poll interval */
806 }
807 
808 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
809 {
810 	struct amdgpu_device *adev = ring->adev;
811 	uint32_t scratch;
812 	uint32_t tmp = 0;
813 	unsigned i;
814 	int r;
815 
816 	r = amdgpu_gfx_scratch_get(adev, &scratch);
817 	if (r)
818 		return r;
819 
820 	WREG32(scratch, 0xCAFEDEAD);
821 	r = amdgpu_ring_alloc(ring, 3);
822 	if (r)
823 		goto error_free_scratch;
824 
825 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
826 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
827 	amdgpu_ring_write(ring, 0xDEADBEEF);
828 	amdgpu_ring_commit(ring);
829 
830 	for (i = 0; i < adev->usec_timeout; i++) {
831 		tmp = RREG32(scratch);
832 		if (tmp == 0xDEADBEEF)
833 			break;
834 		udelay(1);
835 	}
836 
837 	if (i >= adev->usec_timeout)
838 		r = -ETIMEDOUT;
839 
840 error_free_scratch:
841 	amdgpu_gfx_scratch_free(adev, scratch);
842 	return r;
843 }
844 
845 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
846 {
847 	struct amdgpu_device *adev = ring->adev;
848 	struct amdgpu_ib ib;
849 	struct dma_fence *f = NULL;
850 
851 	unsigned index;
852 	uint64_t gpu_addr;
853 	uint32_t tmp;
854 	long r;
855 
856 	r = amdgpu_device_wb_get(adev, &index);
857 	if (r)
858 		return r;
859 
860 	gpu_addr = adev->wb.gpu_addr + (index * 4);
861 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
862 	memset(&ib, 0, sizeof(ib));
863 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
864 	if (r)
865 		goto err1;
866 
867 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
868 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
869 	ib.ptr[2] = lower_32_bits(gpu_addr);
870 	ib.ptr[3] = upper_32_bits(gpu_addr);
871 	ib.ptr[4] = 0xDEADBEEF;
872 	ib.length_dw = 5;
873 
874 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
875 	if (r)
876 		goto err2;
877 
878 	r = dma_fence_wait_timeout(f, false, timeout);
879 	if (r == 0) {
880 		r = -ETIMEDOUT;
881 		goto err2;
882 	} else if (r < 0) {
883 		goto err2;
884 	}
885 
886 	tmp = adev->wb.wb[index];
887 	if (tmp == 0xDEADBEEF)
888 		r = 0;
889 	else
890 		r = -EINVAL;
891 
892 err2:
893 	amdgpu_ib_free(adev, &ib, NULL);
894 	dma_fence_put(f);
895 err1:
896 	amdgpu_device_wb_free(adev, index);
897 	return r;
898 }
899 
900 
901 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
902 {
903 	release_firmware(adev->gfx.pfp_fw);
904 	adev->gfx.pfp_fw = NULL;
905 	release_firmware(adev->gfx.me_fw);
906 	adev->gfx.me_fw = NULL;
907 	release_firmware(adev->gfx.ce_fw);
908 	adev->gfx.ce_fw = NULL;
909 	release_firmware(adev->gfx.rlc_fw);
910 	adev->gfx.rlc_fw = NULL;
911 	release_firmware(adev->gfx.mec_fw);
912 	adev->gfx.mec_fw = NULL;
913 	release_firmware(adev->gfx.mec2_fw);
914 	adev->gfx.mec2_fw = NULL;
915 
916 	kfree(adev->gfx.rlc.register_list_format);
917 }
918 
919 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
920 {
921 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
922 
923 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
924 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
925 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
926 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
927 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
928 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
929 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
930 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
931 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
932 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
933 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
934 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
935 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
936 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
937 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
938 }
939 
940 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
941 {
942 	adev->gfx.me_fw_write_wait = false;
943 	adev->gfx.mec_fw_write_wait = false;
944 
945 	switch (adev->asic_type) {
946 	case CHIP_VEGA10:
947 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
948 		    (adev->gfx.me_feature_version >= 42) &&
949 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
950 		    (adev->gfx.pfp_feature_version >= 42))
951 			adev->gfx.me_fw_write_wait = true;
952 
953 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
954 		    (adev->gfx.mec_feature_version >= 42))
955 			adev->gfx.mec_fw_write_wait = true;
956 		break;
957 	case CHIP_VEGA12:
958 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
959 		    (adev->gfx.me_feature_version >= 44) &&
960 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
961 		    (adev->gfx.pfp_feature_version >= 44))
962 			adev->gfx.me_fw_write_wait = true;
963 
964 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
965 		    (adev->gfx.mec_feature_version >= 44))
966 			adev->gfx.mec_fw_write_wait = true;
967 		break;
968 	case CHIP_VEGA20:
969 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
970 		    (adev->gfx.me_feature_version >= 44) &&
971 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
972 		    (adev->gfx.pfp_feature_version >= 44))
973 			adev->gfx.me_fw_write_wait = true;
974 
975 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
976 		    (adev->gfx.mec_feature_version >= 44))
977 			adev->gfx.mec_fw_write_wait = true;
978 		break;
979 	case CHIP_RAVEN:
980 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
981 		    (adev->gfx.me_feature_version >= 42) &&
982 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
983 		    (adev->gfx.pfp_feature_version >= 42))
984 			adev->gfx.me_fw_write_wait = true;
985 
986 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
987 		    (adev->gfx.mec_feature_version >= 42))
988 			adev->gfx.mec_fw_write_wait = true;
989 		break;
990 	default:
991 		break;
992 	}
993 }
994 
995 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
996 {
997 	switch (adev->asic_type) {
998 	case CHIP_VEGA10:
999 	case CHIP_VEGA12:
1000 	case CHIP_VEGA20:
1001 		break;
1002 	case CHIP_RAVEN:
1003 		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
1004 			break;
1005 		if ((adev->gfx.rlc_fw_version != 106 &&
1006 		     adev->gfx.rlc_fw_version < 531) ||
1007 		    (adev->gfx.rlc_fw_version == 53815) ||
1008 		    (adev->gfx.rlc_feature_version < 1) ||
1009 		    !adev->gfx.rlc.is_rlc_v2_1)
1010 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1011 		break;
1012 	default:
1013 		break;
1014 	}
1015 }
1016 
1017 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1018 					  const char *chip_name)
1019 {
1020 	char fw_name[30];
1021 	int err;
1022 	struct amdgpu_firmware_info *info = NULL;
1023 	const struct common_firmware_header *header = NULL;
1024 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1025 
1026 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1027 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1028 	if (err)
1029 		goto out;
1030 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1031 	if (err)
1032 		goto out;
1033 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1034 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1035 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1036 
1037 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1038 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1039 	if (err)
1040 		goto out;
1041 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1042 	if (err)
1043 		goto out;
1044 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1045 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1046 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1047 
1048 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1049 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1050 	if (err)
1051 		goto out;
1052 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1053 	if (err)
1054 		goto out;
1055 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1056 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1057 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1058 
1059 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1060 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1061 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1062 		info->fw = adev->gfx.pfp_fw;
1063 		header = (const struct common_firmware_header *)info->fw->data;
1064 		adev->firmware.fw_size +=
1065 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1066 
1067 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1068 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1069 		info->fw = adev->gfx.me_fw;
1070 		header = (const struct common_firmware_header *)info->fw->data;
1071 		adev->firmware.fw_size +=
1072 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1073 
1074 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1075 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1076 		info->fw = adev->gfx.ce_fw;
1077 		header = (const struct common_firmware_header *)info->fw->data;
1078 		adev->firmware.fw_size +=
1079 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1080 	}
1081 
1082 out:
1083 	if (err) {
1084 		dev_err(adev->dev,
1085 			"gfx9: Failed to load firmware \"%s\"\n",
1086 			fw_name);
1087 		release_firmware(adev->gfx.pfp_fw);
1088 		adev->gfx.pfp_fw = NULL;
1089 		release_firmware(adev->gfx.me_fw);
1090 		adev->gfx.me_fw = NULL;
1091 		release_firmware(adev->gfx.ce_fw);
1092 		adev->gfx.ce_fw = NULL;
1093 	}
1094 	return err;
1095 }
1096 
1097 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1098 					  const char *chip_name)
1099 {
1100 	char fw_name[30];
1101 	int err;
1102 	struct amdgpu_firmware_info *info = NULL;
1103 	const struct common_firmware_header *header = NULL;
1104 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1105 	unsigned int *tmp = NULL;
1106 	unsigned int i = 0;
1107 	uint16_t version_major;
1108 	uint16_t version_minor;
1109 	uint32_t smu_version;
1110 
1111 	/*
1112 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1113 	 * instead of picasso_rlc.bin.
1114 	 * Judgment method:
1115 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1116 	 *          or revision >= 0xD8 && revision <= 0xDF
1117 	 * otherwise is PCO FP5
1118 	 */
1119 	if (!strcmp(chip_name, "picasso") &&
1120 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1121 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1122 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1123 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1124 		(smu_version >= 0x41e2b))
1125 		/**
1126 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1127 		*/
1128 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1129 	else
1130 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1131 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1132 	if (err)
1133 		goto out;
1134 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1135 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1136 
1137 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1138 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1139 	if (version_major == 2 && version_minor == 1)
1140 		adev->gfx.rlc.is_rlc_v2_1 = true;
1141 
1142 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1143 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1144 	adev->gfx.rlc.save_and_restore_offset =
1145 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1146 	adev->gfx.rlc.clear_state_descriptor_offset =
1147 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1148 	adev->gfx.rlc.avail_scratch_ram_locations =
1149 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1150 	adev->gfx.rlc.reg_restore_list_size =
1151 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1152 	adev->gfx.rlc.reg_list_format_start =
1153 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1154 	adev->gfx.rlc.reg_list_format_separate_start =
1155 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1156 	adev->gfx.rlc.starting_offsets_start =
1157 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1158 	adev->gfx.rlc.reg_list_format_size_bytes =
1159 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1160 	adev->gfx.rlc.reg_list_size_bytes =
1161 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1162 	adev->gfx.rlc.register_list_format =
1163 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1164 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1165 	if (!adev->gfx.rlc.register_list_format) {
1166 		err = -ENOMEM;
1167 		goto out;
1168 	}
1169 
1170 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1171 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1172 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1173 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1174 
1175 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1176 
1177 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1178 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1179 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1180 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1181 
1182 	if (adev->gfx.rlc.is_rlc_v2_1)
1183 		gfx_v9_0_init_rlc_ext_microcode(adev);
1184 
1185 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1186 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1187 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1188 		info->fw = adev->gfx.rlc_fw;
1189 		header = (const struct common_firmware_header *)info->fw->data;
1190 		adev->firmware.fw_size +=
1191 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1192 
1193 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1194 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1195 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1196 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1197 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1198 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1199 			info->fw = adev->gfx.rlc_fw;
1200 			adev->firmware.fw_size +=
1201 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1202 
1203 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1204 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1205 			info->fw = adev->gfx.rlc_fw;
1206 			adev->firmware.fw_size +=
1207 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1208 
1209 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1210 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1211 			info->fw = adev->gfx.rlc_fw;
1212 			adev->firmware.fw_size +=
1213 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1214 		}
1215 	}
1216 
1217 out:
1218 	if (err) {
1219 		dev_err(adev->dev,
1220 			"gfx9: Failed to load firmware \"%s\"\n",
1221 			fw_name);
1222 		release_firmware(adev->gfx.rlc_fw);
1223 		adev->gfx.rlc_fw = NULL;
1224 	}
1225 	return err;
1226 }
1227 
1228 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1229 					  const char *chip_name)
1230 {
1231 	char fw_name[30];
1232 	int err;
1233 	struct amdgpu_firmware_info *info = NULL;
1234 	const struct common_firmware_header *header = NULL;
1235 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1236 
1237 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1238 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1239 	if (err)
1240 		goto out;
1241 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1242 	if (err)
1243 		goto out;
1244 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1245 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1246 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1247 
1248 
1249 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1250 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1251 	if (!err) {
1252 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1253 		if (err)
1254 			goto out;
1255 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1256 		adev->gfx.mec2_fw->data;
1257 		adev->gfx.mec2_fw_version =
1258 		le32_to_cpu(cp_hdr->header.ucode_version);
1259 		adev->gfx.mec2_feature_version =
1260 		le32_to_cpu(cp_hdr->ucode_feature_version);
1261 	} else {
1262 		err = 0;
1263 		adev->gfx.mec2_fw = NULL;
1264 	}
1265 
1266 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1267 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1268 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1269 		info->fw = adev->gfx.mec_fw;
1270 		header = (const struct common_firmware_header *)info->fw->data;
1271 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1272 		adev->firmware.fw_size +=
1273 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1274 
1275 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1276 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1277 		info->fw = adev->gfx.mec_fw;
1278 		adev->firmware.fw_size +=
1279 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1280 
1281 		if (adev->gfx.mec2_fw) {
1282 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1283 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1284 			info->fw = adev->gfx.mec2_fw;
1285 			header = (const struct common_firmware_header *)info->fw->data;
1286 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1287 			adev->firmware.fw_size +=
1288 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1289 
1290 			/* TODO: Determine if MEC2 JT FW loading can be removed
1291 				 for all GFX V9 asic and above */
1292 			if (adev->asic_type != CHIP_ARCTURUS) {
1293 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1294 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1295 				info->fw = adev->gfx.mec2_fw;
1296 				adev->firmware.fw_size +=
1297 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1298 					PAGE_SIZE);
1299 			}
1300 		}
1301 	}
1302 
1303 out:
1304 	gfx_v9_0_check_if_need_gfxoff(adev);
1305 	gfx_v9_0_check_fw_write_wait(adev);
1306 	if (err) {
1307 		dev_err(adev->dev,
1308 			"gfx9: Failed to load firmware \"%s\"\n",
1309 			fw_name);
1310 		release_firmware(adev->gfx.mec_fw);
1311 		adev->gfx.mec_fw = NULL;
1312 		release_firmware(adev->gfx.mec2_fw);
1313 		adev->gfx.mec2_fw = NULL;
1314 	}
1315 	return err;
1316 }
1317 
1318 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1319 {
1320 	const char *chip_name;
1321 	int r;
1322 
1323 	DRM_DEBUG("\n");
1324 
1325 	switch (adev->asic_type) {
1326 	case CHIP_VEGA10:
1327 		chip_name = "vega10";
1328 		break;
1329 	case CHIP_VEGA12:
1330 		chip_name = "vega12";
1331 		break;
1332 	case CHIP_VEGA20:
1333 		chip_name = "vega20";
1334 		break;
1335 	case CHIP_RAVEN:
1336 		if (adev->rev_id >= 8)
1337 			chip_name = "raven2";
1338 		else if (adev->pdev->device == 0x15d8)
1339 			chip_name = "picasso";
1340 		else
1341 			chip_name = "raven";
1342 		break;
1343 	case CHIP_ARCTURUS:
1344 		chip_name = "arcturus";
1345 		break;
1346 	default:
1347 		BUG();
1348 	}
1349 
1350 	/* No CPG in Arcturus */
1351 	if (adev->asic_type != CHIP_ARCTURUS) {
1352 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1353 		if (r)
1354 			return r;
1355 	}
1356 
1357 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1358 	if (r)
1359 		return r;
1360 
1361 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1362 	if (r)
1363 		return r;
1364 
1365 	return r;
1366 }
1367 
1368 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1369 {
1370 	u32 count = 0;
1371 	const struct cs_section_def *sect = NULL;
1372 	const struct cs_extent_def *ext = NULL;
1373 
1374 	/* begin clear state */
1375 	count += 2;
1376 	/* context control state */
1377 	count += 3;
1378 
1379 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1380 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1381 			if (sect->id == SECT_CONTEXT)
1382 				count += 2 + ext->reg_count;
1383 			else
1384 				return 0;
1385 		}
1386 	}
1387 
1388 	/* end clear state */
1389 	count += 2;
1390 	/* clear state */
1391 	count += 2;
1392 
1393 	return count;
1394 }
1395 
1396 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1397 				    volatile u32 *buffer)
1398 {
1399 	u32 count = 0, i;
1400 	const struct cs_section_def *sect = NULL;
1401 	const struct cs_extent_def *ext = NULL;
1402 
1403 	if (adev->gfx.rlc.cs_data == NULL)
1404 		return;
1405 	if (buffer == NULL)
1406 		return;
1407 
1408 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1409 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1410 
1411 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1412 	buffer[count++] = cpu_to_le32(0x80000000);
1413 	buffer[count++] = cpu_to_le32(0x80000000);
1414 
1415 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1416 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1417 			if (sect->id == SECT_CONTEXT) {
1418 				buffer[count++] =
1419 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1420 				buffer[count++] = cpu_to_le32(ext->reg_index -
1421 						PACKET3_SET_CONTEXT_REG_START);
1422 				for (i = 0; i < ext->reg_count; i++)
1423 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1424 			} else {
1425 				return;
1426 			}
1427 		}
1428 	}
1429 
1430 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1431 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1432 
1433 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1434 	buffer[count++] = cpu_to_le32(0);
1435 }
1436 
1437 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1438 {
1439 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1440 	uint32_t pg_always_on_cu_num = 2;
1441 	uint32_t always_on_cu_num;
1442 	uint32_t i, j, k;
1443 	uint32_t mask, cu_bitmap, counter;
1444 
1445 	if (adev->flags & AMD_IS_APU)
1446 		always_on_cu_num = 4;
1447 	else if (adev->asic_type == CHIP_VEGA12)
1448 		always_on_cu_num = 8;
1449 	else
1450 		always_on_cu_num = 12;
1451 
1452 	mutex_lock(&adev->grbm_idx_mutex);
1453 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1454 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1455 			mask = 1;
1456 			cu_bitmap = 0;
1457 			counter = 0;
1458 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1459 
1460 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1461 				if (cu_info->bitmap[i][j] & mask) {
1462 					if (counter == pg_always_on_cu_num)
1463 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1464 					if (counter < always_on_cu_num)
1465 						cu_bitmap |= mask;
1466 					else
1467 						break;
1468 					counter++;
1469 				}
1470 				mask <<= 1;
1471 			}
1472 
1473 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1474 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1475 		}
1476 	}
1477 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1478 	mutex_unlock(&adev->grbm_idx_mutex);
1479 }
1480 
1481 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1482 {
1483 	uint32_t data;
1484 
1485 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1486 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1487 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1488 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1489 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1490 
1491 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1492 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1493 
1494 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1495 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1496 
1497 	mutex_lock(&adev->grbm_idx_mutex);
1498 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1499 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1500 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1501 
1502 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1503 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1504 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1505 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1506 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1507 
1508 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1509 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1510 	data &= 0x0000FFFF;
1511 	data |= 0x00C00000;
1512 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1513 
1514 	/*
1515 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1516 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1517 	 */
1518 
1519 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1520 	 * but used for RLC_LB_CNTL configuration */
1521 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1522 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1523 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1524 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1525 	mutex_unlock(&adev->grbm_idx_mutex);
1526 
1527 	gfx_v9_0_init_always_on_cu_mask(adev);
1528 }
1529 
1530 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1531 {
1532 	uint32_t data;
1533 
1534 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1535 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1536 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1537 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1538 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1539 
1540 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1541 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1542 
1543 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1544 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1545 
1546 	mutex_lock(&adev->grbm_idx_mutex);
1547 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1548 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1549 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1550 
1551 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1552 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1553 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1554 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1555 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1556 
1557 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1558 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1559 	data &= 0x0000FFFF;
1560 	data |= 0x00C00000;
1561 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1562 
1563 	/*
1564 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1565 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1566 	 */
1567 
1568 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1569 	 * but used for RLC_LB_CNTL configuration */
1570 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1571 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1572 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1573 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1574 	mutex_unlock(&adev->grbm_idx_mutex);
1575 
1576 	gfx_v9_0_init_always_on_cu_mask(adev);
1577 }
1578 
1579 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1580 {
1581 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1582 }
1583 
1584 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1585 {
1586 	return 5;
1587 }
1588 
1589 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1590 {
1591 	const struct cs_section_def *cs_data;
1592 	int r;
1593 
1594 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1595 
1596 	cs_data = adev->gfx.rlc.cs_data;
1597 
1598 	if (cs_data) {
1599 		/* init clear state block */
1600 		r = amdgpu_gfx_rlc_init_csb(adev);
1601 		if (r)
1602 			return r;
1603 	}
1604 
1605 	if (adev->asic_type == CHIP_RAVEN) {
1606 		/* TODO: double check the cp_table_size for RV */
1607 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1608 		r = amdgpu_gfx_rlc_init_cpt(adev);
1609 		if (r)
1610 			return r;
1611 	}
1612 
1613 	switch (adev->asic_type) {
1614 	case CHIP_RAVEN:
1615 		gfx_v9_0_init_lbpw(adev);
1616 		break;
1617 	case CHIP_VEGA20:
1618 		gfx_v9_4_init_lbpw(adev);
1619 		break;
1620 	default:
1621 		break;
1622 	}
1623 
1624 	return 0;
1625 }
1626 
1627 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1628 {
1629 	int r;
1630 
1631 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1632 	if (unlikely(r != 0))
1633 		return r;
1634 
1635 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1636 			AMDGPU_GEM_DOMAIN_VRAM);
1637 	if (!r)
1638 		adev->gfx.rlc.clear_state_gpu_addr =
1639 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1640 
1641 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1642 
1643 	return r;
1644 }
1645 
1646 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1647 {
1648 	int r;
1649 
1650 	if (!adev->gfx.rlc.clear_state_obj)
1651 		return;
1652 
1653 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1654 	if (likely(r == 0)) {
1655 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1656 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1657 	}
1658 }
1659 
1660 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1661 {
1662 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1663 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1664 }
1665 
1666 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1667 {
1668 	int r;
1669 	u32 *hpd;
1670 	const __le32 *fw_data;
1671 	unsigned fw_size;
1672 	u32 *fw;
1673 	size_t mec_hpd_size;
1674 
1675 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1676 
1677 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1678 
1679 	/* take ownership of the relevant compute queues */
1680 	amdgpu_gfx_compute_queue_acquire(adev);
1681 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1682 
1683 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1684 				      AMDGPU_GEM_DOMAIN_VRAM,
1685 				      &adev->gfx.mec.hpd_eop_obj,
1686 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1687 				      (void **)&hpd);
1688 	if (r) {
1689 		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1690 		gfx_v9_0_mec_fini(adev);
1691 		return r;
1692 	}
1693 
1694 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1695 
1696 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1697 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1698 
1699 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1700 
1701 	fw_data = (const __le32 *)
1702 		(adev->gfx.mec_fw->data +
1703 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1704 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1705 
1706 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1707 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1708 				      &adev->gfx.mec.mec_fw_obj,
1709 				      &adev->gfx.mec.mec_fw_gpu_addr,
1710 				      (void **)&fw);
1711 	if (r) {
1712 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1713 		gfx_v9_0_mec_fini(adev);
1714 		return r;
1715 	}
1716 
1717 	memcpy(fw, fw_data, fw_size);
1718 
1719 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1720 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1721 
1722 	return 0;
1723 }
1724 
1725 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1726 {
1727 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1728 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1729 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1730 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1731 		(SQ_IND_INDEX__FORCE_READ_MASK));
1732 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1733 }
1734 
1735 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1736 			   uint32_t wave, uint32_t thread,
1737 			   uint32_t regno, uint32_t num, uint32_t *out)
1738 {
1739 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1740 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1741 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1742 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1743 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1744 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1745 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1746 	while (num--)
1747 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1748 }
1749 
1750 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1751 {
1752 	/* type 1 wave data */
1753 	dst[(*no_fields)++] = 1;
1754 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1755 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1756 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1757 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1758 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1759 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1760 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1761 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1762 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1763 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1764 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1765 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1766 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1767 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1768 }
1769 
1770 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1771 				     uint32_t wave, uint32_t start,
1772 				     uint32_t size, uint32_t *dst)
1773 {
1774 	wave_read_regs(
1775 		adev, simd, wave, 0,
1776 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1777 }
1778 
1779 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1780 				     uint32_t wave, uint32_t thread,
1781 				     uint32_t start, uint32_t size,
1782 				     uint32_t *dst)
1783 {
1784 	wave_read_regs(
1785 		adev, simd, wave, thread,
1786 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1787 }
1788 
1789 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1790 				  u32 me, u32 pipe, u32 q, u32 vm)
1791 {
1792 	soc15_grbm_select(adev, me, pipe, q, vm);
1793 }
1794 
1795 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1796 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1797 	.select_se_sh = &gfx_v9_0_select_se_sh,
1798 	.read_wave_data = &gfx_v9_0_read_wave_data,
1799 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1800 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1801 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1802 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1803 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
1804 };
1805 
1806 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1807 {
1808 	u32 gb_addr_config;
1809 	int err;
1810 
1811 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1812 
1813 	switch (adev->asic_type) {
1814 	case CHIP_VEGA10:
1815 		adev->gfx.config.max_hw_contexts = 8;
1816 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1817 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1818 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1819 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1820 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1821 		break;
1822 	case CHIP_VEGA12:
1823 		adev->gfx.config.max_hw_contexts = 8;
1824 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1825 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1826 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1827 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1828 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1829 		DRM_INFO("fix gfx.config for vega12\n");
1830 		break;
1831 	case CHIP_VEGA20:
1832 		adev->gfx.config.max_hw_contexts = 8;
1833 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1834 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1835 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1836 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1837 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1838 		gb_addr_config &= ~0xf3e777ff;
1839 		gb_addr_config |= 0x22014042;
1840 		/* check vbios table if gpu info is not available */
1841 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1842 		if (err)
1843 			return err;
1844 		break;
1845 	case CHIP_RAVEN:
1846 		adev->gfx.config.max_hw_contexts = 8;
1847 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1851 		if (adev->rev_id >= 8)
1852 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1853 		else
1854 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1855 		break;
1856 	case CHIP_ARCTURUS:
1857 		adev->gfx.config.max_hw_contexts = 8;
1858 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1859 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1860 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1861 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1862 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1863 		gb_addr_config &= ~0xf3e777ff;
1864 		gb_addr_config |= 0x22014042;
1865 		break;
1866 	default:
1867 		BUG();
1868 		break;
1869 	}
1870 
1871 	adev->gfx.config.gb_addr_config = gb_addr_config;
1872 
1873 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1874 			REG_GET_FIELD(
1875 					adev->gfx.config.gb_addr_config,
1876 					GB_ADDR_CONFIG,
1877 					NUM_PIPES);
1878 
1879 	adev->gfx.config.max_tile_pipes =
1880 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1881 
1882 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1883 			REG_GET_FIELD(
1884 					adev->gfx.config.gb_addr_config,
1885 					GB_ADDR_CONFIG,
1886 					NUM_BANKS);
1887 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1888 			REG_GET_FIELD(
1889 					adev->gfx.config.gb_addr_config,
1890 					GB_ADDR_CONFIG,
1891 					MAX_COMPRESSED_FRAGS);
1892 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1893 			REG_GET_FIELD(
1894 					adev->gfx.config.gb_addr_config,
1895 					GB_ADDR_CONFIG,
1896 					NUM_RB_PER_SE);
1897 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1898 			REG_GET_FIELD(
1899 					adev->gfx.config.gb_addr_config,
1900 					GB_ADDR_CONFIG,
1901 					NUM_SHADER_ENGINES);
1902 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1903 			REG_GET_FIELD(
1904 					adev->gfx.config.gb_addr_config,
1905 					GB_ADDR_CONFIG,
1906 					PIPE_INTERLEAVE_SIZE));
1907 
1908 	return 0;
1909 }
1910 
1911 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1912 				   struct amdgpu_ngg_buf *ngg_buf,
1913 				   int size_se,
1914 				   int default_size_se)
1915 {
1916 	int r;
1917 
1918 	if (size_se < 0) {
1919 		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1920 		return -EINVAL;
1921 	}
1922 	size_se = size_se ? size_se : default_size_se;
1923 
1924 	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1925 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1926 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1927 				    &ngg_buf->bo,
1928 				    &ngg_buf->gpu_addr,
1929 				    NULL);
1930 	if (r) {
1931 		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1932 		return r;
1933 	}
1934 	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1935 
1936 	return r;
1937 }
1938 
1939 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1940 {
1941 	int i;
1942 
1943 	for (i = 0; i < NGG_BUF_MAX; i++)
1944 		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1945 				      &adev->gfx.ngg.buf[i].gpu_addr,
1946 				      NULL);
1947 
1948 	memset(&adev->gfx.ngg.buf[0], 0,
1949 			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1950 
1951 	adev->gfx.ngg.init = false;
1952 
1953 	return 0;
1954 }
1955 
1956 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1957 {
1958 	int r;
1959 
1960 	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
1961 		return 0;
1962 
1963 	/* GDS reserve memory: 64 bytes alignment */
1964 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1965 	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1966 	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1967 	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1968 
1969 	/* Primitive Buffer */
1970 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1971 				    amdgpu_prim_buf_per_se,
1972 				    64 * 1024);
1973 	if (r) {
1974 		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1975 		goto err;
1976 	}
1977 
1978 	/* Position Buffer */
1979 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1980 				    amdgpu_pos_buf_per_se,
1981 				    256 * 1024);
1982 	if (r) {
1983 		dev_err(adev->dev, "Failed to create Position Buffer\n");
1984 		goto err;
1985 	}
1986 
1987 	/* Control Sideband */
1988 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1989 				    amdgpu_cntl_sb_buf_per_se,
1990 				    256);
1991 	if (r) {
1992 		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1993 		goto err;
1994 	}
1995 
1996 	/* Parameter Cache, not created by default */
1997 	if (amdgpu_param_buf_per_se <= 0)
1998 		goto out;
1999 
2000 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2001 				    amdgpu_param_buf_per_se,
2002 				    512 * 1024);
2003 	if (r) {
2004 		dev_err(adev->dev, "Failed to create Parameter Cache\n");
2005 		goto err;
2006 	}
2007 
2008 out:
2009 	adev->gfx.ngg.init = true;
2010 	return 0;
2011 err:
2012 	gfx_v9_0_ngg_fini(adev);
2013 	return r;
2014 }
2015 
2016 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2017 {
2018 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2019 	int r;
2020 	u32 data, base;
2021 
2022 	if (!amdgpu_ngg)
2023 		return 0;
2024 
2025 	/* Program buffer size */
2026 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2027 			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2028 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2029 			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
2030 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2031 
2032 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2033 			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2034 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2035 			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2036 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2037 
2038 	/* Program buffer base address */
2039 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2040 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2041 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2042 
2043 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2044 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2045 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2046 
2047 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2048 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2049 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2050 
2051 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2052 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2053 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2054 
2055 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2056 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2057 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2058 
2059 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2060 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2061 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2062 
2063 	/* Clear GDS reserved memory */
2064 	r = amdgpu_ring_alloc(ring, 17);
2065 	if (r) {
2066 		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2067 			  ring->name, r);
2068 		return r;
2069 	}
2070 
2071 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2072 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2073 			           (adev->gds.gds_size +
2074 				    adev->gfx.ngg.gds_reserve_size));
2075 
2076 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2077 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2078 				PACKET3_DMA_DATA_DST_SEL(1) |
2079 				PACKET3_DMA_DATA_SRC_SEL(2)));
2080 	amdgpu_ring_write(ring, 0);
2081 	amdgpu_ring_write(ring, 0);
2082 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2083 	amdgpu_ring_write(ring, 0);
2084 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2085 				adev->gfx.ngg.gds_reserve_size);
2086 
2087 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2088 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2089 
2090 	amdgpu_ring_commit(ring);
2091 
2092 	return 0;
2093 }
2094 
2095 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2096 				      int mec, int pipe, int queue)
2097 {
2098 	int r;
2099 	unsigned irq_type;
2100 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2101 
2102 	ring = &adev->gfx.compute_ring[ring_id];
2103 
2104 	/* mec0 is me1 */
2105 	ring->me = mec + 1;
2106 	ring->pipe = pipe;
2107 	ring->queue = queue;
2108 
2109 	ring->ring_obj = NULL;
2110 	ring->use_doorbell = true;
2111 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2112 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2113 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2114 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2115 
2116 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2117 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2118 		+ ring->pipe;
2119 
2120 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2121 	r = amdgpu_ring_init(adev, ring, 1024,
2122 			     &adev->gfx.eop_irq, irq_type);
2123 	if (r)
2124 		return r;
2125 
2126 
2127 	return 0;
2128 }
2129 
2130 static int gfx_v9_0_sw_init(void *handle)
2131 {
2132 	int i, j, k, r, ring_id;
2133 	struct amdgpu_ring *ring;
2134 	struct amdgpu_kiq *kiq;
2135 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2136 
2137 	switch (adev->asic_type) {
2138 	case CHIP_VEGA10:
2139 	case CHIP_VEGA12:
2140 	case CHIP_VEGA20:
2141 	case CHIP_RAVEN:
2142 	case CHIP_ARCTURUS:
2143 		adev->gfx.mec.num_mec = 2;
2144 		break;
2145 	default:
2146 		adev->gfx.mec.num_mec = 1;
2147 		break;
2148 	}
2149 
2150 	adev->gfx.mec.num_pipe_per_mec = 4;
2151 	adev->gfx.mec.num_queue_per_pipe = 8;
2152 
2153 	/* EOP Event */
2154 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2155 	if (r)
2156 		return r;
2157 
2158 	/* Privileged reg */
2159 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2160 			      &adev->gfx.priv_reg_irq);
2161 	if (r)
2162 		return r;
2163 
2164 	/* Privileged inst */
2165 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2166 			      &adev->gfx.priv_inst_irq);
2167 	if (r)
2168 		return r;
2169 
2170 	/* ECC error */
2171 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2172 			      &adev->gfx.cp_ecc_error_irq);
2173 	if (r)
2174 		return r;
2175 
2176 	/* FUE error */
2177 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2178 			      &adev->gfx.cp_ecc_error_irq);
2179 	if (r)
2180 		return r;
2181 
2182 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2183 
2184 	gfx_v9_0_scratch_init(adev);
2185 
2186 	r = gfx_v9_0_init_microcode(adev);
2187 	if (r) {
2188 		DRM_ERROR("Failed to load gfx firmware!\n");
2189 		return r;
2190 	}
2191 
2192 	r = adev->gfx.rlc.funcs->init(adev);
2193 	if (r) {
2194 		DRM_ERROR("Failed to init rlc BOs!\n");
2195 		return r;
2196 	}
2197 
2198 	r = gfx_v9_0_mec_init(adev);
2199 	if (r) {
2200 		DRM_ERROR("Failed to init MEC BOs!\n");
2201 		return r;
2202 	}
2203 
2204 	/* set up the gfx ring */
2205 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2206 		ring = &adev->gfx.gfx_ring[i];
2207 		ring->ring_obj = NULL;
2208 		if (!i)
2209 			sprintf(ring->name, "gfx");
2210 		else
2211 			sprintf(ring->name, "gfx_%d", i);
2212 		ring->use_doorbell = true;
2213 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2214 		r = amdgpu_ring_init(adev, ring, 1024,
2215 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2216 		if (r)
2217 			return r;
2218 	}
2219 
2220 	/* set up the compute queues - allocate horizontally across pipes */
2221 	ring_id = 0;
2222 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2223 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2224 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2225 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2226 					continue;
2227 
2228 				r = gfx_v9_0_compute_ring_init(adev,
2229 							       ring_id,
2230 							       i, k, j);
2231 				if (r)
2232 					return r;
2233 
2234 				ring_id++;
2235 			}
2236 		}
2237 	}
2238 
2239 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2240 	if (r) {
2241 		DRM_ERROR("Failed to init KIQ BOs!\n");
2242 		return r;
2243 	}
2244 
2245 	kiq = &adev->gfx.kiq;
2246 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2247 	if (r)
2248 		return r;
2249 
2250 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
2251 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2252 	if (r)
2253 		return r;
2254 
2255 	adev->gfx.ce_ram_size = 0x8000;
2256 
2257 	r = gfx_v9_0_gpu_early_init(adev);
2258 	if (r)
2259 		return r;
2260 
2261 	r = gfx_v9_0_ngg_init(adev);
2262 	if (r)
2263 		return r;
2264 
2265 	return 0;
2266 }
2267 
2268 
2269 static int gfx_v9_0_sw_fini(void *handle)
2270 {
2271 	int i;
2272 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2273 
2274 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2275 			adev->gfx.ras_if) {
2276 		struct ras_common_if *ras_if = adev->gfx.ras_if;
2277 		struct ras_ih_if ih_info = {
2278 			.head = *ras_if,
2279 		};
2280 
2281 		amdgpu_ras_debugfs_remove(adev, ras_if);
2282 		amdgpu_ras_sysfs_remove(adev, ras_if);
2283 		amdgpu_ras_interrupt_remove_handler(adev,  &ih_info);
2284 		amdgpu_ras_feature_enable(adev, ras_if, 0);
2285 		kfree(ras_if);
2286 	}
2287 
2288 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2289 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2290 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2291 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2292 
2293 	amdgpu_gfx_mqd_sw_fini(adev);
2294 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2295 	amdgpu_gfx_kiq_fini(adev);
2296 
2297 	gfx_v9_0_mec_fini(adev);
2298 	gfx_v9_0_ngg_fini(adev);
2299 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2300 	if (adev->asic_type == CHIP_RAVEN) {
2301 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2302 				&adev->gfx.rlc.cp_table_gpu_addr,
2303 				(void **)&adev->gfx.rlc.cp_table_ptr);
2304 	}
2305 	gfx_v9_0_free_microcode(adev);
2306 
2307 	return 0;
2308 }
2309 
2310 
2311 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2312 {
2313 	/* TODO */
2314 }
2315 
2316 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2317 {
2318 	u32 data;
2319 
2320 	if (instance == 0xffffffff)
2321 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2322 	else
2323 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2324 
2325 	if (se_num == 0xffffffff)
2326 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2327 	else
2328 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2329 
2330 	if (sh_num == 0xffffffff)
2331 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2332 	else
2333 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2334 
2335 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2336 }
2337 
2338 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2339 {
2340 	u32 data, mask;
2341 
2342 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2343 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2344 
2345 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2346 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2347 
2348 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2349 					 adev->gfx.config.max_sh_per_se);
2350 
2351 	return (~data) & mask;
2352 }
2353 
2354 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2355 {
2356 	int i, j;
2357 	u32 data;
2358 	u32 active_rbs = 0;
2359 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2360 					adev->gfx.config.max_sh_per_se;
2361 
2362 	mutex_lock(&adev->grbm_idx_mutex);
2363 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2364 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2365 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2366 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2367 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2368 					       rb_bitmap_width_per_sh);
2369 		}
2370 	}
2371 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2372 	mutex_unlock(&adev->grbm_idx_mutex);
2373 
2374 	adev->gfx.config.backend_enable_mask = active_rbs;
2375 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2376 }
2377 
2378 #define DEFAULT_SH_MEM_BASES	(0x6000)
2379 #define FIRST_COMPUTE_VMID	(8)
2380 #define LAST_COMPUTE_VMID	(16)
2381 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2382 {
2383 	int i;
2384 	uint32_t sh_mem_config;
2385 	uint32_t sh_mem_bases;
2386 
2387 	/*
2388 	 * Configure apertures:
2389 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2390 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2391 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2392 	 */
2393 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2394 
2395 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2396 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2397 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2398 
2399 	mutex_lock(&adev->srbm_mutex);
2400 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2401 		soc15_grbm_select(adev, 0, 0, 0, i);
2402 		/* CP and shaders */
2403 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2404 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2405 	}
2406 	soc15_grbm_select(adev, 0, 0, 0, 0);
2407 	mutex_unlock(&adev->srbm_mutex);
2408 
2409 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2410 	   acccess. These should be enabled by FW for target VMIDs. */
2411 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2412 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2413 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2414 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2415 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2416 	}
2417 }
2418 
2419 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2420 {
2421 	int vmid;
2422 
2423 	/*
2424 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2425 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2426 	 * the driver can enable them for graphics. VMID0 should maintain
2427 	 * access so that HWS firmware can save/restore entries.
2428 	 */
2429 	for (vmid = 1; vmid < 16; vmid++) {
2430 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2431 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2432 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2433 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2434 	}
2435 }
2436 
2437 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2438 {
2439 	u32 tmp;
2440 	int i;
2441 
2442 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2443 
2444 	gfx_v9_0_tiling_mode_table_init(adev);
2445 
2446 	gfx_v9_0_setup_rb(adev);
2447 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2448 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2449 
2450 	/* XXX SH_MEM regs */
2451 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2452 	mutex_lock(&adev->srbm_mutex);
2453 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2454 		soc15_grbm_select(adev, 0, 0, 0, i);
2455 		/* CP and shaders */
2456 		if (i == 0) {
2457 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2458 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2459 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2460 					    !!amdgpu_noretry);
2461 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2462 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2463 		} else {
2464 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2465 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2466 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2467 					    !!amdgpu_noretry);
2468 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2469 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2470 				(adev->gmc.private_aperture_start >> 48));
2471 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2472 				(adev->gmc.shared_aperture_start >> 48));
2473 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2474 		}
2475 	}
2476 	soc15_grbm_select(adev, 0, 0, 0, 0);
2477 
2478 	mutex_unlock(&adev->srbm_mutex);
2479 
2480 	gfx_v9_0_init_compute_vmid(adev);
2481 	gfx_v9_0_init_gds_vmid(adev);
2482 }
2483 
2484 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2485 {
2486 	u32 i, j, k;
2487 	u32 mask;
2488 
2489 	mutex_lock(&adev->grbm_idx_mutex);
2490 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2491 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2492 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2493 			for (k = 0; k < adev->usec_timeout; k++) {
2494 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2495 					break;
2496 				udelay(1);
2497 			}
2498 			if (k == adev->usec_timeout) {
2499 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2500 						      0xffffffff, 0xffffffff);
2501 				mutex_unlock(&adev->grbm_idx_mutex);
2502 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2503 					 i, j);
2504 				return;
2505 			}
2506 		}
2507 	}
2508 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2509 	mutex_unlock(&adev->grbm_idx_mutex);
2510 
2511 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2512 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2513 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2514 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2515 	for (k = 0; k < adev->usec_timeout; k++) {
2516 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2517 			break;
2518 		udelay(1);
2519 	}
2520 }
2521 
2522 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2523 					       bool enable)
2524 {
2525 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2526 
2527 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2528 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2529 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2530 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2531 
2532 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2533 }
2534 
2535 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2536 {
2537 	/* csib */
2538 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2539 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2540 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2541 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2542 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2543 			adev->gfx.rlc.clear_state_size);
2544 }
2545 
2546 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2547 				int indirect_offset,
2548 				int list_size,
2549 				int *unique_indirect_regs,
2550 				int unique_indirect_reg_count,
2551 				int *indirect_start_offsets,
2552 				int *indirect_start_offsets_count,
2553 				int max_start_offsets_count)
2554 {
2555 	int idx;
2556 
2557 	for (; indirect_offset < list_size; indirect_offset++) {
2558 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2559 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2560 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2561 
2562 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2563 			indirect_offset += 2;
2564 
2565 			/* look for the matching indice */
2566 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2567 				if (unique_indirect_regs[idx] ==
2568 					register_list_format[indirect_offset] ||
2569 					!unique_indirect_regs[idx])
2570 					break;
2571 			}
2572 
2573 			BUG_ON(idx >= unique_indirect_reg_count);
2574 
2575 			if (!unique_indirect_regs[idx])
2576 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2577 
2578 			indirect_offset++;
2579 		}
2580 	}
2581 }
2582 
2583 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2584 {
2585 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2586 	int unique_indirect_reg_count = 0;
2587 
2588 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2589 	int indirect_start_offsets_count = 0;
2590 
2591 	int list_size = 0;
2592 	int i = 0, j = 0;
2593 	u32 tmp = 0;
2594 
2595 	u32 *register_list_format =
2596 		kmemdup(adev->gfx.rlc.register_list_format,
2597 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2598 	if (!register_list_format)
2599 		return -ENOMEM;
2600 
2601 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2602 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2603 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2604 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2605 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2606 				    unique_indirect_regs,
2607 				    unique_indirect_reg_count,
2608 				    indirect_start_offsets,
2609 				    &indirect_start_offsets_count,
2610 				    ARRAY_SIZE(indirect_start_offsets));
2611 
2612 	/* enable auto inc in case it is disabled */
2613 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2614 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2615 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2616 
2617 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2618 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2619 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2620 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2621 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2622 			adev->gfx.rlc.register_restore[i]);
2623 
2624 	/* load indirect register */
2625 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2626 		adev->gfx.rlc.reg_list_format_start);
2627 
2628 	/* direct register portion */
2629 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2630 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2631 			register_list_format[i]);
2632 
2633 	/* indirect register portion */
2634 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2635 		if (register_list_format[i] == 0xFFFFFFFF) {
2636 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2637 			continue;
2638 		}
2639 
2640 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2641 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2642 
2643 		for (j = 0; j < unique_indirect_reg_count; j++) {
2644 			if (register_list_format[i] == unique_indirect_regs[j]) {
2645 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2646 				break;
2647 			}
2648 		}
2649 
2650 		BUG_ON(j >= unique_indirect_reg_count);
2651 
2652 		i++;
2653 	}
2654 
2655 	/* set save/restore list size */
2656 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2657 	list_size = list_size >> 1;
2658 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2659 		adev->gfx.rlc.reg_restore_list_size);
2660 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2661 
2662 	/* write the starting offsets to RLC scratch ram */
2663 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2664 		adev->gfx.rlc.starting_offsets_start);
2665 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2666 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2667 		       indirect_start_offsets[i]);
2668 
2669 	/* load unique indirect regs*/
2670 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2671 		if (unique_indirect_regs[i] != 0) {
2672 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2673 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2674 			       unique_indirect_regs[i] & 0x3FFFF);
2675 
2676 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2677 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2678 			       unique_indirect_regs[i] >> 20);
2679 		}
2680 	}
2681 
2682 	kfree(register_list_format);
2683 	return 0;
2684 }
2685 
2686 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2687 {
2688 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2689 }
2690 
2691 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2692 					     bool enable)
2693 {
2694 	uint32_t data = 0;
2695 	uint32_t default_data = 0;
2696 
2697 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2698 	if (enable == true) {
2699 		/* enable GFXIP control over CGPG */
2700 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2701 		if(default_data != data)
2702 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2703 
2704 		/* update status */
2705 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2706 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2707 		if(default_data != data)
2708 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2709 	} else {
2710 		/* restore GFXIP control over GCPG */
2711 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2712 		if(default_data != data)
2713 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2714 	}
2715 }
2716 
2717 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2718 {
2719 	uint32_t data = 0;
2720 
2721 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2722 			      AMD_PG_SUPPORT_GFX_SMG |
2723 			      AMD_PG_SUPPORT_GFX_DMG)) {
2724 		/* init IDLE_POLL_COUNT = 60 */
2725 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2726 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2727 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2728 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2729 
2730 		/* init RLC PG Delay */
2731 		data = 0;
2732 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2733 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2734 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2735 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2736 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2737 
2738 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2739 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2740 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2741 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2742 
2743 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2744 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2745 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2746 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2747 
2748 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2749 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2750 
2751 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2752 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2753 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2754 
2755 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2756 	}
2757 }
2758 
2759 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2760 						bool enable)
2761 {
2762 	uint32_t data = 0;
2763 	uint32_t default_data = 0;
2764 
2765 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2766 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2767 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2768 			     enable ? 1 : 0);
2769 	if (default_data != data)
2770 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2771 }
2772 
2773 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2774 						bool enable)
2775 {
2776 	uint32_t data = 0;
2777 	uint32_t default_data = 0;
2778 
2779 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2780 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2781 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2782 			     enable ? 1 : 0);
2783 	if(default_data != data)
2784 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2785 }
2786 
2787 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2788 					bool enable)
2789 {
2790 	uint32_t data = 0;
2791 	uint32_t default_data = 0;
2792 
2793 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2794 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2795 			     CP_PG_DISABLE,
2796 			     enable ? 0 : 1);
2797 	if(default_data != data)
2798 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2799 }
2800 
2801 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2802 						bool enable)
2803 {
2804 	uint32_t data, default_data;
2805 
2806 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2807 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2808 			     GFX_POWER_GATING_ENABLE,
2809 			     enable ? 1 : 0);
2810 	if(default_data != data)
2811 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2812 }
2813 
2814 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2815 						bool enable)
2816 {
2817 	uint32_t data, default_data;
2818 
2819 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2820 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2821 			     GFX_PIPELINE_PG_ENABLE,
2822 			     enable ? 1 : 0);
2823 	if(default_data != data)
2824 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2825 
2826 	if (!enable)
2827 		/* read any GFX register to wake up GFX */
2828 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2829 }
2830 
2831 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2832 						       bool enable)
2833 {
2834 	uint32_t data, default_data;
2835 
2836 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2837 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2838 			     STATIC_PER_CU_PG_ENABLE,
2839 			     enable ? 1 : 0);
2840 	if(default_data != data)
2841 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2842 }
2843 
2844 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2845 						bool enable)
2846 {
2847 	uint32_t data, default_data;
2848 
2849 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2850 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2851 			     DYN_PER_CU_PG_ENABLE,
2852 			     enable ? 1 : 0);
2853 	if(default_data != data)
2854 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2855 }
2856 
2857 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2858 {
2859 	gfx_v9_0_init_csb(adev);
2860 
2861 	/*
2862 	 * Rlc save restore list is workable since v2_1.
2863 	 * And it's needed by gfxoff feature.
2864 	 */
2865 	if (adev->gfx.rlc.is_rlc_v2_1) {
2866 		gfx_v9_1_init_rlc_save_restore_list(adev);
2867 		gfx_v9_0_enable_save_restore_machine(adev);
2868 	}
2869 
2870 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2871 			      AMD_PG_SUPPORT_GFX_SMG |
2872 			      AMD_PG_SUPPORT_GFX_DMG |
2873 			      AMD_PG_SUPPORT_CP |
2874 			      AMD_PG_SUPPORT_GDS |
2875 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2876 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2877 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2878 		gfx_v9_0_init_gfx_power_gating(adev);
2879 	}
2880 }
2881 
2882 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2883 {
2884 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2885 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2886 	gfx_v9_0_wait_for_rlc_serdes(adev);
2887 }
2888 
2889 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2890 {
2891 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2892 	udelay(50);
2893 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2894 	udelay(50);
2895 }
2896 
2897 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2898 {
2899 #ifdef AMDGPU_RLC_DEBUG_RETRY
2900 	u32 rlc_ucode_ver;
2901 #endif
2902 
2903 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2904 	udelay(50);
2905 
2906 	/* carrizo do enable cp interrupt after cp inited */
2907 	if (!(adev->flags & AMD_IS_APU)) {
2908 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2909 		udelay(50);
2910 	}
2911 
2912 #ifdef AMDGPU_RLC_DEBUG_RETRY
2913 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2914 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2915 	if(rlc_ucode_ver == 0x108) {
2916 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2917 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2918 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2919 		 * default is 0x9C4 to create a 100us interval */
2920 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2921 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2922 		 * to disable the page fault retry interrupts, default is
2923 		 * 0x100 (256) */
2924 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2925 	}
2926 #endif
2927 }
2928 
2929 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2930 {
2931 	const struct rlc_firmware_header_v2_0 *hdr;
2932 	const __le32 *fw_data;
2933 	unsigned i, fw_size;
2934 
2935 	if (!adev->gfx.rlc_fw)
2936 		return -EINVAL;
2937 
2938 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2939 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2940 
2941 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2942 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2943 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2944 
2945 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2946 			RLCG_UCODE_LOADING_START_ADDRESS);
2947 	for (i = 0; i < fw_size; i++)
2948 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2949 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2950 
2951 	return 0;
2952 }
2953 
2954 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2955 {
2956 	int r;
2957 
2958 	if (amdgpu_sriov_vf(adev)) {
2959 		gfx_v9_0_init_csb(adev);
2960 		return 0;
2961 	}
2962 
2963 	adev->gfx.rlc.funcs->stop(adev);
2964 
2965 	/* disable CG */
2966 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2967 
2968 	gfx_v9_0_init_pg(adev);
2969 
2970 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2971 		/* legacy rlc firmware loading */
2972 		r = gfx_v9_0_rlc_load_microcode(adev);
2973 		if (r)
2974 			return r;
2975 	}
2976 
2977 	switch (adev->asic_type) {
2978 	case CHIP_RAVEN:
2979 		if (amdgpu_lbpw == 0)
2980 			gfx_v9_0_enable_lbpw(adev, false);
2981 		else
2982 			gfx_v9_0_enable_lbpw(adev, true);
2983 		break;
2984 	case CHIP_VEGA20:
2985 		if (amdgpu_lbpw > 0)
2986 			gfx_v9_0_enable_lbpw(adev, true);
2987 		else
2988 			gfx_v9_0_enable_lbpw(adev, false);
2989 		break;
2990 	default:
2991 		break;
2992 	}
2993 
2994 	adev->gfx.rlc.funcs->start(adev);
2995 
2996 	return 0;
2997 }
2998 
2999 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3000 {
3001 	int i;
3002 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3003 
3004 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3005 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3006 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3007 	if (!enable) {
3008 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3009 			adev->gfx.gfx_ring[i].sched.ready = false;
3010 	}
3011 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3012 	udelay(50);
3013 }
3014 
3015 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3016 {
3017 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3018 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3019 	const struct gfx_firmware_header_v1_0 *me_hdr;
3020 	const __le32 *fw_data;
3021 	unsigned i, fw_size;
3022 
3023 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3024 		return -EINVAL;
3025 
3026 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3027 		adev->gfx.pfp_fw->data;
3028 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3029 		adev->gfx.ce_fw->data;
3030 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3031 		adev->gfx.me_fw->data;
3032 
3033 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3034 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3035 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3036 
3037 	gfx_v9_0_cp_gfx_enable(adev, false);
3038 
3039 	/* PFP */
3040 	fw_data = (const __le32 *)
3041 		(adev->gfx.pfp_fw->data +
3042 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3043 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3044 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3045 	for (i = 0; i < fw_size; i++)
3046 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3047 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3048 
3049 	/* CE */
3050 	fw_data = (const __le32 *)
3051 		(adev->gfx.ce_fw->data +
3052 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3053 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3054 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3055 	for (i = 0; i < fw_size; i++)
3056 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3057 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3058 
3059 	/* ME */
3060 	fw_data = (const __le32 *)
3061 		(adev->gfx.me_fw->data +
3062 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3063 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3064 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3065 	for (i = 0; i < fw_size; i++)
3066 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3067 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3068 
3069 	return 0;
3070 }
3071 
3072 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3073 {
3074 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3075 	const struct cs_section_def *sect = NULL;
3076 	const struct cs_extent_def *ext = NULL;
3077 	int r, i, tmp;
3078 
3079 	/* init the CP */
3080 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3081 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3082 
3083 	gfx_v9_0_cp_gfx_enable(adev, true);
3084 
3085 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3086 	if (r) {
3087 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3088 		return r;
3089 	}
3090 
3091 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3092 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3093 
3094 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3095 	amdgpu_ring_write(ring, 0x80000000);
3096 	amdgpu_ring_write(ring, 0x80000000);
3097 
3098 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3099 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3100 			if (sect->id == SECT_CONTEXT) {
3101 				amdgpu_ring_write(ring,
3102 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3103 					       ext->reg_count));
3104 				amdgpu_ring_write(ring,
3105 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3106 				for (i = 0; i < ext->reg_count; i++)
3107 					amdgpu_ring_write(ring, ext->extent[i]);
3108 			}
3109 		}
3110 	}
3111 
3112 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3113 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3114 
3115 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3116 	amdgpu_ring_write(ring, 0);
3117 
3118 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3119 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3120 	amdgpu_ring_write(ring, 0x8000);
3121 	amdgpu_ring_write(ring, 0x8000);
3122 
3123 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3124 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3125 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3126 	amdgpu_ring_write(ring, tmp);
3127 	amdgpu_ring_write(ring, 0);
3128 
3129 	amdgpu_ring_commit(ring);
3130 
3131 	return 0;
3132 }
3133 
3134 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3135 {
3136 	struct amdgpu_ring *ring;
3137 	u32 tmp;
3138 	u32 rb_bufsz;
3139 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3140 
3141 	/* Set the write pointer delay */
3142 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3143 
3144 	/* set the RB to use vmid 0 */
3145 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3146 
3147 	/* Set ring buffer size */
3148 	ring = &adev->gfx.gfx_ring[0];
3149 	rb_bufsz = order_base_2(ring->ring_size / 8);
3150 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3151 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3152 #ifdef __BIG_ENDIAN
3153 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3154 #endif
3155 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3156 
3157 	/* Initialize the ring buffer's write pointers */
3158 	ring->wptr = 0;
3159 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3160 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3161 
3162 	/* set the wb address wether it's enabled or not */
3163 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3164 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3165 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3166 
3167 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3168 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3169 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3170 
3171 	mdelay(1);
3172 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3173 
3174 	rb_addr = ring->gpu_addr >> 8;
3175 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3176 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3177 
3178 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3179 	if (ring->use_doorbell) {
3180 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3181 				    DOORBELL_OFFSET, ring->doorbell_index);
3182 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3183 				    DOORBELL_EN, 1);
3184 	} else {
3185 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3186 	}
3187 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3188 
3189 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3190 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3191 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3192 
3193 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3194 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3195 
3196 
3197 	/* start the ring */
3198 	gfx_v9_0_cp_gfx_start(adev);
3199 	ring->sched.ready = true;
3200 
3201 	return 0;
3202 }
3203 
3204 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3205 {
3206 	int i;
3207 
3208 	if (enable) {
3209 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3210 	} else {
3211 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3212 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3213 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3214 			adev->gfx.compute_ring[i].sched.ready = false;
3215 		adev->gfx.kiq.ring.sched.ready = false;
3216 	}
3217 	udelay(50);
3218 }
3219 
3220 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3221 {
3222 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3223 	const __le32 *fw_data;
3224 	unsigned i;
3225 	u32 tmp;
3226 
3227 	if (!adev->gfx.mec_fw)
3228 		return -EINVAL;
3229 
3230 	gfx_v9_0_cp_compute_enable(adev, false);
3231 
3232 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3233 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3234 
3235 	fw_data = (const __le32 *)
3236 		(adev->gfx.mec_fw->data +
3237 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3238 	tmp = 0;
3239 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3240 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3241 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3242 
3243 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3244 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3245 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3246 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3247 
3248 	/* MEC1 */
3249 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3250 			 mec_hdr->jt_offset);
3251 	for (i = 0; i < mec_hdr->jt_size; i++)
3252 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3253 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3254 
3255 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3256 			adev->gfx.mec_fw_version);
3257 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3258 
3259 	return 0;
3260 }
3261 
3262 /* KIQ functions */
3263 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3264 {
3265 	uint32_t tmp;
3266 	struct amdgpu_device *adev = ring->adev;
3267 
3268 	/* tell RLC which is KIQ queue */
3269 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3270 	tmp &= 0xffffff00;
3271 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3272 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3273 	tmp |= 0x80;
3274 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3275 }
3276 
3277 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3278 {
3279 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3280 	uint64_t queue_mask = 0;
3281 	int r, i;
3282 
3283 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3284 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3285 			continue;
3286 
3287 		/* This situation may be hit in the future if a new HW
3288 		 * generation exposes more than 64 queues. If so, the
3289 		 * definition of queue_mask needs updating */
3290 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3291 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3292 			break;
3293 		}
3294 
3295 		queue_mask |= (1ull << i);
3296 	}
3297 
3298 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3299 	if (r) {
3300 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3301 		return r;
3302 	}
3303 
3304 	/* set resources */
3305 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3306 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3307 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
3308 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
3309 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
3310 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
3311 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
3312 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
3313 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
3314 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3315 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3316 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3317 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3318 
3319 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3320 		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
3321 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3322 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3323 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3324 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3325 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3326 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3327 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3328 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3329 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3330 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3331 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3332 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3333 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3334 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3335 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3336 	}
3337 
3338 	r = amdgpu_ring_test_helper(kiq_ring);
3339 	if (r)
3340 		DRM_ERROR("KCQ enable failed\n");
3341 
3342 	return r;
3343 }
3344 
3345 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3346 {
3347 	struct amdgpu_device *adev = ring->adev;
3348 	struct v9_mqd *mqd = ring->mqd_ptr;
3349 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3350 	uint32_t tmp;
3351 
3352 	mqd->header = 0xC0310800;
3353 	mqd->compute_pipelinestat_enable = 0x00000001;
3354 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3355 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3356 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3357 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3358 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3359 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3360 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3361 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3362 	mqd->compute_misc_reserved = 0x00000003;
3363 
3364 	mqd->dynamic_cu_mask_addr_lo =
3365 		lower_32_bits(ring->mqd_gpu_addr
3366 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3367 	mqd->dynamic_cu_mask_addr_hi =
3368 		upper_32_bits(ring->mqd_gpu_addr
3369 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3370 
3371 	eop_base_addr = ring->eop_gpu_addr >> 8;
3372 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3373 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3374 
3375 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3376 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3377 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3378 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3379 
3380 	mqd->cp_hqd_eop_control = tmp;
3381 
3382 	/* enable doorbell? */
3383 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3384 
3385 	if (ring->use_doorbell) {
3386 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3387 				    DOORBELL_OFFSET, ring->doorbell_index);
3388 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3389 				    DOORBELL_EN, 1);
3390 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3391 				    DOORBELL_SOURCE, 0);
3392 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3393 				    DOORBELL_HIT, 0);
3394 	} else {
3395 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3396 					 DOORBELL_EN, 0);
3397 	}
3398 
3399 	mqd->cp_hqd_pq_doorbell_control = tmp;
3400 
3401 	/* disable the queue if it's active */
3402 	ring->wptr = 0;
3403 	mqd->cp_hqd_dequeue_request = 0;
3404 	mqd->cp_hqd_pq_rptr = 0;
3405 	mqd->cp_hqd_pq_wptr_lo = 0;
3406 	mqd->cp_hqd_pq_wptr_hi = 0;
3407 
3408 	/* set the pointer to the MQD */
3409 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3410 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3411 
3412 	/* set MQD vmid to 0 */
3413 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3414 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3415 	mqd->cp_mqd_control = tmp;
3416 
3417 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3418 	hqd_gpu_addr = ring->gpu_addr >> 8;
3419 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3420 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3421 
3422 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3423 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3424 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3425 			    (order_base_2(ring->ring_size / 4) - 1));
3426 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3427 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3428 #ifdef __BIG_ENDIAN
3429 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3430 #endif
3431 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3432 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3433 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3434 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3435 	mqd->cp_hqd_pq_control = tmp;
3436 
3437 	/* set the wb address whether it's enabled or not */
3438 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3439 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3440 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3441 		upper_32_bits(wb_gpu_addr) & 0xffff;
3442 
3443 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3444 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3445 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3446 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3447 
3448 	tmp = 0;
3449 	/* enable the doorbell if requested */
3450 	if (ring->use_doorbell) {
3451 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3452 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3453 				DOORBELL_OFFSET, ring->doorbell_index);
3454 
3455 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3456 					 DOORBELL_EN, 1);
3457 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3458 					 DOORBELL_SOURCE, 0);
3459 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3460 					 DOORBELL_HIT, 0);
3461 	}
3462 
3463 	mqd->cp_hqd_pq_doorbell_control = tmp;
3464 
3465 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3466 	ring->wptr = 0;
3467 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3468 
3469 	/* set the vmid for the queue */
3470 	mqd->cp_hqd_vmid = 0;
3471 
3472 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3473 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3474 	mqd->cp_hqd_persistent_state = tmp;
3475 
3476 	/* set MIN_IB_AVAIL_SIZE */
3477 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3478 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3479 	mqd->cp_hqd_ib_control = tmp;
3480 
3481 	/* activate the queue */
3482 	mqd->cp_hqd_active = 1;
3483 
3484 	return 0;
3485 }
3486 
3487 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3488 {
3489 	struct amdgpu_device *adev = ring->adev;
3490 	struct v9_mqd *mqd = ring->mqd_ptr;
3491 	int j;
3492 
3493 	/* disable wptr polling */
3494 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3495 
3496 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3497 	       mqd->cp_hqd_eop_base_addr_lo);
3498 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3499 	       mqd->cp_hqd_eop_base_addr_hi);
3500 
3501 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3502 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3503 	       mqd->cp_hqd_eop_control);
3504 
3505 	/* enable doorbell? */
3506 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3507 	       mqd->cp_hqd_pq_doorbell_control);
3508 
3509 	/* disable the queue if it's active */
3510 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3511 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3512 		for (j = 0; j < adev->usec_timeout; j++) {
3513 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3514 				break;
3515 			udelay(1);
3516 		}
3517 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3518 		       mqd->cp_hqd_dequeue_request);
3519 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3520 		       mqd->cp_hqd_pq_rptr);
3521 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3522 		       mqd->cp_hqd_pq_wptr_lo);
3523 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3524 		       mqd->cp_hqd_pq_wptr_hi);
3525 	}
3526 
3527 	/* set the pointer to the MQD */
3528 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3529 	       mqd->cp_mqd_base_addr_lo);
3530 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3531 	       mqd->cp_mqd_base_addr_hi);
3532 
3533 	/* set MQD vmid to 0 */
3534 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3535 	       mqd->cp_mqd_control);
3536 
3537 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3538 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3539 	       mqd->cp_hqd_pq_base_lo);
3540 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3541 	       mqd->cp_hqd_pq_base_hi);
3542 
3543 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3544 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3545 	       mqd->cp_hqd_pq_control);
3546 
3547 	/* set the wb address whether it's enabled or not */
3548 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3549 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3550 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3551 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3552 
3553 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3554 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3555 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3556 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3557 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3558 
3559 	/* enable the doorbell if requested */
3560 	if (ring->use_doorbell) {
3561 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3562 					(adev->doorbell_index.kiq * 2) << 2);
3563 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3564 					(adev->doorbell_index.userqueue_end * 2) << 2);
3565 	}
3566 
3567 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3568 	       mqd->cp_hqd_pq_doorbell_control);
3569 
3570 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3571 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3572 	       mqd->cp_hqd_pq_wptr_lo);
3573 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3574 	       mqd->cp_hqd_pq_wptr_hi);
3575 
3576 	/* set the vmid for the queue */
3577 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3578 
3579 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3580 	       mqd->cp_hqd_persistent_state);
3581 
3582 	/* activate the queue */
3583 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3584 	       mqd->cp_hqd_active);
3585 
3586 	if (ring->use_doorbell)
3587 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3588 
3589 	return 0;
3590 }
3591 
3592 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3593 {
3594 	struct amdgpu_device *adev = ring->adev;
3595 	int j;
3596 
3597 	/* disable the queue if it's active */
3598 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3599 
3600 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3601 
3602 		for (j = 0; j < adev->usec_timeout; j++) {
3603 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3604 				break;
3605 			udelay(1);
3606 		}
3607 
3608 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3609 			DRM_DEBUG("KIQ dequeue request failed.\n");
3610 
3611 			/* Manual disable if dequeue request times out */
3612 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3613 		}
3614 
3615 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3616 		      0);
3617 	}
3618 
3619 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3620 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3621 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3622 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3623 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3624 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3625 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3626 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3627 
3628 	return 0;
3629 }
3630 
3631 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3632 {
3633 	struct amdgpu_device *adev = ring->adev;
3634 	struct v9_mqd *mqd = ring->mqd_ptr;
3635 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3636 
3637 	gfx_v9_0_kiq_setting(ring);
3638 
3639 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3640 		/* reset MQD to a clean status */
3641 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3642 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3643 
3644 		/* reset ring buffer */
3645 		ring->wptr = 0;
3646 		amdgpu_ring_clear_ring(ring);
3647 
3648 		mutex_lock(&adev->srbm_mutex);
3649 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3650 		gfx_v9_0_kiq_init_register(ring);
3651 		soc15_grbm_select(adev, 0, 0, 0, 0);
3652 		mutex_unlock(&adev->srbm_mutex);
3653 	} else {
3654 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3655 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3656 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3657 		mutex_lock(&adev->srbm_mutex);
3658 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3659 		gfx_v9_0_mqd_init(ring);
3660 		gfx_v9_0_kiq_init_register(ring);
3661 		soc15_grbm_select(adev, 0, 0, 0, 0);
3662 		mutex_unlock(&adev->srbm_mutex);
3663 
3664 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3665 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3666 	}
3667 
3668 	return 0;
3669 }
3670 
3671 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3672 {
3673 	struct amdgpu_device *adev = ring->adev;
3674 	struct v9_mqd *mqd = ring->mqd_ptr;
3675 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3676 
3677 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3678 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3679 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3680 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3681 		mutex_lock(&adev->srbm_mutex);
3682 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3683 		gfx_v9_0_mqd_init(ring);
3684 		soc15_grbm_select(adev, 0, 0, 0, 0);
3685 		mutex_unlock(&adev->srbm_mutex);
3686 
3687 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3688 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3689 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3690 		/* reset MQD to a clean status */
3691 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3692 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3693 
3694 		/* reset ring buffer */
3695 		ring->wptr = 0;
3696 		amdgpu_ring_clear_ring(ring);
3697 	} else {
3698 		amdgpu_ring_clear_ring(ring);
3699 	}
3700 
3701 	return 0;
3702 }
3703 
3704 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3705 {
3706 	struct amdgpu_ring *ring;
3707 	int r;
3708 
3709 	ring = &adev->gfx.kiq.ring;
3710 
3711 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3712 	if (unlikely(r != 0))
3713 		return r;
3714 
3715 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3716 	if (unlikely(r != 0))
3717 		return r;
3718 
3719 	gfx_v9_0_kiq_init_queue(ring);
3720 	amdgpu_bo_kunmap(ring->mqd_obj);
3721 	ring->mqd_ptr = NULL;
3722 	amdgpu_bo_unreserve(ring->mqd_obj);
3723 	ring->sched.ready = true;
3724 	return 0;
3725 }
3726 
3727 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3728 {
3729 	struct amdgpu_ring *ring = NULL;
3730 	int r = 0, i;
3731 
3732 	gfx_v9_0_cp_compute_enable(adev, true);
3733 
3734 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3735 		ring = &adev->gfx.compute_ring[i];
3736 
3737 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3738 		if (unlikely(r != 0))
3739 			goto done;
3740 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3741 		if (!r) {
3742 			r = gfx_v9_0_kcq_init_queue(ring);
3743 			amdgpu_bo_kunmap(ring->mqd_obj);
3744 			ring->mqd_ptr = NULL;
3745 		}
3746 		amdgpu_bo_unreserve(ring->mqd_obj);
3747 		if (r)
3748 			goto done;
3749 	}
3750 
3751 	r = gfx_v9_0_kiq_kcq_enable(adev);
3752 done:
3753 	return r;
3754 }
3755 
3756 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3757 {
3758 	int r, i;
3759 	struct amdgpu_ring *ring;
3760 
3761 	if (!(adev->flags & AMD_IS_APU))
3762 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3763 
3764 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3765 		if (adev->asic_type != CHIP_ARCTURUS) {
3766 			/* legacy firmware loading */
3767 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3768 			if (r)
3769 				return r;
3770 		}
3771 
3772 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3773 		if (r)
3774 			return r;
3775 	}
3776 
3777 	r = gfx_v9_0_kiq_resume(adev);
3778 	if (r)
3779 		return r;
3780 
3781 	if (adev->asic_type != CHIP_ARCTURUS) {
3782 		r = gfx_v9_0_cp_gfx_resume(adev);
3783 		if (r)
3784 			return r;
3785 	}
3786 
3787 	r = gfx_v9_0_kcq_resume(adev);
3788 	if (r)
3789 		return r;
3790 
3791 	if (adev->asic_type != CHIP_ARCTURUS) {
3792 		ring = &adev->gfx.gfx_ring[0];
3793 		r = amdgpu_ring_test_helper(ring);
3794 		if (r)
3795 			return r;
3796 	}
3797 
3798 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3799 		ring = &adev->gfx.compute_ring[i];
3800 		amdgpu_ring_test_helper(ring);
3801 	}
3802 
3803 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3804 
3805 	return 0;
3806 }
3807 
3808 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3809 {
3810 	if (adev->asic_type != CHIP_ARCTURUS)
3811 		gfx_v9_0_cp_gfx_enable(adev, enable);
3812 	gfx_v9_0_cp_compute_enable(adev, enable);
3813 }
3814 
3815 static int gfx_v9_0_hw_init(void *handle)
3816 {
3817 	int r;
3818 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3819 
3820 	if (!amdgpu_sriov_vf(adev))
3821 		gfx_v9_0_init_golden_registers(adev);
3822 
3823 	gfx_v9_0_constants_init(adev);
3824 
3825 	r = gfx_v9_0_csb_vram_pin(adev);
3826 	if (r)
3827 		return r;
3828 
3829 	r = adev->gfx.rlc.funcs->resume(adev);
3830 	if (r)
3831 		return r;
3832 
3833 	r = gfx_v9_0_cp_resume(adev);
3834 	if (r)
3835 		return r;
3836 
3837 	if (adev->asic_type != CHIP_ARCTURUS) {
3838 		r = gfx_v9_0_ngg_en(adev);
3839 		if (r)
3840 			return r;
3841 	}
3842 
3843 	return r;
3844 }
3845 
3846 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3847 {
3848 	int r, i;
3849 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3850 
3851 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3852 	if (r)
3853 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3854 
3855 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3856 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3857 
3858 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3859 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3860 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3861 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3862 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3863 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3864 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3865 		amdgpu_ring_write(kiq_ring, 0);
3866 		amdgpu_ring_write(kiq_ring, 0);
3867 		amdgpu_ring_write(kiq_ring, 0);
3868 	}
3869 	r = amdgpu_ring_test_helper(kiq_ring);
3870 	if (r)
3871 		DRM_ERROR("KCQ disable failed\n");
3872 
3873 	return r;
3874 }
3875 
3876 static int gfx_v9_0_hw_fini(void *handle)
3877 {
3878 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3879 
3880 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3881 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3882 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3883 
3884 	/* disable KCQ to avoid CPC touch memory not valid anymore */
3885 	gfx_v9_0_kcq_disable(adev);
3886 
3887 	if (amdgpu_sriov_vf(adev)) {
3888 		gfx_v9_0_cp_gfx_enable(adev, false);
3889 		/* must disable polling for SRIOV when hw finished, otherwise
3890 		 * CPC engine may still keep fetching WB address which is already
3891 		 * invalid after sw finished and trigger DMAR reading error in
3892 		 * hypervisor side.
3893 		 */
3894 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3895 		return 0;
3896 	}
3897 
3898 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
3899 	 * otherwise KIQ is hanging when binding back
3900 	 */
3901 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3902 		mutex_lock(&adev->srbm_mutex);
3903 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3904 				adev->gfx.kiq.ring.pipe,
3905 				adev->gfx.kiq.ring.queue, 0);
3906 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3907 		soc15_grbm_select(adev, 0, 0, 0, 0);
3908 		mutex_unlock(&adev->srbm_mutex);
3909 	}
3910 
3911 	gfx_v9_0_cp_enable(adev, false);
3912 	adev->gfx.rlc.funcs->stop(adev);
3913 
3914 	gfx_v9_0_csb_vram_unpin(adev);
3915 
3916 	return 0;
3917 }
3918 
3919 static int gfx_v9_0_suspend(void *handle)
3920 {
3921 	return gfx_v9_0_hw_fini(handle);
3922 }
3923 
3924 static int gfx_v9_0_resume(void *handle)
3925 {
3926 	return gfx_v9_0_hw_init(handle);
3927 }
3928 
3929 static bool gfx_v9_0_is_idle(void *handle)
3930 {
3931 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3932 
3933 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3934 				GRBM_STATUS, GUI_ACTIVE))
3935 		return false;
3936 	else
3937 		return true;
3938 }
3939 
3940 static int gfx_v9_0_wait_for_idle(void *handle)
3941 {
3942 	unsigned i;
3943 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3944 
3945 	for (i = 0; i < adev->usec_timeout; i++) {
3946 		if (gfx_v9_0_is_idle(handle))
3947 			return 0;
3948 		udelay(1);
3949 	}
3950 	return -ETIMEDOUT;
3951 }
3952 
3953 static int gfx_v9_0_soft_reset(void *handle)
3954 {
3955 	u32 grbm_soft_reset = 0;
3956 	u32 tmp;
3957 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3958 
3959 	/* GRBM_STATUS */
3960 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3961 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3962 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3963 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3964 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3965 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3966 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3967 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3968 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3969 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3970 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3971 	}
3972 
3973 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3974 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3975 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3976 	}
3977 
3978 	/* GRBM_STATUS2 */
3979 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3980 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3981 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3982 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3983 
3984 
3985 	if (grbm_soft_reset) {
3986 		/* stop the rlc */
3987 		adev->gfx.rlc.funcs->stop(adev);
3988 
3989 		if (adev->asic_type != CHIP_ARCTURUS)
3990 			/* Disable GFX parsing/prefetching */
3991 			gfx_v9_0_cp_gfx_enable(adev, false);
3992 
3993 		/* Disable MEC parsing/prefetching */
3994 		gfx_v9_0_cp_compute_enable(adev, false);
3995 
3996 		if (grbm_soft_reset) {
3997 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3998 			tmp |= grbm_soft_reset;
3999 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4000 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4001 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4002 
4003 			udelay(50);
4004 
4005 			tmp &= ~grbm_soft_reset;
4006 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4007 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4008 		}
4009 
4010 		/* Wait a little for things to settle down */
4011 		udelay(50);
4012 	}
4013 	return 0;
4014 }
4015 
4016 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4017 {
4018 	uint64_t clock;
4019 
4020 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4021 	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4022 	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4023 		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4024 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4025 	return clock;
4026 }
4027 
4028 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4029 					  uint32_t vmid,
4030 					  uint32_t gds_base, uint32_t gds_size,
4031 					  uint32_t gws_base, uint32_t gws_size,
4032 					  uint32_t oa_base, uint32_t oa_size)
4033 {
4034 	struct amdgpu_device *adev = ring->adev;
4035 
4036 	/* GDS Base */
4037 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4038 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4039 				   gds_base);
4040 
4041 	/* GDS Size */
4042 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4043 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4044 				   gds_size);
4045 
4046 	/* GWS */
4047 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4048 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4049 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4050 
4051 	/* OA */
4052 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4053 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4054 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4055 }
4056 
4057 static const u32 vgpr_init_compute_shader[] =
4058 {
4059 	0xb07c0000, 0xbe8000ff,
4060 	0x000000f8, 0xbf110800,
4061 	0x7e000280, 0x7e020280,
4062 	0x7e040280, 0x7e060280,
4063 	0x7e080280, 0x7e0a0280,
4064 	0x7e0c0280, 0x7e0e0280,
4065 	0x80808800, 0xbe803200,
4066 	0xbf84fff5, 0xbf9c0000,
4067 	0xd28c0001, 0x0001007f,
4068 	0xd28d0001, 0x0002027e,
4069 	0x10020288, 0xb8810904,
4070 	0xb7814000, 0xd1196a01,
4071 	0x00000301, 0xbe800087,
4072 	0xbefc00c1, 0xd89c4000,
4073 	0x00020201, 0xd89cc080,
4074 	0x00040401, 0x320202ff,
4075 	0x00000800, 0x80808100,
4076 	0xbf84fff8, 0x7e020280,
4077 	0xbf810000, 0x00000000,
4078 };
4079 
4080 static const u32 sgpr_init_compute_shader[] =
4081 {
4082 	0xb07c0000, 0xbe8000ff,
4083 	0x0000005f, 0xbee50080,
4084 	0xbe812c65, 0xbe822c65,
4085 	0xbe832c65, 0xbe842c65,
4086 	0xbe852c65, 0xb77c0005,
4087 	0x80808500, 0xbf84fff8,
4088 	0xbe800080, 0xbf810000,
4089 };
4090 
4091 static const struct soc15_reg_entry vgpr_init_regs[] = {
4092    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4093    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4094    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4095    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4096    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4097    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4098    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4099    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4100    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
4101    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4102 };
4103 
4104 static const struct soc15_reg_entry sgpr_init_regs[] = {
4105    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4106    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4107    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4108    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4109    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4110    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4111    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4112    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4113    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4114    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4115 };
4116 
4117 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4118    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4119    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4120    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4121    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4122    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4123    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4124    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4125    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4126    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4127    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4128    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4129    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4130    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4131    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4132    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4133    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4134    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4135    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4136    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4137    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4138    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4139    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4140    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4141    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4142    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4143    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4144    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4145    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4146    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4147    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4148    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4149    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4150 };
4151 
4152 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4153 {
4154 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4155 	int i, r;
4156 
4157 	r = amdgpu_ring_alloc(ring, 7);
4158 	if (r) {
4159 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4160 			ring->name, r);
4161 		return r;
4162 	}
4163 
4164 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4165 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4166 
4167 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4168 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4169 				PACKET3_DMA_DATA_DST_SEL(1) |
4170 				PACKET3_DMA_DATA_SRC_SEL(2) |
4171 				PACKET3_DMA_DATA_ENGINE(0)));
4172 	amdgpu_ring_write(ring, 0);
4173 	amdgpu_ring_write(ring, 0);
4174 	amdgpu_ring_write(ring, 0);
4175 	amdgpu_ring_write(ring, 0);
4176 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4177 				adev->gds.gds_size);
4178 
4179 	amdgpu_ring_commit(ring);
4180 
4181 	for (i = 0; i < adev->usec_timeout; i++) {
4182 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4183 			break;
4184 		udelay(1);
4185 	}
4186 
4187 	if (i >= adev->usec_timeout)
4188 		r = -ETIMEDOUT;
4189 
4190 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4191 
4192 	return r;
4193 }
4194 
4195 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4196 {
4197 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4198 	struct amdgpu_ib ib;
4199 	struct dma_fence *f = NULL;
4200 	int r, i, j, k;
4201 	unsigned total_size, vgpr_offset, sgpr_offset;
4202 	u64 gpu_addr;
4203 
4204 	/* only support when RAS is enabled */
4205 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4206 		return 0;
4207 
4208 	/* bail if the compute ring is not ready */
4209 	if (!ring->sched.ready)
4210 		return 0;
4211 
4212 	total_size =
4213 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4214 	total_size +=
4215 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4216 	total_size = ALIGN(total_size, 256);
4217 	vgpr_offset = total_size;
4218 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4219 	sgpr_offset = total_size;
4220 	total_size += sizeof(sgpr_init_compute_shader);
4221 
4222 	/* allocate an indirect buffer to put the commands in */
4223 	memset(&ib, 0, sizeof(ib));
4224 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4225 	if (r) {
4226 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4227 		return r;
4228 	}
4229 
4230 	/* load the compute shaders */
4231 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4232 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4233 
4234 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4235 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4236 
4237 	/* init the ib length to 0 */
4238 	ib.length_dw = 0;
4239 
4240 	/* VGPR */
4241 	/* write the register state for the compute dispatch */
4242 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4243 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4244 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4245 								- PACKET3_SET_SH_REG_START;
4246 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4247 	}
4248 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4249 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4250 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4251 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4252 							- PACKET3_SET_SH_REG_START;
4253 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4254 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4255 
4256 	/* write dispatch packet */
4257 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4258 	ib.ptr[ib.length_dw++] = 128; /* x */
4259 	ib.ptr[ib.length_dw++] = 1; /* y */
4260 	ib.ptr[ib.length_dw++] = 1; /* z */
4261 	ib.ptr[ib.length_dw++] =
4262 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4263 
4264 	/* write CS partial flush packet */
4265 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4266 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4267 
4268 	/* SGPR */
4269 	/* write the register state for the compute dispatch */
4270 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4271 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4272 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4273 								- PACKET3_SET_SH_REG_START;
4274 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4275 	}
4276 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4277 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4278 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4279 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4280 							- PACKET3_SET_SH_REG_START;
4281 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4282 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4283 
4284 	/* write dispatch packet */
4285 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4286 	ib.ptr[ib.length_dw++] = 128; /* x */
4287 	ib.ptr[ib.length_dw++] = 1; /* y */
4288 	ib.ptr[ib.length_dw++] = 1; /* z */
4289 	ib.ptr[ib.length_dw++] =
4290 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4291 
4292 	/* write CS partial flush packet */
4293 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4294 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4295 
4296 	/* shedule the ib on the ring */
4297 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4298 	if (r) {
4299 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4300 		goto fail;
4301 	}
4302 
4303 	/* wait for the GPU to finish processing the IB */
4304 	r = dma_fence_wait(f, false);
4305 	if (r) {
4306 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4307 		goto fail;
4308 	}
4309 
4310 	/* read back registers to clear the counters */
4311 	mutex_lock(&adev->grbm_idx_mutex);
4312 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4313 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4314 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4315 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4316 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4317 			}
4318 		}
4319 	}
4320 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4321 	mutex_unlock(&adev->grbm_idx_mutex);
4322 
4323 fail:
4324 	amdgpu_ib_free(adev, &ib, NULL);
4325 	dma_fence_put(f);
4326 
4327 	return r;
4328 }
4329 
4330 static int gfx_v9_0_early_init(void *handle)
4331 {
4332 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4333 
4334 	if (adev->asic_type == CHIP_ARCTURUS)
4335 		adev->gfx.num_gfx_rings = 0;
4336 	else
4337 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4338 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4339 	gfx_v9_0_set_ring_funcs(adev);
4340 	gfx_v9_0_set_irq_funcs(adev);
4341 	gfx_v9_0_set_gds_init(adev);
4342 	gfx_v9_0_set_rlc_funcs(adev);
4343 
4344 	return 0;
4345 }
4346 
4347 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4348 		struct ras_err_data *err_data,
4349 		struct amdgpu_iv_entry *entry);
4350 
4351 static int gfx_v9_0_ecc_late_init(void *handle)
4352 {
4353 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4354 	struct ras_common_if **ras_if = &adev->gfx.ras_if;
4355 	struct ras_ih_if ih_info = {
4356 		.cb = gfx_v9_0_process_ras_data_cb,
4357 	};
4358 	struct ras_fs_if fs_info = {
4359 		.sysfs_name = "gfx_err_count",
4360 		.debugfs_name = "gfx_err_inject",
4361 	};
4362 	struct ras_common_if ras_block = {
4363 		.block = AMDGPU_RAS_BLOCK__GFX,
4364 		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4365 		.sub_block_index = 0,
4366 		.name = "gfx",
4367 	};
4368 	int r;
4369 
4370 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4371 		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4372 		return 0;
4373 	}
4374 
4375 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
4376 	if (r)
4377 		return r;
4378 
4379 	/* requires IBs so do in late init after IB pool is initialized */
4380 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4381 	if (r)
4382 		return r;
4383 
4384 	/* handle resume path. */
4385 	if (*ras_if) {
4386 		/* resend ras TA enable cmd during resume.
4387 		 * prepare to handle failure.
4388 		 */
4389 		ih_info.head = **ras_if;
4390 		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4391 		if (r) {
4392 			if (r == -EAGAIN) {
4393 				/* request a gpu reset. will run again. */
4394 				amdgpu_ras_request_reset_on_boot(adev,
4395 						AMDGPU_RAS_BLOCK__GFX);
4396 				return 0;
4397 			}
4398 			/* fail to enable ras, cleanup all. */
4399 			goto irq;
4400 		}
4401 		/* enable successfully. continue. */
4402 		goto resume;
4403 	}
4404 
4405 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4406 	if (!*ras_if)
4407 		return -ENOMEM;
4408 
4409 	**ras_if = ras_block;
4410 
4411 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4412 	if (r) {
4413 		if (r == -EAGAIN) {
4414 			amdgpu_ras_request_reset_on_boot(adev,
4415 					AMDGPU_RAS_BLOCK__GFX);
4416 			r = 0;
4417 		}
4418 		goto feature;
4419 	}
4420 
4421 	ih_info.head = **ras_if;
4422 	fs_info.head = **ras_if;
4423 
4424 	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4425 	if (r)
4426 		goto interrupt;
4427 
4428 	amdgpu_ras_debugfs_create(adev, &fs_info);
4429 
4430 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
4431 	if (r)
4432 		goto sysfs;
4433 resume:
4434 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4435 	if (r)
4436 		goto irq;
4437 
4438 	return 0;
4439 irq:
4440 	amdgpu_ras_sysfs_remove(adev, *ras_if);
4441 sysfs:
4442 	amdgpu_ras_debugfs_remove(adev, *ras_if);
4443 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4444 interrupt:
4445 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
4446 feature:
4447 	kfree(*ras_if);
4448 	*ras_if = NULL;
4449 	return r;
4450 }
4451 
4452 static int gfx_v9_0_late_init(void *handle)
4453 {
4454 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4455 	int r;
4456 
4457 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4458 	if (r)
4459 		return r;
4460 
4461 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4462 	if (r)
4463 		return r;
4464 
4465 	r = gfx_v9_0_ecc_late_init(handle);
4466 	if (r)
4467 		return r;
4468 
4469 	return 0;
4470 }
4471 
4472 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4473 {
4474 	uint32_t rlc_setting;
4475 
4476 	/* if RLC is not enabled, do nothing */
4477 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4478 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4479 		return false;
4480 
4481 	return true;
4482 }
4483 
4484 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4485 {
4486 	uint32_t data;
4487 	unsigned i;
4488 
4489 	data = RLC_SAFE_MODE__CMD_MASK;
4490 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4491 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4492 
4493 	/* wait for RLC_SAFE_MODE */
4494 	for (i = 0; i < adev->usec_timeout; i++) {
4495 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4496 			break;
4497 		udelay(1);
4498 	}
4499 }
4500 
4501 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4502 {
4503 	uint32_t data;
4504 
4505 	data = RLC_SAFE_MODE__CMD_MASK;
4506 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4507 }
4508 
4509 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4510 						bool enable)
4511 {
4512 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4513 
4514 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4515 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4516 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4517 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4518 	} else {
4519 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4520 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4521 	}
4522 
4523 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4524 }
4525 
4526 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4527 						bool enable)
4528 {
4529 	/* TODO: double check if we need to perform under safe mode */
4530 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4531 
4532 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4533 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4534 	else
4535 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4536 
4537 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4538 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4539 	else
4540 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4541 
4542 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4543 }
4544 
4545 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4546 						      bool enable)
4547 {
4548 	uint32_t data, def;
4549 
4550 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4551 
4552 	/* It is disabled by HW by default */
4553 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4554 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4555 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4556 
4557 		if (adev->asic_type != CHIP_VEGA12)
4558 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4559 
4560 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4561 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4562 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4563 
4564 		/* only for Vega10 & Raven1 */
4565 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4566 
4567 		if (def != data)
4568 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4569 
4570 		/* MGLS is a global flag to control all MGLS in GFX */
4571 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4572 			/* 2 - RLC memory Light sleep */
4573 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4574 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4575 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4576 				if (def != data)
4577 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4578 			}
4579 			/* 3 - CP memory Light sleep */
4580 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4581 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4582 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4583 				if (def != data)
4584 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4585 			}
4586 		}
4587 	} else {
4588 		/* 1 - MGCG_OVERRIDE */
4589 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4590 
4591 		if (adev->asic_type != CHIP_VEGA12)
4592 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4593 
4594 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4595 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4596 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4597 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4598 
4599 		if (def != data)
4600 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4601 
4602 		/* 2 - disable MGLS in RLC */
4603 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4604 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4605 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4606 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4607 		}
4608 
4609 		/* 3 - disable MGLS in CP */
4610 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4611 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4612 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4613 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4614 		}
4615 	}
4616 
4617 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4618 }
4619 
4620 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4621 					   bool enable)
4622 {
4623 	uint32_t data, def;
4624 
4625 	if (adev->asic_type == CHIP_ARCTURUS)
4626 		return;
4627 
4628 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4629 
4630 	/* Enable 3D CGCG/CGLS */
4631 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4632 		/* write cmd to clear cgcg/cgls ov */
4633 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4634 		/* unset CGCG override */
4635 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4636 		/* update CGCG and CGLS override bits */
4637 		if (def != data)
4638 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4639 
4640 		/* enable 3Dcgcg FSM(0x0000363f) */
4641 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4642 
4643 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4644 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4645 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4646 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4647 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4648 		if (def != data)
4649 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4650 
4651 		/* set IDLE_POLL_COUNT(0x00900100) */
4652 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4653 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4654 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4655 		if (def != data)
4656 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4657 	} else {
4658 		/* Disable CGCG/CGLS */
4659 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4660 		/* disable cgcg, cgls should be disabled */
4661 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4662 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4663 		/* disable cgcg and cgls in FSM */
4664 		if (def != data)
4665 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4666 	}
4667 
4668 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4669 }
4670 
4671 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4672 						      bool enable)
4673 {
4674 	uint32_t def, data;
4675 
4676 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4677 
4678 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4679 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4680 		/* unset CGCG override */
4681 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4682 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4683 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4684 		else
4685 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4686 		/* update CGCG and CGLS override bits */
4687 		if (def != data)
4688 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4689 
4690 		/* enable cgcg FSM(0x0000363F) */
4691 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4692 
4693 		if (adev->asic_type == CHIP_ARCTURUS)
4694 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4695 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4696 		else
4697 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4698 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4699 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4700 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4701 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4702 		if (def != data)
4703 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4704 
4705 		/* set IDLE_POLL_COUNT(0x00900100) */
4706 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4707 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4708 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4709 		if (def != data)
4710 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4711 	} else {
4712 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4713 		/* reset CGCG/CGLS bits */
4714 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4715 		/* disable cgcg and cgls in FSM */
4716 		if (def != data)
4717 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4718 	}
4719 
4720 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4721 }
4722 
4723 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4724 					    bool enable)
4725 {
4726 	if (enable) {
4727 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4728 		 * ===  MGCG + MGLS ===
4729 		 */
4730 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4731 		/* ===  CGCG /CGLS for GFX 3D Only === */
4732 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4733 		/* ===  CGCG + CGLS === */
4734 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4735 	} else {
4736 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4737 		 * ===  CGCG + CGLS ===
4738 		 */
4739 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4740 		/* ===  CGCG /CGLS for GFX 3D Only === */
4741 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4742 		/* ===  MGCG + MGLS === */
4743 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4744 	}
4745 	return 0;
4746 }
4747 
4748 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4749 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4750 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4751 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4752 	.init = gfx_v9_0_rlc_init,
4753 	.get_csb_size = gfx_v9_0_get_csb_size,
4754 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4755 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4756 	.resume = gfx_v9_0_rlc_resume,
4757 	.stop = gfx_v9_0_rlc_stop,
4758 	.reset = gfx_v9_0_rlc_reset,
4759 	.start = gfx_v9_0_rlc_start
4760 };
4761 
4762 static int gfx_v9_0_set_powergating_state(void *handle,
4763 					  enum amd_powergating_state state)
4764 {
4765 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4766 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
4767 
4768 	switch (adev->asic_type) {
4769 	case CHIP_RAVEN:
4770 		if (!enable) {
4771 			amdgpu_gfx_off_ctrl(adev, false);
4772 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4773 		}
4774 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4775 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4776 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4777 		} else {
4778 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4779 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4780 		}
4781 
4782 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4783 			gfx_v9_0_enable_cp_power_gating(adev, true);
4784 		else
4785 			gfx_v9_0_enable_cp_power_gating(adev, false);
4786 
4787 		/* update gfx cgpg state */
4788 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4789 
4790 		/* update mgcg state */
4791 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4792 
4793 		if (enable)
4794 			amdgpu_gfx_off_ctrl(adev, true);
4795 		break;
4796 	case CHIP_VEGA12:
4797 		if (!enable) {
4798 			amdgpu_gfx_off_ctrl(adev, false);
4799 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4800 		} else {
4801 			amdgpu_gfx_off_ctrl(adev, true);
4802 		}
4803 		break;
4804 	default:
4805 		break;
4806 	}
4807 
4808 	return 0;
4809 }
4810 
4811 static int gfx_v9_0_set_clockgating_state(void *handle,
4812 					  enum amd_clockgating_state state)
4813 {
4814 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4815 
4816 	if (amdgpu_sriov_vf(adev))
4817 		return 0;
4818 
4819 	switch (adev->asic_type) {
4820 	case CHIP_VEGA10:
4821 	case CHIP_VEGA12:
4822 	case CHIP_VEGA20:
4823 	case CHIP_RAVEN:
4824 	case CHIP_ARCTURUS:
4825 		gfx_v9_0_update_gfx_clock_gating(adev,
4826 						 state == AMD_CG_STATE_GATE ? true : false);
4827 		break;
4828 	default:
4829 		break;
4830 	}
4831 	return 0;
4832 }
4833 
4834 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4835 {
4836 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4837 	int data;
4838 
4839 	if (amdgpu_sriov_vf(adev))
4840 		*flags = 0;
4841 
4842 	/* AMD_CG_SUPPORT_GFX_MGCG */
4843 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4844 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4845 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4846 
4847 	/* AMD_CG_SUPPORT_GFX_CGCG */
4848 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4849 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4850 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4851 
4852 	/* AMD_CG_SUPPORT_GFX_CGLS */
4853 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4854 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4855 
4856 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4857 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4858 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4859 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4860 
4861 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4862 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4863 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4864 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4865 
4866 	if (adev->asic_type != CHIP_ARCTURUS) {
4867 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4868 		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4869 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4870 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4871 
4872 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4873 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4874 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4875 	}
4876 }
4877 
4878 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4879 {
4880 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
4881 }
4882 
4883 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4884 {
4885 	struct amdgpu_device *adev = ring->adev;
4886 	u64 wptr;
4887 
4888 	/* XXX check if swapping is necessary on BE */
4889 	if (ring->use_doorbell) {
4890 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4891 	} else {
4892 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4893 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4894 	}
4895 
4896 	return wptr;
4897 }
4898 
4899 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4900 {
4901 	struct amdgpu_device *adev = ring->adev;
4902 
4903 	if (ring->use_doorbell) {
4904 		/* XXX check if swapping is necessary on BE */
4905 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4906 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4907 	} else {
4908 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4909 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4910 	}
4911 }
4912 
4913 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4914 {
4915 	struct amdgpu_device *adev = ring->adev;
4916 	u32 ref_and_mask, reg_mem_engine;
4917 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4918 
4919 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4920 		switch (ring->me) {
4921 		case 1:
4922 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4923 			break;
4924 		case 2:
4925 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4926 			break;
4927 		default:
4928 			return;
4929 		}
4930 		reg_mem_engine = 0;
4931 	} else {
4932 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4933 		reg_mem_engine = 1; /* pfp */
4934 	}
4935 
4936 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4937 			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4938 			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4939 			      ref_and_mask, ref_and_mask, 0x20);
4940 }
4941 
4942 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4943 					struct amdgpu_job *job,
4944 					struct amdgpu_ib *ib,
4945 					uint32_t flags)
4946 {
4947 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4948 	u32 header, control = 0;
4949 
4950 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4951 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4952 	else
4953 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4954 
4955 	control |= ib->length_dw | (vmid << 24);
4956 
4957 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4958 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4959 
4960 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4961 			gfx_v9_0_ring_emit_de_meta(ring);
4962 	}
4963 
4964 	amdgpu_ring_write(ring, header);
4965 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4966 	amdgpu_ring_write(ring,
4967 #ifdef __BIG_ENDIAN
4968 		(2 << 0) |
4969 #endif
4970 		lower_32_bits(ib->gpu_addr));
4971 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4972 	amdgpu_ring_write(ring, control);
4973 }
4974 
4975 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4976 					  struct amdgpu_job *job,
4977 					  struct amdgpu_ib *ib,
4978 					  uint32_t flags)
4979 {
4980 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4981 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4982 
4983 	/* Currently, there is a high possibility to get wave ID mismatch
4984 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4985 	 * different wave IDs than the GDS expects. This situation happens
4986 	 * randomly when at least 5 compute pipes use GDS ordered append.
4987 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4988 	 * Those are probably bugs somewhere else in the kernel driver.
4989 	 *
4990 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4991 	 * GDS to 0 for this ring (me/pipe).
4992 	 */
4993 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4994 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4995 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4996 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4997 	}
4998 
4999 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5000 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5001 	amdgpu_ring_write(ring,
5002 #ifdef __BIG_ENDIAN
5003 				(2 << 0) |
5004 #endif
5005 				lower_32_bits(ib->gpu_addr));
5006 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5007 	amdgpu_ring_write(ring, control);
5008 }
5009 
5010 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5011 				     u64 seq, unsigned flags)
5012 {
5013 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5014 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5015 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5016 
5017 	/* RELEASE_MEM - flush caches, send int */
5018 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5019 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5020 					       EOP_TC_NC_ACTION_EN) :
5021 					      (EOP_TCL1_ACTION_EN |
5022 					       EOP_TC_ACTION_EN |
5023 					       EOP_TC_WB_ACTION_EN |
5024 					       EOP_TC_MD_ACTION_EN)) |
5025 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5026 				 EVENT_INDEX(5)));
5027 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5028 
5029 	/*
5030 	 * the address should be Qword aligned if 64bit write, Dword
5031 	 * aligned if only send 32bit data low (discard data high)
5032 	 */
5033 	if (write64bit)
5034 		BUG_ON(addr & 0x7);
5035 	else
5036 		BUG_ON(addr & 0x3);
5037 	amdgpu_ring_write(ring, lower_32_bits(addr));
5038 	amdgpu_ring_write(ring, upper_32_bits(addr));
5039 	amdgpu_ring_write(ring, lower_32_bits(seq));
5040 	amdgpu_ring_write(ring, upper_32_bits(seq));
5041 	amdgpu_ring_write(ring, 0);
5042 }
5043 
5044 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5045 {
5046 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5047 	uint32_t seq = ring->fence_drv.sync_seq;
5048 	uint64_t addr = ring->fence_drv.gpu_addr;
5049 
5050 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5051 			      lower_32_bits(addr), upper_32_bits(addr),
5052 			      seq, 0xffffffff, 4);
5053 }
5054 
5055 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5056 					unsigned vmid, uint64_t pd_addr)
5057 {
5058 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5059 
5060 	/* compute doesn't have PFP */
5061 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5062 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5063 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5064 		amdgpu_ring_write(ring, 0x0);
5065 	}
5066 }
5067 
5068 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5069 {
5070 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5071 }
5072 
5073 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5074 {
5075 	u64 wptr;
5076 
5077 	/* XXX check if swapping is necessary on BE */
5078 	if (ring->use_doorbell)
5079 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5080 	else
5081 		BUG();
5082 	return wptr;
5083 }
5084 
5085 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5086 					   bool acquire)
5087 {
5088 	struct amdgpu_device *adev = ring->adev;
5089 	int pipe_num, tmp, reg;
5090 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5091 
5092 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5093 
5094 	/* first me only has 2 entries, GFX and HP3D */
5095 	if (ring->me > 0)
5096 		pipe_num -= 2;
5097 
5098 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5099 	tmp = RREG32(reg);
5100 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5101 	WREG32(reg, tmp);
5102 }
5103 
5104 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5105 					    struct amdgpu_ring *ring,
5106 					    bool acquire)
5107 {
5108 	int i, pipe;
5109 	bool reserve;
5110 	struct amdgpu_ring *iring;
5111 
5112 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
5113 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5114 	if (acquire)
5115 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5116 	else
5117 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5118 
5119 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5120 		/* Clear all reservations - everyone reacquires all resources */
5121 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5122 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5123 						       true);
5124 
5125 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5126 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5127 						       true);
5128 	} else {
5129 		/* Lower all pipes without a current reservation */
5130 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5131 			iring = &adev->gfx.gfx_ring[i];
5132 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5133 							   iring->me,
5134 							   iring->pipe,
5135 							   0);
5136 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5137 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5138 		}
5139 
5140 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5141 			iring = &adev->gfx.compute_ring[i];
5142 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5143 							   iring->me,
5144 							   iring->pipe,
5145 							   0);
5146 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5147 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5148 		}
5149 	}
5150 
5151 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5152 }
5153 
5154 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5155 				      struct amdgpu_ring *ring,
5156 				      bool acquire)
5157 {
5158 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5159 	uint32_t queue_priority = acquire ? 0xf : 0x0;
5160 
5161 	mutex_lock(&adev->srbm_mutex);
5162 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5163 
5164 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5165 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5166 
5167 	soc15_grbm_select(adev, 0, 0, 0, 0);
5168 	mutex_unlock(&adev->srbm_mutex);
5169 }
5170 
5171 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5172 					       enum drm_sched_priority priority)
5173 {
5174 	struct amdgpu_device *adev = ring->adev;
5175 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5176 
5177 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5178 		return;
5179 
5180 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5181 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5182 }
5183 
5184 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5185 {
5186 	struct amdgpu_device *adev = ring->adev;
5187 
5188 	/* XXX check if swapping is necessary on BE */
5189 	if (ring->use_doorbell) {
5190 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5191 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5192 	} else{
5193 		BUG(); /* only DOORBELL method supported on gfx9 now */
5194 	}
5195 }
5196 
5197 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5198 					 u64 seq, unsigned int flags)
5199 {
5200 	struct amdgpu_device *adev = ring->adev;
5201 
5202 	/* we only allocate 32bit for each seq wb address */
5203 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5204 
5205 	/* write fence seq to the "addr" */
5206 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5207 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5208 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5209 	amdgpu_ring_write(ring, lower_32_bits(addr));
5210 	amdgpu_ring_write(ring, upper_32_bits(addr));
5211 	amdgpu_ring_write(ring, lower_32_bits(seq));
5212 
5213 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5214 		/* set register to trigger INT */
5215 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5216 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5217 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5218 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5219 		amdgpu_ring_write(ring, 0);
5220 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5221 	}
5222 }
5223 
5224 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5225 {
5226 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5227 	amdgpu_ring_write(ring, 0);
5228 }
5229 
5230 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5231 {
5232 	struct v9_ce_ib_state ce_payload = {0};
5233 	uint64_t csa_addr;
5234 	int cnt;
5235 
5236 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5237 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5238 
5239 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5240 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5241 				 WRITE_DATA_DST_SEL(8) |
5242 				 WR_CONFIRM) |
5243 				 WRITE_DATA_CACHE_POLICY(0));
5244 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5245 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5246 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5247 }
5248 
5249 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5250 {
5251 	struct v9_de_ib_state de_payload = {0};
5252 	uint64_t csa_addr, gds_addr;
5253 	int cnt;
5254 
5255 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5256 	gds_addr = csa_addr + 4096;
5257 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5258 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5259 
5260 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5261 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5262 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5263 				 WRITE_DATA_DST_SEL(8) |
5264 				 WR_CONFIRM) |
5265 				 WRITE_DATA_CACHE_POLICY(0));
5266 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5267 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5268 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5269 }
5270 
5271 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5272 {
5273 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5274 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5275 }
5276 
5277 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5278 {
5279 	uint32_t dw2 = 0;
5280 
5281 	if (amdgpu_sriov_vf(ring->adev))
5282 		gfx_v9_0_ring_emit_ce_meta(ring);
5283 
5284 	gfx_v9_0_ring_emit_tmz(ring, true);
5285 
5286 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5287 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5288 		/* set load_global_config & load_global_uconfig */
5289 		dw2 |= 0x8001;
5290 		/* set load_cs_sh_regs */
5291 		dw2 |= 0x01000000;
5292 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5293 		dw2 |= 0x10002;
5294 
5295 		/* set load_ce_ram if preamble presented */
5296 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5297 			dw2 |= 0x10000000;
5298 	} else {
5299 		/* still load_ce_ram if this is the first time preamble presented
5300 		 * although there is no context switch happens.
5301 		 */
5302 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5303 			dw2 |= 0x10000000;
5304 	}
5305 
5306 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5307 	amdgpu_ring_write(ring, dw2);
5308 	amdgpu_ring_write(ring, 0);
5309 }
5310 
5311 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5312 {
5313 	unsigned ret;
5314 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5315 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5316 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5317 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5318 	ret = ring->wptr & ring->buf_mask;
5319 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5320 	return ret;
5321 }
5322 
5323 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5324 {
5325 	unsigned cur;
5326 	BUG_ON(offset > ring->buf_mask);
5327 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5328 
5329 	cur = (ring->wptr & ring->buf_mask) - 1;
5330 	if (likely(cur > offset))
5331 		ring->ring[offset] = cur - offset;
5332 	else
5333 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5334 }
5335 
5336 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5337 {
5338 	struct amdgpu_device *adev = ring->adev;
5339 
5340 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5341 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5342 				(5 << 8) |	/* dst: memory */
5343 				(1 << 20));	/* write confirm */
5344 	amdgpu_ring_write(ring, reg);
5345 	amdgpu_ring_write(ring, 0);
5346 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5347 				adev->virt.reg_val_offs * 4));
5348 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5349 				adev->virt.reg_val_offs * 4));
5350 }
5351 
5352 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5353 				    uint32_t val)
5354 {
5355 	uint32_t cmd = 0;
5356 
5357 	switch (ring->funcs->type) {
5358 	case AMDGPU_RING_TYPE_GFX:
5359 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5360 		break;
5361 	case AMDGPU_RING_TYPE_KIQ:
5362 		cmd = (1 << 16); /* no inc addr */
5363 		break;
5364 	default:
5365 		cmd = WR_CONFIRM;
5366 		break;
5367 	}
5368 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5369 	amdgpu_ring_write(ring, cmd);
5370 	amdgpu_ring_write(ring, reg);
5371 	amdgpu_ring_write(ring, 0);
5372 	amdgpu_ring_write(ring, val);
5373 }
5374 
5375 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5376 					uint32_t val, uint32_t mask)
5377 {
5378 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5379 }
5380 
5381 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5382 						  uint32_t reg0, uint32_t reg1,
5383 						  uint32_t ref, uint32_t mask)
5384 {
5385 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5386 	struct amdgpu_device *adev = ring->adev;
5387 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5388 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5389 
5390 	if (fw_version_ok)
5391 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5392 				      ref, mask, 0x20);
5393 	else
5394 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5395 							   ref, mask);
5396 }
5397 
5398 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5399 {
5400 	struct amdgpu_device *adev = ring->adev;
5401 	uint32_t value = 0;
5402 
5403 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5404 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5405 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5406 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5407 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5408 }
5409 
5410 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5411 						 enum amdgpu_interrupt_state state)
5412 {
5413 	switch (state) {
5414 	case AMDGPU_IRQ_STATE_DISABLE:
5415 	case AMDGPU_IRQ_STATE_ENABLE:
5416 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5417 			       TIME_STAMP_INT_ENABLE,
5418 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5419 		break;
5420 	default:
5421 		break;
5422 	}
5423 }
5424 
5425 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5426 						     int me, int pipe,
5427 						     enum amdgpu_interrupt_state state)
5428 {
5429 	u32 mec_int_cntl, mec_int_cntl_reg;
5430 
5431 	/*
5432 	 * amdgpu controls only the first MEC. That's why this function only
5433 	 * handles the setting of interrupts for this specific MEC. All other
5434 	 * pipes' interrupts are set by amdkfd.
5435 	 */
5436 
5437 	if (me == 1) {
5438 		switch (pipe) {
5439 		case 0:
5440 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5441 			break;
5442 		case 1:
5443 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5444 			break;
5445 		case 2:
5446 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5447 			break;
5448 		case 3:
5449 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5450 			break;
5451 		default:
5452 			DRM_DEBUG("invalid pipe %d\n", pipe);
5453 			return;
5454 		}
5455 	} else {
5456 		DRM_DEBUG("invalid me %d\n", me);
5457 		return;
5458 	}
5459 
5460 	switch (state) {
5461 	case AMDGPU_IRQ_STATE_DISABLE:
5462 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5463 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5464 					     TIME_STAMP_INT_ENABLE, 0);
5465 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5466 		break;
5467 	case AMDGPU_IRQ_STATE_ENABLE:
5468 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5469 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5470 					     TIME_STAMP_INT_ENABLE, 1);
5471 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5472 		break;
5473 	default:
5474 		break;
5475 	}
5476 }
5477 
5478 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5479 					     struct amdgpu_irq_src *source,
5480 					     unsigned type,
5481 					     enum amdgpu_interrupt_state state)
5482 {
5483 	switch (state) {
5484 	case AMDGPU_IRQ_STATE_DISABLE:
5485 	case AMDGPU_IRQ_STATE_ENABLE:
5486 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5487 			       PRIV_REG_INT_ENABLE,
5488 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5489 		break;
5490 	default:
5491 		break;
5492 	}
5493 
5494 	return 0;
5495 }
5496 
5497 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5498 					      struct amdgpu_irq_src *source,
5499 					      unsigned type,
5500 					      enum amdgpu_interrupt_state state)
5501 {
5502 	switch (state) {
5503 	case AMDGPU_IRQ_STATE_DISABLE:
5504 	case AMDGPU_IRQ_STATE_ENABLE:
5505 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5506 			       PRIV_INSTR_INT_ENABLE,
5507 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5508 	default:
5509 		break;
5510 	}
5511 
5512 	return 0;
5513 }
5514 
5515 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5516 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5517 			CP_ECC_ERROR_INT_ENABLE, 1)
5518 
5519 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5520 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5521 			CP_ECC_ERROR_INT_ENABLE, 0)
5522 
5523 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5524 					      struct amdgpu_irq_src *source,
5525 					      unsigned type,
5526 					      enum amdgpu_interrupt_state state)
5527 {
5528 	switch (state) {
5529 	case AMDGPU_IRQ_STATE_DISABLE:
5530 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5531 				CP_ECC_ERROR_INT_ENABLE, 0);
5532 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5533 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5534 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5535 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5536 		break;
5537 
5538 	case AMDGPU_IRQ_STATE_ENABLE:
5539 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5540 				CP_ECC_ERROR_INT_ENABLE, 1);
5541 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5542 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5543 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5544 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5545 		break;
5546 	default:
5547 		break;
5548 	}
5549 
5550 	return 0;
5551 }
5552 
5553 
5554 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5555 					    struct amdgpu_irq_src *src,
5556 					    unsigned type,
5557 					    enum amdgpu_interrupt_state state)
5558 {
5559 	switch (type) {
5560 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5561 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5562 		break;
5563 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5564 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5565 		break;
5566 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5567 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5568 		break;
5569 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5570 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5571 		break;
5572 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5573 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5574 		break;
5575 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5576 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5577 		break;
5578 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5579 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5580 		break;
5581 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5582 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5583 		break;
5584 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5585 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5586 		break;
5587 	default:
5588 		break;
5589 	}
5590 	return 0;
5591 }
5592 
5593 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5594 			    struct amdgpu_irq_src *source,
5595 			    struct amdgpu_iv_entry *entry)
5596 {
5597 	int i;
5598 	u8 me_id, pipe_id, queue_id;
5599 	struct amdgpu_ring *ring;
5600 
5601 	DRM_DEBUG("IH: CP EOP\n");
5602 	me_id = (entry->ring_id & 0x0c) >> 2;
5603 	pipe_id = (entry->ring_id & 0x03) >> 0;
5604 	queue_id = (entry->ring_id & 0x70) >> 4;
5605 
5606 	switch (me_id) {
5607 	case 0:
5608 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5609 		break;
5610 	case 1:
5611 	case 2:
5612 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5613 			ring = &adev->gfx.compute_ring[i];
5614 			/* Per-queue interrupt is supported for MEC starting from VI.
5615 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5616 			  */
5617 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5618 				amdgpu_fence_process(ring);
5619 		}
5620 		break;
5621 	}
5622 	return 0;
5623 }
5624 
5625 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5626 			   struct amdgpu_iv_entry *entry)
5627 {
5628 	u8 me_id, pipe_id, queue_id;
5629 	struct amdgpu_ring *ring;
5630 	int i;
5631 
5632 	me_id = (entry->ring_id & 0x0c) >> 2;
5633 	pipe_id = (entry->ring_id & 0x03) >> 0;
5634 	queue_id = (entry->ring_id & 0x70) >> 4;
5635 
5636 	switch (me_id) {
5637 	case 0:
5638 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5639 		break;
5640 	case 1:
5641 	case 2:
5642 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5643 			ring = &adev->gfx.compute_ring[i];
5644 			if (ring->me == me_id && ring->pipe == pipe_id &&
5645 			    ring->queue == queue_id)
5646 				drm_sched_fault(&ring->sched);
5647 		}
5648 		break;
5649 	}
5650 }
5651 
5652 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5653 				 struct amdgpu_irq_src *source,
5654 				 struct amdgpu_iv_entry *entry)
5655 {
5656 	DRM_ERROR("Illegal register access in command stream\n");
5657 	gfx_v9_0_fault(adev, entry);
5658 	return 0;
5659 }
5660 
5661 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5662 				  struct amdgpu_irq_src *source,
5663 				  struct amdgpu_iv_entry *entry)
5664 {
5665 	DRM_ERROR("Illegal instruction in command stream\n");
5666 	gfx_v9_0_fault(adev, entry);
5667 	return 0;
5668 }
5669 
5670 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5671 		struct ras_err_data *err_data,
5672 		struct amdgpu_iv_entry *entry)
5673 {
5674 	/* TODO ue will trigger an interrupt. */
5675 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5676 	if (adev->gfx.funcs->query_ras_error_count)
5677 		adev->gfx.funcs->query_ras_error_count(adev, err_data);
5678 	amdgpu_ras_reset_gpu(adev, 0);
5679 	return AMDGPU_RAS_SUCCESS;
5680 }
5681 
5682 static const struct {
5683 	const char *name;
5684 	uint32_t ip;
5685 	uint32_t inst;
5686 	uint32_t seg;
5687 	uint32_t reg_offset;
5688 	uint32_t per_se_instance;
5689 	int32_t num_instance;
5690 	uint32_t sec_count_mask;
5691 	uint32_t ded_count_mask;
5692 } gfx_ras_edc_regs[] = {
5693 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5694 	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5695 	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5696 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5697 	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5698 	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5699 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5700 	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5701 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5702 	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5703 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5704 	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5705 	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5706 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5707 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5708 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5709 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5710 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5711 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5712 	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5713 	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5714 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5715 	  REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5716 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5717 	  REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5718 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5719 	  REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5720 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5721 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5722 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5723 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5724 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5725 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5726 	  0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5727 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5728 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5729 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5730 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5731 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5732 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5733 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5734 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5735 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5736 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5737 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5738 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5739 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5740 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5741 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5742 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5743 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5744 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5745 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5746 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5747 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5748 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5749 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5750 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5751 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5752 	  REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5753 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5754 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5755 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5756 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5757 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5758 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5759 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5760 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5761 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5762 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5763 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5764 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5765 	  REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5766 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5767 	  REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5768 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5769 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5770 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5771 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5772 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5773 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5774 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5775 	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5776 	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5777 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5778 	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5779 	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5780 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5781 	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5782 	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5783 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5784 	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5785 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5786 	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5787 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5788 	  REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5789 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5790 	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5791 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5792 	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5793 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5794 	  REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5795 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5796 	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5797 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5798 	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5799 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5800 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5801 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5802 	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5803 	  0 },
5804 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5805 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5806 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5807 	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5808 	  0 },
5809 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5810 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5811 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5812 	  REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5813 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5814 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5815 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5816 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5817 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5818 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5819 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5820 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5821 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5822 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5823 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5824 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5825 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5826 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5827 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5828 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5829 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5830 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5831 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5832 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5833 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5834 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5835 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5836 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5837 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5838 	  REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5839 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5840 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5841 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5842 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5843 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5844 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5845 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5846 	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5847 	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5848 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5849 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5850 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5851 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5852 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5853 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5854 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5855 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5856 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5857 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5858 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5859 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5860 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5861 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5862 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5863 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5864 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5865 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5866 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5867 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5868 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5869 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5870 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5871 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5872 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5873 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5874 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5875 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5876 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5877 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5878 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5879 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5880 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5881 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5882 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5883 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5884 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5885 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5886 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5887 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5888 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5889 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5890 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5891 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5892 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5893 	  0 },
5894 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5895 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5896 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5897 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5898 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5899 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5900 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5901 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5902 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5903 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5904 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5905 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5906 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5907 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5908 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5909 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5910 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5911 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5912 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5913 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5914 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5915 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5916 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5917 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5918 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5919 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5920 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5921 	  0 },
5922 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5923 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5924 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5925 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
5926 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5927 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
5928 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM",
5929 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5930 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
5931 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5932 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
5933 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
5934 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5935 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
5936 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
5937 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5938 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
5939 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
5940 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5941 	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
5942 	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
5943 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5944 	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
5945 	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
5946 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5947 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
5948 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5949 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
5950 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5951 	  REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
5952 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5953 	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
5954 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
5955 	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
5956 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5957 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
5958 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
5959 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5960 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
5961 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
5962 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5963 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
5964 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
5965 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5966 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
5967 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5968 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
5969 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5970 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
5971 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5972 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
5973 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5974 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
5975 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
5976 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
5977 };
5978 
5979 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
5980 				     void *inject_if)
5981 {
5982 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
5983 	int ret;
5984 	struct ta_ras_trigger_error_input block_info = { 0 };
5985 
5986 	if (adev->asic_type != CHIP_VEGA20)
5987 		return -EINVAL;
5988 
5989 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
5990 		return -EINVAL;
5991 
5992 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
5993 		return -EPERM;
5994 
5995 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
5996 	      info->head.type)) {
5997 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
5998 			ras_gfx_subblocks[info->head.sub_block_index].name,
5999 			info->head.type);
6000 		return -EPERM;
6001 	}
6002 
6003 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6004 	      info->head.type)) {
6005 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6006 			ras_gfx_subblocks[info->head.sub_block_index].name,
6007 			info->head.type);
6008 		return -EPERM;
6009 	}
6010 
6011 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6012 	block_info.sub_block_index =
6013 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6014 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6015 	block_info.address = info->address;
6016 	block_info.value = info->value;
6017 
6018 	mutex_lock(&adev->grbm_idx_mutex);
6019 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6020 	mutex_unlock(&adev->grbm_idx_mutex);
6021 
6022 	return ret;
6023 }
6024 
6025 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6026 					  void *ras_error_status)
6027 {
6028 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6029 	uint32_t sec_count, ded_count;
6030 	uint32_t i;
6031 	uint32_t reg_value;
6032 	uint32_t se_id, instance_id;
6033 
6034 	if (adev->asic_type != CHIP_VEGA20)
6035 		return -EINVAL;
6036 
6037 	err_data->ue_count = 0;
6038 	err_data->ce_count = 0;
6039 
6040 	mutex_lock(&adev->grbm_idx_mutex);
6041 	for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6042 		for (instance_id = 0; instance_id < 256; instance_id++) {
6043 			for (i = 0;
6044 			     i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]);
6045 			     i++) {
6046 				if (se_id != 0 &&
6047 				    !gfx_ras_edc_regs[i].per_se_instance)
6048 					continue;
6049 				if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6050 					continue;
6051 
6052 				gfx_v9_0_select_se_sh(adev, se_id, 0,
6053 						      instance_id);
6054 
6055 				reg_value = RREG32(
6056 					adev->reg_offset[gfx_ras_edc_regs[i].ip]
6057 							[gfx_ras_edc_regs[i].inst]
6058 							[gfx_ras_edc_regs[i].seg] +
6059 					gfx_ras_edc_regs[i].reg_offset);
6060 				sec_count = reg_value &
6061 					    gfx_ras_edc_regs[i].sec_count_mask;
6062 				ded_count = reg_value &
6063 					    gfx_ras_edc_regs[i].ded_count_mask;
6064 				if (sec_count) {
6065 					DRM_INFO(
6066 						"Instance[%d][%d]: SubBlock %s, SEC %d\n",
6067 						se_id, instance_id,
6068 						gfx_ras_edc_regs[i].name,
6069 						sec_count);
6070 					err_data->ce_count++;
6071 				}
6072 
6073 				if (ded_count) {
6074 					DRM_INFO(
6075 						"Instance[%d][%d]: SubBlock %s, DED %d\n",
6076 						se_id, instance_id,
6077 						gfx_ras_edc_regs[i].name,
6078 						ded_count);
6079 					err_data->ue_count++;
6080 				}
6081 			}
6082 		}
6083 	}
6084 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6085 	mutex_unlock(&adev->grbm_idx_mutex);
6086 
6087 	return 0;
6088 }
6089 
6090 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6091 				  struct amdgpu_irq_src *source,
6092 				  struct amdgpu_iv_entry *entry)
6093 {
6094 	struct ras_common_if *ras_if = adev->gfx.ras_if;
6095 	struct ras_dispatch_if ih_data = {
6096 		.entry = entry,
6097 	};
6098 
6099 	if (!ras_if)
6100 		return 0;
6101 
6102 	ih_data.head = *ras_if;
6103 
6104 	DRM_ERROR("CP ECC ERROR IRQ\n");
6105 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6106 	return 0;
6107 }
6108 
6109 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6110 	.name = "gfx_v9_0",
6111 	.early_init = gfx_v9_0_early_init,
6112 	.late_init = gfx_v9_0_late_init,
6113 	.sw_init = gfx_v9_0_sw_init,
6114 	.sw_fini = gfx_v9_0_sw_fini,
6115 	.hw_init = gfx_v9_0_hw_init,
6116 	.hw_fini = gfx_v9_0_hw_fini,
6117 	.suspend = gfx_v9_0_suspend,
6118 	.resume = gfx_v9_0_resume,
6119 	.is_idle = gfx_v9_0_is_idle,
6120 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6121 	.soft_reset = gfx_v9_0_soft_reset,
6122 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6123 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6124 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6125 };
6126 
6127 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6128 	.type = AMDGPU_RING_TYPE_GFX,
6129 	.align_mask = 0xff,
6130 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6131 	.support_64bit_ptrs = true,
6132 	.vmhub = AMDGPU_GFXHUB_0,
6133 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6134 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6135 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6136 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6137 		5 +  /* COND_EXEC */
6138 		7 +  /* PIPELINE_SYNC */
6139 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6140 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6141 		2 + /* VM_FLUSH */
6142 		8 +  /* FENCE for VM_FLUSH */
6143 		20 + /* GDS switch */
6144 		4 + /* double SWITCH_BUFFER,
6145 		       the first COND_EXEC jump to the place just
6146 			   prior to this double SWITCH_BUFFER  */
6147 		5 + /* COND_EXEC */
6148 		7 +	 /*	HDP_flush */
6149 		4 +	 /*	VGT_flush */
6150 		14 + /*	CE_META */
6151 		31 + /*	DE_META */
6152 		3 + /* CNTX_CTRL */
6153 		5 + /* HDP_INVL */
6154 		8 + 8 + /* FENCE x2 */
6155 		2, /* SWITCH_BUFFER */
6156 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6157 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6158 	.emit_fence = gfx_v9_0_ring_emit_fence,
6159 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6160 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6161 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6162 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6163 	.test_ring = gfx_v9_0_ring_test_ring,
6164 	.test_ib = gfx_v9_0_ring_test_ib,
6165 	.insert_nop = amdgpu_ring_insert_nop,
6166 	.pad_ib = amdgpu_ring_generic_pad_ib,
6167 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6168 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6169 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6170 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6171 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6172 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6173 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6174 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6175 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6176 };
6177 
6178 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6179 	.type = AMDGPU_RING_TYPE_COMPUTE,
6180 	.align_mask = 0xff,
6181 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6182 	.support_64bit_ptrs = true,
6183 	.vmhub = AMDGPU_GFXHUB_0,
6184 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6185 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6186 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6187 	.emit_frame_size =
6188 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6189 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6190 		5 + /* hdp invalidate */
6191 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6192 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6193 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6194 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6195 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6196 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6197 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6198 	.emit_fence = gfx_v9_0_ring_emit_fence,
6199 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6200 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6201 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6202 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6203 	.test_ring = gfx_v9_0_ring_test_ring,
6204 	.test_ib = gfx_v9_0_ring_test_ib,
6205 	.insert_nop = amdgpu_ring_insert_nop,
6206 	.pad_ib = amdgpu_ring_generic_pad_ib,
6207 	.set_priority = gfx_v9_0_ring_set_priority_compute,
6208 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6209 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6210 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6211 };
6212 
6213 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6214 	.type = AMDGPU_RING_TYPE_KIQ,
6215 	.align_mask = 0xff,
6216 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6217 	.support_64bit_ptrs = true,
6218 	.vmhub = AMDGPU_GFXHUB_0,
6219 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6220 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6221 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6222 	.emit_frame_size =
6223 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6224 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6225 		5 + /* hdp invalidate */
6226 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6227 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6228 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6229 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6230 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6231 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6232 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6233 	.test_ring = gfx_v9_0_ring_test_ring,
6234 	.insert_nop = amdgpu_ring_insert_nop,
6235 	.pad_ib = amdgpu_ring_generic_pad_ib,
6236 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6237 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6238 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6239 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6240 };
6241 
6242 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6243 {
6244 	int i;
6245 
6246 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6247 
6248 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6249 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6250 
6251 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6252 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6253 }
6254 
6255 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6256 	.set = gfx_v9_0_set_eop_interrupt_state,
6257 	.process = gfx_v9_0_eop_irq,
6258 };
6259 
6260 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6261 	.set = gfx_v9_0_set_priv_reg_fault_state,
6262 	.process = gfx_v9_0_priv_reg_irq,
6263 };
6264 
6265 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6266 	.set = gfx_v9_0_set_priv_inst_fault_state,
6267 	.process = gfx_v9_0_priv_inst_irq,
6268 };
6269 
6270 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6271 	.set = gfx_v9_0_set_cp_ecc_error_state,
6272 	.process = gfx_v9_0_cp_ecc_error_irq,
6273 };
6274 
6275 
6276 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6277 {
6278 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6279 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6280 
6281 	adev->gfx.priv_reg_irq.num_types = 1;
6282 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6283 
6284 	adev->gfx.priv_inst_irq.num_types = 1;
6285 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6286 
6287 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
6288 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6289 }
6290 
6291 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6292 {
6293 	switch (adev->asic_type) {
6294 	case CHIP_VEGA10:
6295 	case CHIP_VEGA12:
6296 	case CHIP_VEGA20:
6297 	case CHIP_RAVEN:
6298 	case CHIP_ARCTURUS:
6299 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6300 		break;
6301 	default:
6302 		break;
6303 	}
6304 }
6305 
6306 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6307 {
6308 	/* init asci gds info */
6309 	switch (adev->asic_type) {
6310 	case CHIP_VEGA10:
6311 	case CHIP_VEGA12:
6312 	case CHIP_VEGA20:
6313 		adev->gds.gds_size = 0x10000;
6314 		break;
6315 	case CHIP_RAVEN:
6316 	case CHIP_ARCTURUS:
6317 		adev->gds.gds_size = 0x1000;
6318 		break;
6319 	default:
6320 		adev->gds.gds_size = 0x10000;
6321 		break;
6322 	}
6323 
6324 	switch (adev->asic_type) {
6325 	case CHIP_VEGA10:
6326 	case CHIP_VEGA20:
6327 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6328 		break;
6329 	case CHIP_VEGA12:
6330 		adev->gds.gds_compute_max_wave_id = 0x27f;
6331 		break;
6332 	case CHIP_RAVEN:
6333 		if (adev->rev_id >= 0x8)
6334 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6335 		else
6336 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6337 		break;
6338 	case CHIP_ARCTURUS:
6339 		adev->gds.gds_compute_max_wave_id = 0xfff;
6340 		break;
6341 	default:
6342 		/* this really depends on the chip */
6343 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6344 		break;
6345 	}
6346 
6347 	adev->gds.gws_size = 64;
6348 	adev->gds.oa_size = 16;
6349 }
6350 
6351 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6352 						 u32 bitmap)
6353 {
6354 	u32 data;
6355 
6356 	if (!bitmap)
6357 		return;
6358 
6359 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6360 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6361 
6362 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6363 }
6364 
6365 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6366 {
6367 	u32 data, mask;
6368 
6369 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6370 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6371 
6372 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6373 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6374 
6375 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6376 
6377 	return (~data) & mask;
6378 }
6379 
6380 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6381 				 struct amdgpu_cu_info *cu_info)
6382 {
6383 	int i, j, k, counter, active_cu_number = 0;
6384 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6385 	unsigned disable_masks[4 * 4];
6386 
6387 	if (!adev || !cu_info)
6388 		return -EINVAL;
6389 
6390 	/*
6391 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
6392 	 */
6393 	if (adev->gfx.config.max_shader_engines *
6394 		adev->gfx.config.max_sh_per_se > 16)
6395 		return -EINVAL;
6396 
6397 	amdgpu_gfx_parse_disable_cu(disable_masks,
6398 				    adev->gfx.config.max_shader_engines,
6399 				    adev->gfx.config.max_sh_per_se);
6400 
6401 	mutex_lock(&adev->grbm_idx_mutex);
6402 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6403 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6404 			mask = 1;
6405 			ao_bitmap = 0;
6406 			counter = 0;
6407 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6408 			gfx_v9_0_set_user_cu_inactive_bitmap(
6409 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6410 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6411 
6412 			/*
6413 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
6414 			 * 4x4 size array, and it's usually suitable for Vega
6415 			 * ASICs which has 4*2 SE/SH layout.
6416 			 * But for Arcturus, SE/SH layout is changed to 8*1.
6417 			 * To mostly reduce the impact, we make it compatible
6418 			 * with current bitmap array as below:
6419 			 *    SE4,SH0 --> bitmap[0][1]
6420 			 *    SE5,SH0 --> bitmap[1][1]
6421 			 *    SE6,SH0 --> bitmap[2][1]
6422 			 *    SE7,SH0 --> bitmap[3][1]
6423 			 */
6424 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6425 
6426 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
6427 				if (bitmap & mask) {
6428 					if (counter < adev->gfx.config.max_cu_per_sh)
6429 						ao_bitmap |= mask;
6430 					counter ++;
6431 				}
6432 				mask <<= 1;
6433 			}
6434 			active_cu_number += counter;
6435 			if (i < 2 && j < 2)
6436 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6437 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6438 		}
6439 	}
6440 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6441 	mutex_unlock(&adev->grbm_idx_mutex);
6442 
6443 	cu_info->number = active_cu_number;
6444 	cu_info->ao_cu_mask = ao_cu_mask;
6445 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6446 
6447 	return 0;
6448 }
6449 
6450 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6451 {
6452 	.type = AMD_IP_BLOCK_TYPE_GFX,
6453 	.major = 9,
6454 	.minor = 0,
6455 	.rev = 0,
6456 	.funcs = &gfx_v9_0_ip_funcs,
6457 };
6458