xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision a44e4f3ab16bc808590763a543a93b6fbf3abcc4)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42 
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55 
56 #define mmPWR_MISC_CNTL_STATUS					0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
62 
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65 
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72 
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79 
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86 
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93 
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101 
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109 
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
113 
114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120 
121 #define mmTCP_CHAN_STEER_0_ARCT								0x0b03
122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX							0
123 #define mmTCP_CHAN_STEER_1_ARCT								0x0b04
124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX							0
125 #define mmTCP_CHAN_STEER_2_ARCT								0x0b09
126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX							0
127 #define mmTCP_CHAN_STEER_3_ARCT								0x0b0a
128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX							0
129 #define mmTCP_CHAN_STEER_4_ARCT								0x0b0b
130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX							0
131 #define mmTCP_CHAN_STEER_5_ARCT								0x0b0c
132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX							0
133 
134 enum ta_ras_gfx_subblock {
135 	/*CPC*/
136 	TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
137 	TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
138 	TA_RAS_BLOCK__GFX_CPC_UCODE,
139 	TA_RAS_BLOCK__GFX_DC_STATE_ME1,
140 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
141 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
142 	TA_RAS_BLOCK__GFX_DC_STATE_ME2,
143 	TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
144 	TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
145 	TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
146 	/* CPF*/
147 	TA_RAS_BLOCK__GFX_CPF_INDEX_START,
148 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
149 	TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
150 	TA_RAS_BLOCK__GFX_CPF_TAG,
151 	TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
152 	/* CPG*/
153 	TA_RAS_BLOCK__GFX_CPG_INDEX_START,
154 	TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
155 	TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
156 	TA_RAS_BLOCK__GFX_CPG_TAG,
157 	TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
158 	/* GDS*/
159 	TA_RAS_BLOCK__GFX_GDS_INDEX_START,
160 	TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
161 	TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
162 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
163 	TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
164 	TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
165 	TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
166 	/* SPI*/
167 	TA_RAS_BLOCK__GFX_SPI_SR_MEM,
168 	/* SQ*/
169 	TA_RAS_BLOCK__GFX_SQ_INDEX_START,
170 	TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
171 	TA_RAS_BLOCK__GFX_SQ_LDS_D,
172 	TA_RAS_BLOCK__GFX_SQ_LDS_I,
173 	TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
174 	TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
175 	/* SQC (3 ranges)*/
176 	TA_RAS_BLOCK__GFX_SQC_INDEX_START,
177 	/* SQC range 0*/
178 	TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
179 	TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
180 		TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
181 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
182 	TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
183 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
184 	TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
185 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
186 	TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
187 	TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
188 		TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
189 	/* SQC range 1*/
190 	TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
191 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
192 		TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
193 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
194 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
195 	TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
196 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
197 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
198 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
199 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
200 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
201 	TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
202 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
203 	/* SQC range 2*/
204 	TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
205 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
206 		TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
207 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
208 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
209 	TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
210 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
211 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
212 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
213 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
214 	TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
215 	TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
216 		TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
217 	TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
218 	/* TA*/
219 	TA_RAS_BLOCK__GFX_TA_INDEX_START,
220 	TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
221 	TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
222 	TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
223 	TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
224 	TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
225 	TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
226 	/* TCA*/
227 	TA_RAS_BLOCK__GFX_TCA_INDEX_START,
228 	TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
229 	TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
230 	TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
231 	/* TCC (5 sub-ranges)*/
232 	TA_RAS_BLOCK__GFX_TCC_INDEX_START,
233 	/* TCC range 0*/
234 	TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
235 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
236 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
237 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
238 	TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
239 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
240 	TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
241 	TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
242 	TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
243 	TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
244 	/* TCC range 1*/
245 	TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
246 	TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
247 	TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
248 	TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
249 		TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
250 	/* TCC range 2*/
251 	TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
252 	TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
253 	TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
254 	TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
255 	TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
256 	TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
257 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
258 	TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
259 	TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
260 	TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
261 		TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
262 	/* TCC range 3*/
263 	TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
264 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
265 	TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
266 	TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
267 		TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
268 	/* TCC range 4*/
269 	TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
270 	TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
271 		TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
272 	TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
273 	TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
274 		TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
275 	TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
276 	/* TCI*/
277 	TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
278 	/* TCP*/
279 	TA_RAS_BLOCK__GFX_TCP_INDEX_START,
280 	TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
281 	TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
282 	TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
283 	TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
284 	TA_RAS_BLOCK__GFX_TCP_DB_RAM,
285 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
286 	TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
287 	TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
288 	/* TD*/
289 	TA_RAS_BLOCK__GFX_TD_INDEX_START,
290 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
291 	TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
292 	TA_RAS_BLOCK__GFX_TD_CS_FIFO,
293 	TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
294 	/* EA (3 sub-ranges)*/
295 	TA_RAS_BLOCK__GFX_EA_INDEX_START,
296 	/* EA range 0*/
297 	TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
298 	TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
299 	TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
300 	TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
301 	TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
302 	TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
303 	TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
304 	TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
305 	TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
306 	TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
307 	/* EA range 1*/
308 	TA_RAS_BLOCK__GFX_EA_INDEX1_START,
309 	TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
310 	TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
311 	TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
312 	TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
313 	TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
314 	TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
315 	TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
316 	TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
317 	/* EA range 2*/
318 	TA_RAS_BLOCK__GFX_EA_INDEX2_START,
319 	TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
320 	TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
321 	TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
322 	TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
323 	TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
324 	TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
325 	/* UTC VM L2 bank*/
326 	TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
327 	/* UTC VM walker*/
328 	TA_RAS_BLOCK__UTC_VML2_WALKER,
329 	/* UTC ATC L2 2MB cache*/
330 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
331 	/* UTC ATC L2 4KB cache*/
332 	TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
333 	TA_RAS_BLOCK__GFX_MAX
334 };
335 
336 struct ras_gfx_subblock {
337 	unsigned char *name;
338 	int ta_subblock;
339 	int hw_supported_error_type;
340 	int sw_supported_error_type;
341 };
342 
343 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
344 	[AMDGPU_RAS_BLOCK__##subblock] = {                                     \
345 		#subblock,                                                     \
346 		TA_RAS_BLOCK__##subblock,                                      \
347 		((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
348 		(((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
349 	}
350 
351 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
352 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
353 	AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
354 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
355 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
356 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
357 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
358 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
359 	AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
360 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
361 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
362 	AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
363 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
364 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
365 	AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
366 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
367 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
368 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
369 			     0),
370 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
371 			     0),
372 	AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
373 	AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
374 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
375 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
376 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
377 	AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
378 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
379 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
380 			     0, 0),
381 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
382 			     0),
383 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
384 			     0, 0),
385 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
386 			     0),
387 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
388 			     0, 0),
389 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
390 			     0),
391 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
392 			     1),
393 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
394 			     0, 0, 0),
395 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
396 			     0),
397 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
398 			     0),
399 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
400 			     0),
401 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
402 			     0),
403 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
404 			     0),
405 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
406 			     0, 0),
407 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
408 			     0),
409 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
410 			     0),
411 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
412 			     0, 0, 0),
413 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
414 			     0),
415 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
416 			     0),
417 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
418 			     0),
419 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
420 			     0),
421 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
422 			     0),
423 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
424 			     0, 0),
425 	AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
426 			     0),
427 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
428 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
429 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
430 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431 	AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
432 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
433 	AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
434 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
435 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
436 			     1),
437 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
438 			     1),
439 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
440 			     1),
441 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
442 			     0),
443 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
444 			     0),
445 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
446 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
447 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
448 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
449 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
450 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
451 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
452 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
453 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
454 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
455 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
456 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
457 			     0),
458 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
459 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
460 			     0),
461 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
462 			     0, 0),
463 	AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
464 			     0),
465 	AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
466 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
467 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
468 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
469 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
470 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
471 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
472 	AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
473 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
474 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
475 	AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
476 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
477 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
478 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
479 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
482 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
483 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
484 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
485 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
486 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
489 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
490 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
491 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
492 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
493 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
494 	AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
495 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
496 	AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
497 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
498 	AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
499 };
500 
501 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
502 {
503 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
504 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
505 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
506 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
507 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
508 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
509 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
510 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
511 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
512 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
513 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
514 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
515 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
516 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
517 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
518 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
519 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
520 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
521 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
522 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
523 };
524 
525 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
526 {
527 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
528 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
529 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
530 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
531 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
532 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
533 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
534 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
535 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
536 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
537 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
538 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
539 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
540 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
541 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
542 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
543 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
544 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
545 };
546 
547 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
548 {
549 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
550 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
551 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
552 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
553 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
554 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
555 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
556 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
557 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
558 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
559 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
560 };
561 
562 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
563 {
564 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
565 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
566 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
567 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
568 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
569 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
570 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
571 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
572 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
573 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
574 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
575 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
576 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
577 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
578 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
579 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
580 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
581 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
582 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
583 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
584 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
585 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
586 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
587 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
588 };
589 
590 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
591 {
592 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
593 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
594 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
595 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
596 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
597 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
598 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
599 };
600 
601 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
602 {
603 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
604 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
605 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
606 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
607 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
608 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
609 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
610 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
611 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
612 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
613 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
614 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
615 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
616 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
617 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
618 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
619 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
620 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
621 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
622 };
623 
624 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
625 {
626 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
627 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
628 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
629 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
630 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
631 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
632 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
633 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
634 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
635 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
636 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
637 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
638 };
639 
640 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
641 {
642 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
643 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
644 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
645 };
646 
647 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
648 {
649 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
650 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
651 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
652 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
653 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
654 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
655 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
656 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
657 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
658 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
659 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
660 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
661 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
662 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
663 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
664 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
665 };
666 
667 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
668 {
669 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
670 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
671 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
672 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
673 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
674 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
675 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
676 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
677 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
678 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
679 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
680 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
681 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
682 };
683 
684 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
685 {
686 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
687 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
688 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
689 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
690 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
691 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
692 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
693 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
694 };
695 
696 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
697 {
698 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
699 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
700 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
701 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
702 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
703 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
704 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
705 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
706 };
707 
708 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
709 {
710 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
711 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
712 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
713 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
714 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
715 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
716 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
717 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
718 };
719 
720 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
721 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
722 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
723 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
724 
725 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
726 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
727 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
728 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
729 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
730                                  struct amdgpu_cu_info *cu_info);
731 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
732 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
733 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
734 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
735 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
736 					  void *ras_error_status);
737 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
738 				     void *inject_if);
739 
740 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
741 {
742 	switch (adev->asic_type) {
743 	case CHIP_VEGA10:
744 		soc15_program_register_sequence(adev,
745 						golden_settings_gc_9_0,
746 						ARRAY_SIZE(golden_settings_gc_9_0));
747 		soc15_program_register_sequence(adev,
748 						golden_settings_gc_9_0_vg10,
749 						ARRAY_SIZE(golden_settings_gc_9_0_vg10));
750 		break;
751 	case CHIP_VEGA12:
752 		soc15_program_register_sequence(adev,
753 						golden_settings_gc_9_2_1,
754 						ARRAY_SIZE(golden_settings_gc_9_2_1));
755 		soc15_program_register_sequence(adev,
756 						golden_settings_gc_9_2_1_vg12,
757 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
758 		break;
759 	case CHIP_VEGA20:
760 		soc15_program_register_sequence(adev,
761 						golden_settings_gc_9_0,
762 						ARRAY_SIZE(golden_settings_gc_9_0));
763 		soc15_program_register_sequence(adev,
764 						golden_settings_gc_9_0_vg20,
765 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
766 		break;
767 	case CHIP_ARCTURUS:
768 		soc15_program_register_sequence(adev,
769 						golden_settings_gc_9_4_1_arct,
770 						ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
771 		break;
772 	case CHIP_RAVEN:
773 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
774 						ARRAY_SIZE(golden_settings_gc_9_1));
775 		if (adev->rev_id >= 8)
776 			soc15_program_register_sequence(adev,
777 							golden_settings_gc_9_1_rv2,
778 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
779 		else
780 			soc15_program_register_sequence(adev,
781 							golden_settings_gc_9_1_rv1,
782 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
783 		break;
784 	 case CHIP_RENOIR:
785 		soc15_program_register_sequence(adev,
786 						golden_settings_gc_9_1_rn,
787 						ARRAY_SIZE(golden_settings_gc_9_1_rn));
788 		return; /* for renoir, don't need common goldensetting */
789 	default:
790 		break;
791 	}
792 
793 	if (adev->asic_type != CHIP_ARCTURUS)
794 		soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
795 						(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
796 }
797 
798 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
799 {
800 	adev->gfx.scratch.num_reg = 8;
801 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
802 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
803 }
804 
805 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
806 				       bool wc, uint32_t reg, uint32_t val)
807 {
808 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
809 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
810 				WRITE_DATA_DST_SEL(0) |
811 				(wc ? WR_CONFIRM : 0));
812 	amdgpu_ring_write(ring, reg);
813 	amdgpu_ring_write(ring, 0);
814 	amdgpu_ring_write(ring, val);
815 }
816 
817 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
818 				  int mem_space, int opt, uint32_t addr0,
819 				  uint32_t addr1, uint32_t ref, uint32_t mask,
820 				  uint32_t inv)
821 {
822 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
823 	amdgpu_ring_write(ring,
824 				 /* memory (1) or register (0) */
825 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
826 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
827 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
828 				 WAIT_REG_MEM_ENGINE(eng_sel)));
829 
830 	if (mem_space)
831 		BUG_ON(addr0 & 0x3); /* Dword align */
832 	amdgpu_ring_write(ring, addr0);
833 	amdgpu_ring_write(ring, addr1);
834 	amdgpu_ring_write(ring, ref);
835 	amdgpu_ring_write(ring, mask);
836 	amdgpu_ring_write(ring, inv); /* poll interval */
837 }
838 
839 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
840 {
841 	struct amdgpu_device *adev = ring->adev;
842 	uint32_t scratch;
843 	uint32_t tmp = 0;
844 	unsigned i;
845 	int r;
846 
847 	r = amdgpu_gfx_scratch_get(adev, &scratch);
848 	if (r)
849 		return r;
850 
851 	WREG32(scratch, 0xCAFEDEAD);
852 	r = amdgpu_ring_alloc(ring, 3);
853 	if (r)
854 		goto error_free_scratch;
855 
856 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
857 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
858 	amdgpu_ring_write(ring, 0xDEADBEEF);
859 	amdgpu_ring_commit(ring);
860 
861 	for (i = 0; i < adev->usec_timeout; i++) {
862 		tmp = RREG32(scratch);
863 		if (tmp == 0xDEADBEEF)
864 			break;
865 		udelay(1);
866 	}
867 
868 	if (i >= adev->usec_timeout)
869 		r = -ETIMEDOUT;
870 
871 error_free_scratch:
872 	amdgpu_gfx_scratch_free(adev, scratch);
873 	return r;
874 }
875 
876 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
877 {
878 	struct amdgpu_device *adev = ring->adev;
879 	struct amdgpu_ib ib;
880 	struct dma_fence *f = NULL;
881 
882 	unsigned index;
883 	uint64_t gpu_addr;
884 	uint32_t tmp;
885 	long r;
886 
887 	r = amdgpu_device_wb_get(adev, &index);
888 	if (r)
889 		return r;
890 
891 	gpu_addr = adev->wb.gpu_addr + (index * 4);
892 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
893 	memset(&ib, 0, sizeof(ib));
894 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
895 	if (r)
896 		goto err1;
897 
898 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
899 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
900 	ib.ptr[2] = lower_32_bits(gpu_addr);
901 	ib.ptr[3] = upper_32_bits(gpu_addr);
902 	ib.ptr[4] = 0xDEADBEEF;
903 	ib.length_dw = 5;
904 
905 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
906 	if (r)
907 		goto err2;
908 
909 	r = dma_fence_wait_timeout(f, false, timeout);
910 	if (r == 0) {
911 		r = -ETIMEDOUT;
912 		goto err2;
913 	} else if (r < 0) {
914 		goto err2;
915 	}
916 
917 	tmp = adev->wb.wb[index];
918 	if (tmp == 0xDEADBEEF)
919 		r = 0;
920 	else
921 		r = -EINVAL;
922 
923 err2:
924 	amdgpu_ib_free(adev, &ib, NULL);
925 	dma_fence_put(f);
926 err1:
927 	amdgpu_device_wb_free(adev, index);
928 	return r;
929 }
930 
931 
932 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
933 {
934 	release_firmware(adev->gfx.pfp_fw);
935 	adev->gfx.pfp_fw = NULL;
936 	release_firmware(adev->gfx.me_fw);
937 	adev->gfx.me_fw = NULL;
938 	release_firmware(adev->gfx.ce_fw);
939 	adev->gfx.ce_fw = NULL;
940 	release_firmware(adev->gfx.rlc_fw);
941 	adev->gfx.rlc_fw = NULL;
942 	release_firmware(adev->gfx.mec_fw);
943 	adev->gfx.mec_fw = NULL;
944 	release_firmware(adev->gfx.mec2_fw);
945 	adev->gfx.mec2_fw = NULL;
946 
947 	kfree(adev->gfx.rlc.register_list_format);
948 }
949 
950 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
951 {
952 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
953 
954 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
955 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
956 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
957 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
958 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
959 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
960 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
961 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
962 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
963 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
964 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
965 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
966 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
967 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
968 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
969 }
970 
971 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
972 {
973 	adev->gfx.me_fw_write_wait = false;
974 	adev->gfx.mec_fw_write_wait = false;
975 
976 	if ((adev->gfx.mec_fw_version < 0x000001a5) ||
977 	    (adev->gfx.mec_feature_version < 46) ||
978 	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
979 	    (adev->gfx.pfp_feature_version < 46))
980 		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
981 			      GRBM requires 1-cycle delay in cp firmware\n");
982 
983 	switch (adev->asic_type) {
984 	case CHIP_VEGA10:
985 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
986 		    (adev->gfx.me_feature_version >= 42) &&
987 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
988 		    (adev->gfx.pfp_feature_version >= 42))
989 			adev->gfx.me_fw_write_wait = true;
990 
991 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
992 		    (adev->gfx.mec_feature_version >= 42))
993 			adev->gfx.mec_fw_write_wait = true;
994 		break;
995 	case CHIP_VEGA12:
996 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
997 		    (adev->gfx.me_feature_version >= 44) &&
998 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
999 		    (adev->gfx.pfp_feature_version >= 44))
1000 			adev->gfx.me_fw_write_wait = true;
1001 
1002 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1003 		    (adev->gfx.mec_feature_version >= 44))
1004 			adev->gfx.mec_fw_write_wait = true;
1005 		break;
1006 	case CHIP_VEGA20:
1007 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1008 		    (adev->gfx.me_feature_version >= 44) &&
1009 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1010 		    (adev->gfx.pfp_feature_version >= 44))
1011 			adev->gfx.me_fw_write_wait = true;
1012 
1013 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1014 		    (adev->gfx.mec_feature_version >= 44))
1015 			adev->gfx.mec_fw_write_wait = true;
1016 		break;
1017 	case CHIP_RAVEN:
1018 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1019 		    (adev->gfx.me_feature_version >= 42) &&
1020 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1021 		    (adev->gfx.pfp_feature_version >= 42))
1022 			adev->gfx.me_fw_write_wait = true;
1023 
1024 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1025 		    (adev->gfx.mec_feature_version >= 42))
1026 			adev->gfx.mec_fw_write_wait = true;
1027 		break;
1028 	default:
1029 		break;
1030 	}
1031 }
1032 
1033 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1034 {
1035 	switch (adev->asic_type) {
1036 	case CHIP_VEGA10:
1037 	case CHIP_VEGA12:
1038 	case CHIP_VEGA20:
1039 		break;
1040 	case CHIP_RAVEN:
1041 		if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
1042 			&&((adev->gfx.rlc_fw_version != 106 &&
1043 			     adev->gfx.rlc_fw_version < 531) ||
1044 			    (adev->gfx.rlc_fw_version == 53815) ||
1045 			    (adev->gfx.rlc_feature_version < 1) ||
1046 			    !adev->gfx.rlc.is_rlc_v2_1))
1047 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1048 
1049 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1050 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1051 				AMD_PG_SUPPORT_CP |
1052 				AMD_PG_SUPPORT_RLC_SMU_HS;
1053 		break;
1054 	case CHIP_RENOIR:
1055 		if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1056 			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1057 				AMD_PG_SUPPORT_CP |
1058 				AMD_PG_SUPPORT_RLC_SMU_HS;
1059 		break;
1060 	default:
1061 		break;
1062 	}
1063 }
1064 
1065 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1066 					  const char *chip_name)
1067 {
1068 	char fw_name[30];
1069 	int err;
1070 	struct amdgpu_firmware_info *info = NULL;
1071 	const struct common_firmware_header *header = NULL;
1072 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1073 
1074 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1075 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1076 	if (err)
1077 		goto out;
1078 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1079 	if (err)
1080 		goto out;
1081 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1082 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1083 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1084 
1085 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1086 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1087 	if (err)
1088 		goto out;
1089 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
1090 	if (err)
1091 		goto out;
1092 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1093 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1094 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1095 
1096 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1097 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1098 	if (err)
1099 		goto out;
1100 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1101 	if (err)
1102 		goto out;
1103 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1104 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1105 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1106 
1107 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1108 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1109 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1110 		info->fw = adev->gfx.pfp_fw;
1111 		header = (const struct common_firmware_header *)info->fw->data;
1112 		adev->firmware.fw_size +=
1113 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1114 
1115 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1116 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1117 		info->fw = adev->gfx.me_fw;
1118 		header = (const struct common_firmware_header *)info->fw->data;
1119 		adev->firmware.fw_size +=
1120 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1121 
1122 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1123 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1124 		info->fw = adev->gfx.ce_fw;
1125 		header = (const struct common_firmware_header *)info->fw->data;
1126 		adev->firmware.fw_size +=
1127 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1128 	}
1129 
1130 out:
1131 	if (err) {
1132 		dev_err(adev->dev,
1133 			"gfx9: Failed to load firmware \"%s\"\n",
1134 			fw_name);
1135 		release_firmware(adev->gfx.pfp_fw);
1136 		adev->gfx.pfp_fw = NULL;
1137 		release_firmware(adev->gfx.me_fw);
1138 		adev->gfx.me_fw = NULL;
1139 		release_firmware(adev->gfx.ce_fw);
1140 		adev->gfx.ce_fw = NULL;
1141 	}
1142 	return err;
1143 }
1144 
1145 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1146 					  const char *chip_name)
1147 {
1148 	char fw_name[30];
1149 	int err;
1150 	struct amdgpu_firmware_info *info = NULL;
1151 	const struct common_firmware_header *header = NULL;
1152 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1153 	unsigned int *tmp = NULL;
1154 	unsigned int i = 0;
1155 	uint16_t version_major;
1156 	uint16_t version_minor;
1157 	uint32_t smu_version;
1158 
1159 	/*
1160 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1161 	 * instead of picasso_rlc.bin.
1162 	 * Judgment method:
1163 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1164 	 *          or revision >= 0xD8 && revision <= 0xDF
1165 	 * otherwise is PCO FP5
1166 	 */
1167 	if (!strcmp(chip_name, "picasso") &&
1168 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1169 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1170 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1171 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1172 		(smu_version >= 0x41e2b))
1173 		/**
1174 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1175 		*/
1176 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1177 	else
1178 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1179 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1180 	if (err)
1181 		goto out;
1182 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1183 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1184 
1185 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1186 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1187 	if (version_major == 2 && version_minor == 1)
1188 		adev->gfx.rlc.is_rlc_v2_1 = true;
1189 
1190 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1191 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1192 	adev->gfx.rlc.save_and_restore_offset =
1193 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
1194 	adev->gfx.rlc.clear_state_descriptor_offset =
1195 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1196 	adev->gfx.rlc.avail_scratch_ram_locations =
1197 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1198 	adev->gfx.rlc.reg_restore_list_size =
1199 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
1200 	adev->gfx.rlc.reg_list_format_start =
1201 			le32_to_cpu(rlc_hdr->reg_list_format_start);
1202 	adev->gfx.rlc.reg_list_format_separate_start =
1203 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1204 	adev->gfx.rlc.starting_offsets_start =
1205 			le32_to_cpu(rlc_hdr->starting_offsets_start);
1206 	adev->gfx.rlc.reg_list_format_size_bytes =
1207 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1208 	adev->gfx.rlc.reg_list_size_bytes =
1209 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1210 	adev->gfx.rlc.register_list_format =
1211 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1212 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1213 	if (!adev->gfx.rlc.register_list_format) {
1214 		err = -ENOMEM;
1215 		goto out;
1216 	}
1217 
1218 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1219 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1220 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1221 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
1222 
1223 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1224 
1225 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1226 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1227 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1228 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1229 
1230 	if (adev->gfx.rlc.is_rlc_v2_1)
1231 		gfx_v9_0_init_rlc_ext_microcode(adev);
1232 
1233 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1234 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1235 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1236 		info->fw = adev->gfx.rlc_fw;
1237 		header = (const struct common_firmware_header *)info->fw->data;
1238 		adev->firmware.fw_size +=
1239 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1240 
1241 		if (adev->gfx.rlc.is_rlc_v2_1 &&
1242 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1243 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1244 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1245 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1246 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1247 			info->fw = adev->gfx.rlc_fw;
1248 			adev->firmware.fw_size +=
1249 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1250 
1251 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1252 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1253 			info->fw = adev->gfx.rlc_fw;
1254 			adev->firmware.fw_size +=
1255 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1256 
1257 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1258 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1259 			info->fw = adev->gfx.rlc_fw;
1260 			adev->firmware.fw_size +=
1261 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1262 		}
1263 	}
1264 
1265 out:
1266 	if (err) {
1267 		dev_err(adev->dev,
1268 			"gfx9: Failed to load firmware \"%s\"\n",
1269 			fw_name);
1270 		release_firmware(adev->gfx.rlc_fw);
1271 		adev->gfx.rlc_fw = NULL;
1272 	}
1273 	return err;
1274 }
1275 
1276 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1277 					  const char *chip_name)
1278 {
1279 	char fw_name[30];
1280 	int err;
1281 	struct amdgpu_firmware_info *info = NULL;
1282 	const struct common_firmware_header *header = NULL;
1283 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1284 
1285 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1286 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1287 	if (err)
1288 		goto out;
1289 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1290 	if (err)
1291 		goto out;
1292 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1293 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1294 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1295 
1296 
1297 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1298 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1299 	if (!err) {
1300 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1301 		if (err)
1302 			goto out;
1303 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1304 		adev->gfx.mec2_fw->data;
1305 		adev->gfx.mec2_fw_version =
1306 		le32_to_cpu(cp_hdr->header.ucode_version);
1307 		adev->gfx.mec2_feature_version =
1308 		le32_to_cpu(cp_hdr->ucode_feature_version);
1309 	} else {
1310 		err = 0;
1311 		adev->gfx.mec2_fw = NULL;
1312 	}
1313 
1314 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1315 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1316 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1317 		info->fw = adev->gfx.mec_fw;
1318 		header = (const struct common_firmware_header *)info->fw->data;
1319 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1320 		adev->firmware.fw_size +=
1321 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1322 
1323 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1324 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1325 		info->fw = adev->gfx.mec_fw;
1326 		adev->firmware.fw_size +=
1327 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1328 
1329 		if (adev->gfx.mec2_fw) {
1330 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1331 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1332 			info->fw = adev->gfx.mec2_fw;
1333 			header = (const struct common_firmware_header *)info->fw->data;
1334 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1335 			adev->firmware.fw_size +=
1336 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1337 
1338 			/* TODO: Determine if MEC2 JT FW loading can be removed
1339 				 for all GFX V9 asic and above */
1340 			if (adev->asic_type != CHIP_ARCTURUS) {
1341 				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1342 				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1343 				info->fw = adev->gfx.mec2_fw;
1344 				adev->firmware.fw_size +=
1345 					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1346 					PAGE_SIZE);
1347 			}
1348 		}
1349 	}
1350 
1351 out:
1352 	gfx_v9_0_check_if_need_gfxoff(adev);
1353 	gfx_v9_0_check_fw_write_wait(adev);
1354 	if (err) {
1355 		dev_err(adev->dev,
1356 			"gfx9: Failed to load firmware \"%s\"\n",
1357 			fw_name);
1358 		release_firmware(adev->gfx.mec_fw);
1359 		adev->gfx.mec_fw = NULL;
1360 		release_firmware(adev->gfx.mec2_fw);
1361 		adev->gfx.mec2_fw = NULL;
1362 	}
1363 	return err;
1364 }
1365 
1366 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1367 {
1368 	const char *chip_name;
1369 	int r;
1370 
1371 	DRM_DEBUG("\n");
1372 
1373 	switch (adev->asic_type) {
1374 	case CHIP_VEGA10:
1375 		chip_name = "vega10";
1376 		break;
1377 	case CHIP_VEGA12:
1378 		chip_name = "vega12";
1379 		break;
1380 	case CHIP_VEGA20:
1381 		chip_name = "vega20";
1382 		break;
1383 	case CHIP_RAVEN:
1384 		if (adev->rev_id >= 8)
1385 			chip_name = "raven2";
1386 		else if (adev->pdev->device == 0x15d8)
1387 			chip_name = "picasso";
1388 		else
1389 			chip_name = "raven";
1390 		break;
1391 	case CHIP_ARCTURUS:
1392 		chip_name = "arcturus";
1393 		break;
1394 	case CHIP_RENOIR:
1395 		chip_name = "renoir";
1396 		break;
1397 	default:
1398 		BUG();
1399 	}
1400 
1401 	/* No CPG in Arcturus */
1402 	if (adev->asic_type != CHIP_ARCTURUS) {
1403 		r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1404 		if (r)
1405 			return r;
1406 	}
1407 
1408 	r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1409 	if (r)
1410 		return r;
1411 
1412 	r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1413 	if (r)
1414 		return r;
1415 
1416 	return r;
1417 }
1418 
1419 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1420 {
1421 	u32 count = 0;
1422 	const struct cs_section_def *sect = NULL;
1423 	const struct cs_extent_def *ext = NULL;
1424 
1425 	/* begin clear state */
1426 	count += 2;
1427 	/* context control state */
1428 	count += 3;
1429 
1430 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1431 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1432 			if (sect->id == SECT_CONTEXT)
1433 				count += 2 + ext->reg_count;
1434 			else
1435 				return 0;
1436 		}
1437 	}
1438 
1439 	/* end clear state */
1440 	count += 2;
1441 	/* clear state */
1442 	count += 2;
1443 
1444 	return count;
1445 }
1446 
1447 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1448 				    volatile u32 *buffer)
1449 {
1450 	u32 count = 0, i;
1451 	const struct cs_section_def *sect = NULL;
1452 	const struct cs_extent_def *ext = NULL;
1453 
1454 	if (adev->gfx.rlc.cs_data == NULL)
1455 		return;
1456 	if (buffer == NULL)
1457 		return;
1458 
1459 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1460 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1461 
1462 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1463 	buffer[count++] = cpu_to_le32(0x80000000);
1464 	buffer[count++] = cpu_to_le32(0x80000000);
1465 
1466 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1467 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1468 			if (sect->id == SECT_CONTEXT) {
1469 				buffer[count++] =
1470 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1471 				buffer[count++] = cpu_to_le32(ext->reg_index -
1472 						PACKET3_SET_CONTEXT_REG_START);
1473 				for (i = 0; i < ext->reg_count; i++)
1474 					buffer[count++] = cpu_to_le32(ext->extent[i]);
1475 			} else {
1476 				return;
1477 			}
1478 		}
1479 	}
1480 
1481 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1482 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1483 
1484 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1485 	buffer[count++] = cpu_to_le32(0);
1486 }
1487 
1488 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1489 {
1490 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1491 	uint32_t pg_always_on_cu_num = 2;
1492 	uint32_t always_on_cu_num;
1493 	uint32_t i, j, k;
1494 	uint32_t mask, cu_bitmap, counter;
1495 
1496 	if (adev->flags & AMD_IS_APU)
1497 		always_on_cu_num = 4;
1498 	else if (adev->asic_type == CHIP_VEGA12)
1499 		always_on_cu_num = 8;
1500 	else
1501 		always_on_cu_num = 12;
1502 
1503 	mutex_lock(&adev->grbm_idx_mutex);
1504 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1505 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1506 			mask = 1;
1507 			cu_bitmap = 0;
1508 			counter = 0;
1509 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1510 
1511 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1512 				if (cu_info->bitmap[i][j] & mask) {
1513 					if (counter == pg_always_on_cu_num)
1514 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1515 					if (counter < always_on_cu_num)
1516 						cu_bitmap |= mask;
1517 					else
1518 						break;
1519 					counter++;
1520 				}
1521 				mask <<= 1;
1522 			}
1523 
1524 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1525 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1526 		}
1527 	}
1528 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1529 	mutex_unlock(&adev->grbm_idx_mutex);
1530 }
1531 
1532 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1533 {
1534 	uint32_t data;
1535 
1536 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1537 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1538 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1539 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1540 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1541 
1542 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1543 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1544 
1545 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1546 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1547 
1548 	mutex_lock(&adev->grbm_idx_mutex);
1549 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1550 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1551 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1552 
1553 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1554 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1555 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1556 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1557 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1558 
1559 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1560 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1561 	data &= 0x0000FFFF;
1562 	data |= 0x00C00000;
1563 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1564 
1565 	/*
1566 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1567 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1568 	 */
1569 
1570 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1571 	 * but used for RLC_LB_CNTL configuration */
1572 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1573 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1574 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1575 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1576 	mutex_unlock(&adev->grbm_idx_mutex);
1577 
1578 	gfx_v9_0_init_always_on_cu_mask(adev);
1579 }
1580 
1581 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1582 {
1583 	uint32_t data;
1584 
1585 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1586 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1587 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1588 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1589 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1590 
1591 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1592 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1593 
1594 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1595 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1596 
1597 	mutex_lock(&adev->grbm_idx_mutex);
1598 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1599 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1600 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1601 
1602 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1603 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1604 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1605 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1606 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1607 
1608 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1609 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1610 	data &= 0x0000FFFF;
1611 	data |= 0x00C00000;
1612 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1613 
1614 	/*
1615 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1616 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1617 	 */
1618 
1619 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1620 	 * but used for RLC_LB_CNTL configuration */
1621 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1622 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1623 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1624 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1625 	mutex_unlock(&adev->grbm_idx_mutex);
1626 
1627 	gfx_v9_0_init_always_on_cu_mask(adev);
1628 }
1629 
1630 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1631 {
1632 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1633 }
1634 
1635 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1636 {
1637 	return 5;
1638 }
1639 
1640 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1641 {
1642 	const struct cs_section_def *cs_data;
1643 	int r;
1644 
1645 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1646 
1647 	cs_data = adev->gfx.rlc.cs_data;
1648 
1649 	if (cs_data) {
1650 		/* init clear state block */
1651 		r = amdgpu_gfx_rlc_init_csb(adev);
1652 		if (r)
1653 			return r;
1654 	}
1655 
1656 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1657 		/* TODO: double check the cp_table_size for RV */
1658 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1659 		r = amdgpu_gfx_rlc_init_cpt(adev);
1660 		if (r)
1661 			return r;
1662 	}
1663 
1664 	switch (adev->asic_type) {
1665 	case CHIP_RAVEN:
1666 		gfx_v9_0_init_lbpw(adev);
1667 		break;
1668 	case CHIP_VEGA20:
1669 		gfx_v9_4_init_lbpw(adev);
1670 		break;
1671 	default:
1672 		break;
1673 	}
1674 
1675 	return 0;
1676 }
1677 
1678 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1679 {
1680 	int r;
1681 
1682 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1683 	if (unlikely(r != 0))
1684 		return r;
1685 
1686 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1687 			AMDGPU_GEM_DOMAIN_VRAM);
1688 	if (!r)
1689 		adev->gfx.rlc.clear_state_gpu_addr =
1690 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1691 
1692 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1693 
1694 	return r;
1695 }
1696 
1697 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1698 {
1699 	int r;
1700 
1701 	if (!adev->gfx.rlc.clear_state_obj)
1702 		return;
1703 
1704 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1705 	if (likely(r == 0)) {
1706 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1707 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1708 	}
1709 }
1710 
1711 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1712 {
1713 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1714 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1715 }
1716 
1717 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1718 {
1719 	int r;
1720 	u32 *hpd;
1721 	const __le32 *fw_data;
1722 	unsigned fw_size;
1723 	u32 *fw;
1724 	size_t mec_hpd_size;
1725 
1726 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1727 
1728 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1729 
1730 	/* take ownership of the relevant compute queues */
1731 	amdgpu_gfx_compute_queue_acquire(adev);
1732 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1733 
1734 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1735 				      AMDGPU_GEM_DOMAIN_VRAM,
1736 				      &adev->gfx.mec.hpd_eop_obj,
1737 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1738 				      (void **)&hpd);
1739 	if (r) {
1740 		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1741 		gfx_v9_0_mec_fini(adev);
1742 		return r;
1743 	}
1744 
1745 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1746 
1747 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1748 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1749 
1750 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1751 
1752 	fw_data = (const __le32 *)
1753 		(adev->gfx.mec_fw->data +
1754 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1755 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1756 
1757 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1758 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1759 				      &adev->gfx.mec.mec_fw_obj,
1760 				      &adev->gfx.mec.mec_fw_gpu_addr,
1761 				      (void **)&fw);
1762 	if (r) {
1763 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1764 		gfx_v9_0_mec_fini(adev);
1765 		return r;
1766 	}
1767 
1768 	memcpy(fw, fw_data, fw_size);
1769 
1770 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1771 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1772 
1773 	return 0;
1774 }
1775 
1776 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1777 {
1778 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1779 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1780 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1781 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1782 		(SQ_IND_INDEX__FORCE_READ_MASK));
1783 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1784 }
1785 
1786 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1787 			   uint32_t wave, uint32_t thread,
1788 			   uint32_t regno, uint32_t num, uint32_t *out)
1789 {
1790 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1791 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1792 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1793 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1794 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1795 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1796 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1797 	while (num--)
1798 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1799 }
1800 
1801 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1802 {
1803 	/* type 1 wave data */
1804 	dst[(*no_fields)++] = 1;
1805 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1806 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1807 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1808 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1809 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1810 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1811 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1812 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1813 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1814 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1815 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1816 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1817 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1818 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1819 }
1820 
1821 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1822 				     uint32_t wave, uint32_t start,
1823 				     uint32_t size, uint32_t *dst)
1824 {
1825 	wave_read_regs(
1826 		adev, simd, wave, 0,
1827 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1828 }
1829 
1830 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1831 				     uint32_t wave, uint32_t thread,
1832 				     uint32_t start, uint32_t size,
1833 				     uint32_t *dst)
1834 {
1835 	wave_read_regs(
1836 		adev, simd, wave, thread,
1837 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1838 }
1839 
1840 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1841 				  u32 me, u32 pipe, u32 q, u32 vm)
1842 {
1843 	soc15_grbm_select(adev, me, pipe, q, vm);
1844 }
1845 
1846 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1847 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1848 	.select_se_sh = &gfx_v9_0_select_se_sh,
1849 	.read_wave_data = &gfx_v9_0_read_wave_data,
1850 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1851 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1852 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1853 	.ras_error_inject = &gfx_v9_0_ras_error_inject,
1854 	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
1855 };
1856 
1857 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1858 {
1859 	u32 gb_addr_config;
1860 	int err;
1861 
1862 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1863 
1864 	switch (adev->asic_type) {
1865 	case CHIP_VEGA10:
1866 		adev->gfx.config.max_hw_contexts = 8;
1867 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1868 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1869 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1870 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1871 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1872 		break;
1873 	case CHIP_VEGA12:
1874 		adev->gfx.config.max_hw_contexts = 8;
1875 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1876 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1877 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1878 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1879 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1880 		DRM_INFO("fix gfx.config for vega12\n");
1881 		break;
1882 	case CHIP_VEGA20:
1883 		adev->gfx.config.max_hw_contexts = 8;
1884 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1885 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1886 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1887 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1888 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1889 		gb_addr_config &= ~0xf3e777ff;
1890 		gb_addr_config |= 0x22014042;
1891 		/* check vbios table if gpu info is not available */
1892 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1893 		if (err)
1894 			return err;
1895 		break;
1896 	case CHIP_RAVEN:
1897 		adev->gfx.config.max_hw_contexts = 8;
1898 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1899 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1900 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1901 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1902 		if (adev->rev_id >= 8)
1903 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1904 		else
1905 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1906 		break;
1907 	case CHIP_ARCTURUS:
1908 		adev->gfx.config.max_hw_contexts = 8;
1909 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1910 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1911 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1912 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1913 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1914 		gb_addr_config &= ~0xf3e777ff;
1915 		gb_addr_config |= 0x22014042;
1916 		break;
1917 	case CHIP_RENOIR:
1918 		adev->gfx.config.max_hw_contexts = 8;
1919 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1920 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1921 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1922 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1923 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1924 		gb_addr_config &= ~0xf3e777ff;
1925 		gb_addr_config |= 0x22010042;
1926 		break;
1927 	default:
1928 		BUG();
1929 		break;
1930 	}
1931 
1932 	adev->gfx.config.gb_addr_config = gb_addr_config;
1933 
1934 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1935 			REG_GET_FIELD(
1936 					adev->gfx.config.gb_addr_config,
1937 					GB_ADDR_CONFIG,
1938 					NUM_PIPES);
1939 
1940 	adev->gfx.config.max_tile_pipes =
1941 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1942 
1943 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1944 			REG_GET_FIELD(
1945 					adev->gfx.config.gb_addr_config,
1946 					GB_ADDR_CONFIG,
1947 					NUM_BANKS);
1948 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1949 			REG_GET_FIELD(
1950 					adev->gfx.config.gb_addr_config,
1951 					GB_ADDR_CONFIG,
1952 					MAX_COMPRESSED_FRAGS);
1953 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1954 			REG_GET_FIELD(
1955 					adev->gfx.config.gb_addr_config,
1956 					GB_ADDR_CONFIG,
1957 					NUM_RB_PER_SE);
1958 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1959 			REG_GET_FIELD(
1960 					adev->gfx.config.gb_addr_config,
1961 					GB_ADDR_CONFIG,
1962 					NUM_SHADER_ENGINES);
1963 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1964 			REG_GET_FIELD(
1965 					adev->gfx.config.gb_addr_config,
1966 					GB_ADDR_CONFIG,
1967 					PIPE_INTERLEAVE_SIZE));
1968 
1969 	return 0;
1970 }
1971 
1972 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1973 				   struct amdgpu_ngg_buf *ngg_buf,
1974 				   int size_se,
1975 				   int default_size_se)
1976 {
1977 	int r;
1978 
1979 	if (size_se < 0) {
1980 		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1981 		return -EINVAL;
1982 	}
1983 	size_se = size_se ? size_se : default_size_se;
1984 
1985 	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1986 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1987 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1988 				    &ngg_buf->bo,
1989 				    &ngg_buf->gpu_addr,
1990 				    NULL);
1991 	if (r) {
1992 		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1993 		return r;
1994 	}
1995 	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1996 
1997 	return r;
1998 }
1999 
2000 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
2001 {
2002 	int i;
2003 
2004 	for (i = 0; i < NGG_BUF_MAX; i++)
2005 		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
2006 				      &adev->gfx.ngg.buf[i].gpu_addr,
2007 				      NULL);
2008 
2009 	memset(&adev->gfx.ngg.buf[0], 0,
2010 			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
2011 
2012 	adev->gfx.ngg.init = false;
2013 
2014 	return 0;
2015 }
2016 
2017 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
2018 {
2019 	int r;
2020 
2021 	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
2022 		return 0;
2023 
2024 	/* GDS reserve memory: 64 bytes alignment */
2025 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
2026 	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
2027 	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
2028 	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
2029 
2030 	/* Primitive Buffer */
2031 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
2032 				    amdgpu_prim_buf_per_se,
2033 				    64 * 1024);
2034 	if (r) {
2035 		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
2036 		goto err;
2037 	}
2038 
2039 	/* Position Buffer */
2040 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
2041 				    amdgpu_pos_buf_per_se,
2042 				    256 * 1024);
2043 	if (r) {
2044 		dev_err(adev->dev, "Failed to create Position Buffer\n");
2045 		goto err;
2046 	}
2047 
2048 	/* Control Sideband */
2049 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
2050 				    amdgpu_cntl_sb_buf_per_se,
2051 				    256);
2052 	if (r) {
2053 		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
2054 		goto err;
2055 	}
2056 
2057 	/* Parameter Cache, not created by default */
2058 	if (amdgpu_param_buf_per_se <= 0)
2059 		goto out;
2060 
2061 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2062 				    amdgpu_param_buf_per_se,
2063 				    512 * 1024);
2064 	if (r) {
2065 		dev_err(adev->dev, "Failed to create Parameter Cache\n");
2066 		goto err;
2067 	}
2068 
2069 out:
2070 	adev->gfx.ngg.init = true;
2071 	return 0;
2072 err:
2073 	gfx_v9_0_ngg_fini(adev);
2074 	return r;
2075 }
2076 
2077 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2078 {
2079 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2080 	int r;
2081 	u32 data, base;
2082 
2083 	if (!amdgpu_ngg)
2084 		return 0;
2085 
2086 	/* Program buffer size */
2087 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2088 			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2089 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2090 			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
2091 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2092 
2093 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2094 			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2095 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2096 			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2097 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2098 
2099 	/* Program buffer base address */
2100 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2101 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2102 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2103 
2104 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2105 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2106 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2107 
2108 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2109 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2110 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2111 
2112 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2113 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2114 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2115 
2116 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2117 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2118 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2119 
2120 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2121 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2122 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2123 
2124 	/* Clear GDS reserved memory */
2125 	r = amdgpu_ring_alloc(ring, 17);
2126 	if (r) {
2127 		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2128 			  ring->name, r);
2129 		return r;
2130 	}
2131 
2132 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2133 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2134 			           (adev->gds.gds_size +
2135 				    adev->gfx.ngg.gds_reserve_size));
2136 
2137 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2138 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2139 				PACKET3_DMA_DATA_DST_SEL(1) |
2140 				PACKET3_DMA_DATA_SRC_SEL(2)));
2141 	amdgpu_ring_write(ring, 0);
2142 	amdgpu_ring_write(ring, 0);
2143 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2144 	amdgpu_ring_write(ring, 0);
2145 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2146 				adev->gfx.ngg.gds_reserve_size);
2147 
2148 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2149 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2150 
2151 	amdgpu_ring_commit(ring);
2152 
2153 	return 0;
2154 }
2155 
2156 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2157 				      int mec, int pipe, int queue)
2158 {
2159 	int r;
2160 	unsigned irq_type;
2161 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2162 
2163 	ring = &adev->gfx.compute_ring[ring_id];
2164 
2165 	/* mec0 is me1 */
2166 	ring->me = mec + 1;
2167 	ring->pipe = pipe;
2168 	ring->queue = queue;
2169 
2170 	ring->ring_obj = NULL;
2171 	ring->use_doorbell = true;
2172 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2173 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2174 				+ (ring_id * GFX9_MEC_HPD_SIZE);
2175 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2176 
2177 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2178 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2179 		+ ring->pipe;
2180 
2181 	/* type-2 packets are deprecated on MEC, use type-3 instead */
2182 	r = amdgpu_ring_init(adev, ring, 1024,
2183 			     &adev->gfx.eop_irq, irq_type);
2184 	if (r)
2185 		return r;
2186 
2187 
2188 	return 0;
2189 }
2190 
2191 static int gfx_v9_0_sw_init(void *handle)
2192 {
2193 	int i, j, k, r, ring_id;
2194 	struct amdgpu_ring *ring;
2195 	struct amdgpu_kiq *kiq;
2196 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2197 
2198 	switch (adev->asic_type) {
2199 	case CHIP_VEGA10:
2200 	case CHIP_VEGA12:
2201 	case CHIP_VEGA20:
2202 	case CHIP_RAVEN:
2203 	case CHIP_ARCTURUS:
2204 	case CHIP_RENOIR:
2205 		adev->gfx.mec.num_mec = 2;
2206 		break;
2207 	default:
2208 		adev->gfx.mec.num_mec = 1;
2209 		break;
2210 	}
2211 
2212 	adev->gfx.mec.num_pipe_per_mec = 4;
2213 	adev->gfx.mec.num_queue_per_pipe = 8;
2214 
2215 	/* EOP Event */
2216 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2217 	if (r)
2218 		return r;
2219 
2220 	/* Privileged reg */
2221 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2222 			      &adev->gfx.priv_reg_irq);
2223 	if (r)
2224 		return r;
2225 
2226 	/* Privileged inst */
2227 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2228 			      &adev->gfx.priv_inst_irq);
2229 	if (r)
2230 		return r;
2231 
2232 	/* ECC error */
2233 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2234 			      &adev->gfx.cp_ecc_error_irq);
2235 	if (r)
2236 		return r;
2237 
2238 	/* FUE error */
2239 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2240 			      &adev->gfx.cp_ecc_error_irq);
2241 	if (r)
2242 		return r;
2243 
2244 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2245 
2246 	gfx_v9_0_scratch_init(adev);
2247 
2248 	r = gfx_v9_0_init_microcode(adev);
2249 	if (r) {
2250 		DRM_ERROR("Failed to load gfx firmware!\n");
2251 		return r;
2252 	}
2253 
2254 	r = adev->gfx.rlc.funcs->init(adev);
2255 	if (r) {
2256 		DRM_ERROR("Failed to init rlc BOs!\n");
2257 		return r;
2258 	}
2259 
2260 	r = gfx_v9_0_mec_init(adev);
2261 	if (r) {
2262 		DRM_ERROR("Failed to init MEC BOs!\n");
2263 		return r;
2264 	}
2265 
2266 	/* set up the gfx ring */
2267 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2268 		ring = &adev->gfx.gfx_ring[i];
2269 		ring->ring_obj = NULL;
2270 		if (!i)
2271 			sprintf(ring->name, "gfx");
2272 		else
2273 			sprintf(ring->name, "gfx_%d", i);
2274 		ring->use_doorbell = true;
2275 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2276 		r = amdgpu_ring_init(adev, ring, 1024,
2277 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2278 		if (r)
2279 			return r;
2280 	}
2281 
2282 	/* set up the compute queues - allocate horizontally across pipes */
2283 	ring_id = 0;
2284 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2285 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2286 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2287 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2288 					continue;
2289 
2290 				r = gfx_v9_0_compute_ring_init(adev,
2291 							       ring_id,
2292 							       i, k, j);
2293 				if (r)
2294 					return r;
2295 
2296 				ring_id++;
2297 			}
2298 		}
2299 	}
2300 
2301 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2302 	if (r) {
2303 		DRM_ERROR("Failed to init KIQ BOs!\n");
2304 		return r;
2305 	}
2306 
2307 	kiq = &adev->gfx.kiq;
2308 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2309 	if (r)
2310 		return r;
2311 
2312 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
2313 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2314 	if (r)
2315 		return r;
2316 
2317 	adev->gfx.ce_ram_size = 0x8000;
2318 
2319 	r = gfx_v9_0_gpu_early_init(adev);
2320 	if (r)
2321 		return r;
2322 
2323 	r = gfx_v9_0_ngg_init(adev);
2324 	if (r)
2325 		return r;
2326 
2327 	return 0;
2328 }
2329 
2330 
2331 static int gfx_v9_0_sw_fini(void *handle)
2332 {
2333 	int i;
2334 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2335 
2336 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2337 			adev->gfx.ras_if) {
2338 		struct ras_common_if *ras_if = adev->gfx.ras_if;
2339 		struct ras_ih_if ih_info = {
2340 			.head = *ras_if,
2341 		};
2342 
2343 		amdgpu_ras_debugfs_remove(adev, ras_if);
2344 		amdgpu_ras_sysfs_remove(adev, ras_if);
2345 		amdgpu_ras_interrupt_remove_handler(adev,  &ih_info);
2346 		amdgpu_ras_feature_enable(adev, ras_if, 0);
2347 		kfree(ras_if);
2348 	}
2349 
2350 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2351 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2352 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
2353 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2354 
2355 	amdgpu_gfx_mqd_sw_fini(adev);
2356 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2357 	amdgpu_gfx_kiq_fini(adev);
2358 
2359 	gfx_v9_0_mec_fini(adev);
2360 	gfx_v9_0_ngg_fini(adev);
2361 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2362 	if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2363 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2364 				&adev->gfx.rlc.cp_table_gpu_addr,
2365 				(void **)&adev->gfx.rlc.cp_table_ptr);
2366 	}
2367 	gfx_v9_0_free_microcode(adev);
2368 
2369 	return 0;
2370 }
2371 
2372 
2373 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2374 {
2375 	/* TODO */
2376 }
2377 
2378 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2379 {
2380 	u32 data;
2381 
2382 	if (instance == 0xffffffff)
2383 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2384 	else
2385 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2386 
2387 	if (se_num == 0xffffffff)
2388 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2389 	else
2390 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2391 
2392 	if (sh_num == 0xffffffff)
2393 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2394 	else
2395 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2396 
2397 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2398 }
2399 
2400 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2401 {
2402 	u32 data, mask;
2403 
2404 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2405 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2406 
2407 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2408 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2409 
2410 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2411 					 adev->gfx.config.max_sh_per_se);
2412 
2413 	return (~data) & mask;
2414 }
2415 
2416 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2417 {
2418 	int i, j;
2419 	u32 data;
2420 	u32 active_rbs = 0;
2421 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2422 					adev->gfx.config.max_sh_per_se;
2423 
2424 	mutex_lock(&adev->grbm_idx_mutex);
2425 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2426 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2427 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2428 			data = gfx_v9_0_get_rb_active_bitmap(adev);
2429 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2430 					       rb_bitmap_width_per_sh);
2431 		}
2432 	}
2433 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2434 	mutex_unlock(&adev->grbm_idx_mutex);
2435 
2436 	adev->gfx.config.backend_enable_mask = active_rbs;
2437 	adev->gfx.config.num_rbs = hweight32(active_rbs);
2438 }
2439 
2440 #define DEFAULT_SH_MEM_BASES	(0x6000)
2441 #define FIRST_COMPUTE_VMID	(8)
2442 #define LAST_COMPUTE_VMID	(16)
2443 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2444 {
2445 	int i;
2446 	uint32_t sh_mem_config;
2447 	uint32_t sh_mem_bases;
2448 
2449 	/*
2450 	 * Configure apertures:
2451 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2452 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2453 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2454 	 */
2455 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2456 
2457 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2458 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2459 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2460 
2461 	mutex_lock(&adev->srbm_mutex);
2462 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2463 		soc15_grbm_select(adev, 0, 0, 0, i);
2464 		/* CP and shaders */
2465 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2466 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2467 	}
2468 	soc15_grbm_select(adev, 0, 0, 0, 0);
2469 	mutex_unlock(&adev->srbm_mutex);
2470 
2471 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
2472 	   acccess. These should be enabled by FW for target VMIDs. */
2473 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2474 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2475 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2476 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2477 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2478 	}
2479 }
2480 
2481 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2482 {
2483 	int vmid;
2484 
2485 	/*
2486 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2487 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2488 	 * the driver can enable them for graphics. VMID0 should maintain
2489 	 * access so that HWS firmware can save/restore entries.
2490 	 */
2491 	for (vmid = 1; vmid < 16; vmid++) {
2492 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2493 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2494 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2495 		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2496 	}
2497 }
2498 
2499 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2500 {
2501 	u32 tmp;
2502 	int i;
2503 
2504 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2505 
2506 	gfx_v9_0_tiling_mode_table_init(adev);
2507 
2508 	gfx_v9_0_setup_rb(adev);
2509 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2510 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2511 
2512 	/* XXX SH_MEM regs */
2513 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2514 	mutex_lock(&adev->srbm_mutex);
2515 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2516 		soc15_grbm_select(adev, 0, 0, 0, i);
2517 		/* CP and shaders */
2518 		if (i == 0) {
2519 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2520 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2521 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2522 					    !!amdgpu_noretry);
2523 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2524 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2525 		} else {
2526 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2527 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2528 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2529 					    !!amdgpu_noretry);
2530 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2531 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2532 				(adev->gmc.private_aperture_start >> 48));
2533 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2534 				(adev->gmc.shared_aperture_start >> 48));
2535 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2536 		}
2537 	}
2538 	soc15_grbm_select(adev, 0, 0, 0, 0);
2539 
2540 	mutex_unlock(&adev->srbm_mutex);
2541 
2542 	gfx_v9_0_init_compute_vmid(adev);
2543 	gfx_v9_0_init_gds_vmid(adev);
2544 }
2545 
2546 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2547 {
2548 	u32 i, j, k;
2549 	u32 mask;
2550 
2551 	mutex_lock(&adev->grbm_idx_mutex);
2552 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2553 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2554 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2555 			for (k = 0; k < adev->usec_timeout; k++) {
2556 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2557 					break;
2558 				udelay(1);
2559 			}
2560 			if (k == adev->usec_timeout) {
2561 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
2562 						      0xffffffff, 0xffffffff);
2563 				mutex_unlock(&adev->grbm_idx_mutex);
2564 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2565 					 i, j);
2566 				return;
2567 			}
2568 		}
2569 	}
2570 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2571 	mutex_unlock(&adev->grbm_idx_mutex);
2572 
2573 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2574 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2575 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2576 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2577 	for (k = 0; k < adev->usec_timeout; k++) {
2578 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2579 			break;
2580 		udelay(1);
2581 	}
2582 }
2583 
2584 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2585 					       bool enable)
2586 {
2587 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2588 
2589 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2590 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2591 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2592 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2593 
2594 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2595 }
2596 
2597 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2598 {
2599 	/* csib */
2600 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2601 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2602 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2603 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2604 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2605 			adev->gfx.rlc.clear_state_size);
2606 }
2607 
2608 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2609 				int indirect_offset,
2610 				int list_size,
2611 				int *unique_indirect_regs,
2612 				int unique_indirect_reg_count,
2613 				int *indirect_start_offsets,
2614 				int *indirect_start_offsets_count,
2615 				int max_start_offsets_count)
2616 {
2617 	int idx;
2618 
2619 	for (; indirect_offset < list_size; indirect_offset++) {
2620 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2621 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2622 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2623 
2624 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2625 			indirect_offset += 2;
2626 
2627 			/* look for the matching indice */
2628 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2629 				if (unique_indirect_regs[idx] ==
2630 					register_list_format[indirect_offset] ||
2631 					!unique_indirect_regs[idx])
2632 					break;
2633 			}
2634 
2635 			BUG_ON(idx >= unique_indirect_reg_count);
2636 
2637 			if (!unique_indirect_regs[idx])
2638 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2639 
2640 			indirect_offset++;
2641 		}
2642 	}
2643 }
2644 
2645 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2646 {
2647 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2648 	int unique_indirect_reg_count = 0;
2649 
2650 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2651 	int indirect_start_offsets_count = 0;
2652 
2653 	int list_size = 0;
2654 	int i = 0, j = 0;
2655 	u32 tmp = 0;
2656 
2657 	u32 *register_list_format =
2658 		kmemdup(adev->gfx.rlc.register_list_format,
2659 			adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2660 	if (!register_list_format)
2661 		return -ENOMEM;
2662 
2663 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2664 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2665 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2666 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2667 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2668 				    unique_indirect_regs,
2669 				    unique_indirect_reg_count,
2670 				    indirect_start_offsets,
2671 				    &indirect_start_offsets_count,
2672 				    ARRAY_SIZE(indirect_start_offsets));
2673 
2674 	/* enable auto inc in case it is disabled */
2675 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2676 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2677 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2678 
2679 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2680 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2681 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2682 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2683 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2684 			adev->gfx.rlc.register_restore[i]);
2685 
2686 	/* load indirect register */
2687 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2688 		adev->gfx.rlc.reg_list_format_start);
2689 
2690 	/* direct register portion */
2691 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2692 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2693 			register_list_format[i]);
2694 
2695 	/* indirect register portion */
2696 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2697 		if (register_list_format[i] == 0xFFFFFFFF) {
2698 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2699 			continue;
2700 		}
2701 
2702 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2703 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2704 
2705 		for (j = 0; j < unique_indirect_reg_count; j++) {
2706 			if (register_list_format[i] == unique_indirect_regs[j]) {
2707 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2708 				break;
2709 			}
2710 		}
2711 
2712 		BUG_ON(j >= unique_indirect_reg_count);
2713 
2714 		i++;
2715 	}
2716 
2717 	/* set save/restore list size */
2718 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2719 	list_size = list_size >> 1;
2720 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2721 		adev->gfx.rlc.reg_restore_list_size);
2722 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2723 
2724 	/* write the starting offsets to RLC scratch ram */
2725 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2726 		adev->gfx.rlc.starting_offsets_start);
2727 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2728 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2729 		       indirect_start_offsets[i]);
2730 
2731 	/* load unique indirect regs*/
2732 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2733 		if (unique_indirect_regs[i] != 0) {
2734 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2735 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2736 			       unique_indirect_regs[i] & 0x3FFFF);
2737 
2738 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2739 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2740 			       unique_indirect_regs[i] >> 20);
2741 		}
2742 	}
2743 
2744 	kfree(register_list_format);
2745 	return 0;
2746 }
2747 
2748 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2749 {
2750 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2751 }
2752 
2753 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2754 					     bool enable)
2755 {
2756 	uint32_t data = 0;
2757 	uint32_t default_data = 0;
2758 
2759 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2760 	if (enable == true) {
2761 		/* enable GFXIP control over CGPG */
2762 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2763 		if(default_data != data)
2764 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2765 
2766 		/* update status */
2767 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2768 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2769 		if(default_data != data)
2770 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2771 	} else {
2772 		/* restore GFXIP control over GCPG */
2773 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2774 		if(default_data != data)
2775 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2776 	}
2777 }
2778 
2779 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2780 {
2781 	uint32_t data = 0;
2782 
2783 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2784 			      AMD_PG_SUPPORT_GFX_SMG |
2785 			      AMD_PG_SUPPORT_GFX_DMG)) {
2786 		/* init IDLE_POLL_COUNT = 60 */
2787 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2788 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2789 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2790 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2791 
2792 		/* init RLC PG Delay */
2793 		data = 0;
2794 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2795 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2796 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2797 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2798 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2799 
2800 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2801 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2802 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2803 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2804 
2805 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2806 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2807 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2808 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2809 
2810 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2811 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2812 
2813 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2814 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2815 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2816 
2817 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2818 	}
2819 }
2820 
2821 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2822 						bool enable)
2823 {
2824 	uint32_t data = 0;
2825 	uint32_t default_data = 0;
2826 
2827 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2828 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2829 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2830 			     enable ? 1 : 0);
2831 	if (default_data != data)
2832 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2833 }
2834 
2835 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2836 						bool enable)
2837 {
2838 	uint32_t data = 0;
2839 	uint32_t default_data = 0;
2840 
2841 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2842 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2843 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2844 			     enable ? 1 : 0);
2845 	if(default_data != data)
2846 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2847 }
2848 
2849 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2850 					bool enable)
2851 {
2852 	uint32_t data = 0;
2853 	uint32_t default_data = 0;
2854 
2855 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2856 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2857 			     CP_PG_DISABLE,
2858 			     enable ? 0 : 1);
2859 	if(default_data != data)
2860 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2861 }
2862 
2863 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2864 						bool enable)
2865 {
2866 	uint32_t data, default_data;
2867 
2868 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2869 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2870 			     GFX_POWER_GATING_ENABLE,
2871 			     enable ? 1 : 0);
2872 	if(default_data != data)
2873 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2874 }
2875 
2876 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2877 						bool enable)
2878 {
2879 	uint32_t data, default_data;
2880 
2881 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2882 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2883 			     GFX_PIPELINE_PG_ENABLE,
2884 			     enable ? 1 : 0);
2885 	if(default_data != data)
2886 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2887 
2888 	if (!enable)
2889 		/* read any GFX register to wake up GFX */
2890 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2891 }
2892 
2893 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2894 						       bool enable)
2895 {
2896 	uint32_t data, default_data;
2897 
2898 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2899 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2900 			     STATIC_PER_CU_PG_ENABLE,
2901 			     enable ? 1 : 0);
2902 	if(default_data != data)
2903 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2904 }
2905 
2906 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2907 						bool enable)
2908 {
2909 	uint32_t data, default_data;
2910 
2911 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2912 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2913 			     DYN_PER_CU_PG_ENABLE,
2914 			     enable ? 1 : 0);
2915 	if(default_data != data)
2916 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2917 }
2918 
2919 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2920 {
2921 	gfx_v9_0_init_csb(adev);
2922 
2923 	/*
2924 	 * Rlc save restore list is workable since v2_1.
2925 	 * And it's needed by gfxoff feature.
2926 	 */
2927 	if (adev->gfx.rlc.is_rlc_v2_1) {
2928 		gfx_v9_1_init_rlc_save_restore_list(adev);
2929 		gfx_v9_0_enable_save_restore_machine(adev);
2930 	}
2931 
2932 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2933 			      AMD_PG_SUPPORT_GFX_SMG |
2934 			      AMD_PG_SUPPORT_GFX_DMG |
2935 			      AMD_PG_SUPPORT_CP |
2936 			      AMD_PG_SUPPORT_GDS |
2937 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2938 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2939 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2940 		gfx_v9_0_init_gfx_power_gating(adev);
2941 	}
2942 }
2943 
2944 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2945 {
2946 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2947 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2948 	gfx_v9_0_wait_for_rlc_serdes(adev);
2949 }
2950 
2951 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2952 {
2953 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2954 	udelay(50);
2955 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2956 	udelay(50);
2957 }
2958 
2959 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2960 {
2961 #ifdef AMDGPU_RLC_DEBUG_RETRY
2962 	u32 rlc_ucode_ver;
2963 #endif
2964 
2965 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2966 	udelay(50);
2967 
2968 	/* carrizo do enable cp interrupt after cp inited */
2969 	if (!(adev->flags & AMD_IS_APU)) {
2970 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2971 		udelay(50);
2972 	}
2973 
2974 #ifdef AMDGPU_RLC_DEBUG_RETRY
2975 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2976 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2977 	if(rlc_ucode_ver == 0x108) {
2978 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2979 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2980 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2981 		 * default is 0x9C4 to create a 100us interval */
2982 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2983 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2984 		 * to disable the page fault retry interrupts, default is
2985 		 * 0x100 (256) */
2986 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2987 	}
2988 #endif
2989 }
2990 
2991 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2992 {
2993 	const struct rlc_firmware_header_v2_0 *hdr;
2994 	const __le32 *fw_data;
2995 	unsigned i, fw_size;
2996 
2997 	if (!adev->gfx.rlc_fw)
2998 		return -EINVAL;
2999 
3000 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3001 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
3002 
3003 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3004 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3005 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3006 
3007 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3008 			RLCG_UCODE_LOADING_START_ADDRESS);
3009 	for (i = 0; i < fw_size; i++)
3010 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3011 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3012 
3013 	return 0;
3014 }
3015 
3016 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3017 {
3018 	int r;
3019 
3020 	if (amdgpu_sriov_vf(adev)) {
3021 		gfx_v9_0_init_csb(adev);
3022 		return 0;
3023 	}
3024 
3025 	adev->gfx.rlc.funcs->stop(adev);
3026 
3027 	/* disable CG */
3028 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3029 
3030 	gfx_v9_0_init_pg(adev);
3031 
3032 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3033 		/* legacy rlc firmware loading */
3034 		r = gfx_v9_0_rlc_load_microcode(adev);
3035 		if (r)
3036 			return r;
3037 	}
3038 
3039 	switch (adev->asic_type) {
3040 	case CHIP_RAVEN:
3041 		if (amdgpu_lbpw == 0)
3042 			gfx_v9_0_enable_lbpw(adev, false);
3043 		else
3044 			gfx_v9_0_enable_lbpw(adev, true);
3045 		break;
3046 	case CHIP_VEGA20:
3047 		if (amdgpu_lbpw > 0)
3048 			gfx_v9_0_enable_lbpw(adev, true);
3049 		else
3050 			gfx_v9_0_enable_lbpw(adev, false);
3051 		break;
3052 	default:
3053 		break;
3054 	}
3055 
3056 	adev->gfx.rlc.funcs->start(adev);
3057 
3058 	return 0;
3059 }
3060 
3061 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3062 {
3063 	int i;
3064 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3065 
3066 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3067 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3068 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3069 	if (!enable) {
3070 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3071 			adev->gfx.gfx_ring[i].sched.ready = false;
3072 	}
3073 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3074 	udelay(50);
3075 }
3076 
3077 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3078 {
3079 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3080 	const struct gfx_firmware_header_v1_0 *ce_hdr;
3081 	const struct gfx_firmware_header_v1_0 *me_hdr;
3082 	const __le32 *fw_data;
3083 	unsigned i, fw_size;
3084 
3085 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3086 		return -EINVAL;
3087 
3088 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3089 		adev->gfx.pfp_fw->data;
3090 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3091 		adev->gfx.ce_fw->data;
3092 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3093 		adev->gfx.me_fw->data;
3094 
3095 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3096 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3097 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3098 
3099 	gfx_v9_0_cp_gfx_enable(adev, false);
3100 
3101 	/* PFP */
3102 	fw_data = (const __le32 *)
3103 		(adev->gfx.pfp_fw->data +
3104 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3105 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3106 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3107 	for (i = 0; i < fw_size; i++)
3108 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3109 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3110 
3111 	/* CE */
3112 	fw_data = (const __le32 *)
3113 		(adev->gfx.ce_fw->data +
3114 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3115 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3116 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3117 	for (i = 0; i < fw_size; i++)
3118 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3119 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3120 
3121 	/* ME */
3122 	fw_data = (const __le32 *)
3123 		(adev->gfx.me_fw->data +
3124 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3125 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3126 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3127 	for (i = 0; i < fw_size; i++)
3128 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3129 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3130 
3131 	return 0;
3132 }
3133 
3134 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3135 {
3136 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3137 	const struct cs_section_def *sect = NULL;
3138 	const struct cs_extent_def *ext = NULL;
3139 	int r, i, tmp;
3140 
3141 	/* init the CP */
3142 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3143 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3144 
3145 	gfx_v9_0_cp_gfx_enable(adev, true);
3146 
3147 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3148 	if (r) {
3149 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3150 		return r;
3151 	}
3152 
3153 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3154 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3155 
3156 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3157 	amdgpu_ring_write(ring, 0x80000000);
3158 	amdgpu_ring_write(ring, 0x80000000);
3159 
3160 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3161 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3162 			if (sect->id == SECT_CONTEXT) {
3163 				amdgpu_ring_write(ring,
3164 				       PACKET3(PACKET3_SET_CONTEXT_REG,
3165 					       ext->reg_count));
3166 				amdgpu_ring_write(ring,
3167 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3168 				for (i = 0; i < ext->reg_count; i++)
3169 					amdgpu_ring_write(ring, ext->extent[i]);
3170 			}
3171 		}
3172 	}
3173 
3174 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3175 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3176 
3177 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3178 	amdgpu_ring_write(ring, 0);
3179 
3180 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3181 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3182 	amdgpu_ring_write(ring, 0x8000);
3183 	amdgpu_ring_write(ring, 0x8000);
3184 
3185 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
3186 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3187 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3188 	amdgpu_ring_write(ring, tmp);
3189 	amdgpu_ring_write(ring, 0);
3190 
3191 	amdgpu_ring_commit(ring);
3192 
3193 	return 0;
3194 }
3195 
3196 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3197 {
3198 	struct amdgpu_ring *ring;
3199 	u32 tmp;
3200 	u32 rb_bufsz;
3201 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3202 
3203 	/* Set the write pointer delay */
3204 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3205 
3206 	/* set the RB to use vmid 0 */
3207 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3208 
3209 	/* Set ring buffer size */
3210 	ring = &adev->gfx.gfx_ring[0];
3211 	rb_bufsz = order_base_2(ring->ring_size / 8);
3212 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3213 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3214 #ifdef __BIG_ENDIAN
3215 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3216 #endif
3217 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3218 
3219 	/* Initialize the ring buffer's write pointers */
3220 	ring->wptr = 0;
3221 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3222 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3223 
3224 	/* set the wb address wether it's enabled or not */
3225 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3226 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3227 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3228 
3229 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3230 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3231 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3232 
3233 	mdelay(1);
3234 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3235 
3236 	rb_addr = ring->gpu_addr >> 8;
3237 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3238 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3239 
3240 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3241 	if (ring->use_doorbell) {
3242 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3243 				    DOORBELL_OFFSET, ring->doorbell_index);
3244 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3245 				    DOORBELL_EN, 1);
3246 	} else {
3247 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3248 	}
3249 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3250 
3251 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3252 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
3253 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3254 
3255 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3256 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3257 
3258 
3259 	/* start the ring */
3260 	gfx_v9_0_cp_gfx_start(adev);
3261 	ring->sched.ready = true;
3262 
3263 	return 0;
3264 }
3265 
3266 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3267 {
3268 	int i;
3269 
3270 	if (enable) {
3271 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3272 	} else {
3273 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3274 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3275 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
3276 			adev->gfx.compute_ring[i].sched.ready = false;
3277 		adev->gfx.kiq.ring.sched.ready = false;
3278 	}
3279 	udelay(50);
3280 }
3281 
3282 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3283 {
3284 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3285 	const __le32 *fw_data;
3286 	unsigned i;
3287 	u32 tmp;
3288 
3289 	if (!adev->gfx.mec_fw)
3290 		return -EINVAL;
3291 
3292 	gfx_v9_0_cp_compute_enable(adev, false);
3293 
3294 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3295 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3296 
3297 	fw_data = (const __le32 *)
3298 		(adev->gfx.mec_fw->data +
3299 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3300 	tmp = 0;
3301 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3302 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3303 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3304 
3305 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3306 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3307 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3308 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3309 
3310 	/* MEC1 */
3311 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3312 			 mec_hdr->jt_offset);
3313 	for (i = 0; i < mec_hdr->jt_size; i++)
3314 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3315 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3316 
3317 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3318 			adev->gfx.mec_fw_version);
3319 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3320 
3321 	return 0;
3322 }
3323 
3324 /* KIQ functions */
3325 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3326 {
3327 	uint32_t tmp;
3328 	struct amdgpu_device *adev = ring->adev;
3329 
3330 	/* tell RLC which is KIQ queue */
3331 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3332 	tmp &= 0xffffff00;
3333 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3334 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3335 	tmp |= 0x80;
3336 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3337 }
3338 
3339 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3340 {
3341 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3342 	uint64_t queue_mask = 0;
3343 	int r, i;
3344 
3345 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3346 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3347 			continue;
3348 
3349 		/* This situation may be hit in the future if a new HW
3350 		 * generation exposes more than 64 queues. If so, the
3351 		 * definition of queue_mask needs updating */
3352 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3353 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3354 			break;
3355 		}
3356 
3357 		queue_mask |= (1ull << i);
3358 	}
3359 
3360 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3361 	if (r) {
3362 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3363 		return r;
3364 	}
3365 
3366 	/* set resources */
3367 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3368 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3369 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
3370 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
3371 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
3372 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
3373 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
3374 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
3375 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
3376 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3377 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3378 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3379 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3380 
3381 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3382 		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
3383 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3384 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3385 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3386 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3387 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3388 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3389 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3390 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3391 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3392 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3393 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3394 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3395 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3396 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3397 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3398 	}
3399 
3400 	r = amdgpu_ring_test_helper(kiq_ring);
3401 	if (r)
3402 		DRM_ERROR("KCQ enable failed\n");
3403 
3404 	return r;
3405 }
3406 
3407 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3408 {
3409 	struct amdgpu_device *adev = ring->adev;
3410 	struct v9_mqd *mqd = ring->mqd_ptr;
3411 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3412 	uint32_t tmp;
3413 
3414 	mqd->header = 0xC0310800;
3415 	mqd->compute_pipelinestat_enable = 0x00000001;
3416 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3417 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3418 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3419 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3420 	mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3421 	mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3422 	mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3423 	mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3424 	mqd->compute_misc_reserved = 0x00000003;
3425 
3426 	mqd->dynamic_cu_mask_addr_lo =
3427 		lower_32_bits(ring->mqd_gpu_addr
3428 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3429 	mqd->dynamic_cu_mask_addr_hi =
3430 		upper_32_bits(ring->mqd_gpu_addr
3431 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3432 
3433 	eop_base_addr = ring->eop_gpu_addr >> 8;
3434 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3435 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3436 
3437 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3438 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3439 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3440 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3441 
3442 	mqd->cp_hqd_eop_control = tmp;
3443 
3444 	/* enable doorbell? */
3445 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3446 
3447 	if (ring->use_doorbell) {
3448 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3449 				    DOORBELL_OFFSET, ring->doorbell_index);
3450 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3451 				    DOORBELL_EN, 1);
3452 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3453 				    DOORBELL_SOURCE, 0);
3454 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3455 				    DOORBELL_HIT, 0);
3456 	} else {
3457 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3458 					 DOORBELL_EN, 0);
3459 	}
3460 
3461 	mqd->cp_hqd_pq_doorbell_control = tmp;
3462 
3463 	/* disable the queue if it's active */
3464 	ring->wptr = 0;
3465 	mqd->cp_hqd_dequeue_request = 0;
3466 	mqd->cp_hqd_pq_rptr = 0;
3467 	mqd->cp_hqd_pq_wptr_lo = 0;
3468 	mqd->cp_hqd_pq_wptr_hi = 0;
3469 
3470 	/* set the pointer to the MQD */
3471 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3472 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3473 
3474 	/* set MQD vmid to 0 */
3475 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3476 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3477 	mqd->cp_mqd_control = tmp;
3478 
3479 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3480 	hqd_gpu_addr = ring->gpu_addr >> 8;
3481 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3482 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3483 
3484 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3485 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3486 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3487 			    (order_base_2(ring->ring_size / 4) - 1));
3488 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3489 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3490 #ifdef __BIG_ENDIAN
3491 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3492 #endif
3493 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3494 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3495 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3496 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3497 	mqd->cp_hqd_pq_control = tmp;
3498 
3499 	/* set the wb address whether it's enabled or not */
3500 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3501 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3502 	mqd->cp_hqd_pq_rptr_report_addr_hi =
3503 		upper_32_bits(wb_gpu_addr) & 0xffff;
3504 
3505 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3506 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3507 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3508 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3509 
3510 	tmp = 0;
3511 	/* enable the doorbell if requested */
3512 	if (ring->use_doorbell) {
3513 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3514 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3515 				DOORBELL_OFFSET, ring->doorbell_index);
3516 
3517 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3518 					 DOORBELL_EN, 1);
3519 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3520 					 DOORBELL_SOURCE, 0);
3521 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3522 					 DOORBELL_HIT, 0);
3523 	}
3524 
3525 	mqd->cp_hqd_pq_doorbell_control = tmp;
3526 
3527 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3528 	ring->wptr = 0;
3529 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3530 
3531 	/* set the vmid for the queue */
3532 	mqd->cp_hqd_vmid = 0;
3533 
3534 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3535 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3536 	mqd->cp_hqd_persistent_state = tmp;
3537 
3538 	/* set MIN_IB_AVAIL_SIZE */
3539 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3540 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3541 	mqd->cp_hqd_ib_control = tmp;
3542 
3543 	/* activate the queue */
3544 	mqd->cp_hqd_active = 1;
3545 
3546 	return 0;
3547 }
3548 
3549 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3550 {
3551 	struct amdgpu_device *adev = ring->adev;
3552 	struct v9_mqd *mqd = ring->mqd_ptr;
3553 	int j;
3554 
3555 	/* disable wptr polling */
3556 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3557 
3558 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3559 	       mqd->cp_hqd_eop_base_addr_lo);
3560 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3561 	       mqd->cp_hqd_eop_base_addr_hi);
3562 
3563 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3564 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3565 	       mqd->cp_hqd_eop_control);
3566 
3567 	/* enable doorbell? */
3568 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3569 	       mqd->cp_hqd_pq_doorbell_control);
3570 
3571 	/* disable the queue if it's active */
3572 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3573 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3574 		for (j = 0; j < adev->usec_timeout; j++) {
3575 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3576 				break;
3577 			udelay(1);
3578 		}
3579 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3580 		       mqd->cp_hqd_dequeue_request);
3581 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3582 		       mqd->cp_hqd_pq_rptr);
3583 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3584 		       mqd->cp_hqd_pq_wptr_lo);
3585 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3586 		       mqd->cp_hqd_pq_wptr_hi);
3587 	}
3588 
3589 	/* set the pointer to the MQD */
3590 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3591 	       mqd->cp_mqd_base_addr_lo);
3592 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3593 	       mqd->cp_mqd_base_addr_hi);
3594 
3595 	/* set MQD vmid to 0 */
3596 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3597 	       mqd->cp_mqd_control);
3598 
3599 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3600 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3601 	       mqd->cp_hqd_pq_base_lo);
3602 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3603 	       mqd->cp_hqd_pq_base_hi);
3604 
3605 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3606 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3607 	       mqd->cp_hqd_pq_control);
3608 
3609 	/* set the wb address whether it's enabled or not */
3610 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3611 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3612 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3613 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3614 
3615 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3616 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3617 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3618 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3619 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3620 
3621 	/* enable the doorbell if requested */
3622 	if (ring->use_doorbell) {
3623 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3624 					(adev->doorbell_index.kiq * 2) << 2);
3625 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3626 					(adev->doorbell_index.userqueue_end * 2) << 2);
3627 	}
3628 
3629 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3630 	       mqd->cp_hqd_pq_doorbell_control);
3631 
3632 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3633 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3634 	       mqd->cp_hqd_pq_wptr_lo);
3635 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3636 	       mqd->cp_hqd_pq_wptr_hi);
3637 
3638 	/* set the vmid for the queue */
3639 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3640 
3641 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3642 	       mqd->cp_hqd_persistent_state);
3643 
3644 	/* activate the queue */
3645 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3646 	       mqd->cp_hqd_active);
3647 
3648 	if (ring->use_doorbell)
3649 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3650 
3651 	return 0;
3652 }
3653 
3654 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3655 {
3656 	struct amdgpu_device *adev = ring->adev;
3657 	int j;
3658 
3659 	/* disable the queue if it's active */
3660 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3661 
3662 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3663 
3664 		for (j = 0; j < adev->usec_timeout; j++) {
3665 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3666 				break;
3667 			udelay(1);
3668 		}
3669 
3670 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3671 			DRM_DEBUG("KIQ dequeue request failed.\n");
3672 
3673 			/* Manual disable if dequeue request times out */
3674 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3675 		}
3676 
3677 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3678 		      0);
3679 	}
3680 
3681 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3682 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3683 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3684 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3685 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3686 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3687 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3688 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3689 
3690 	return 0;
3691 }
3692 
3693 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3694 {
3695 	struct amdgpu_device *adev = ring->adev;
3696 	struct v9_mqd *mqd = ring->mqd_ptr;
3697 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3698 
3699 	gfx_v9_0_kiq_setting(ring);
3700 
3701 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3702 		/* reset MQD to a clean status */
3703 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3704 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3705 
3706 		/* reset ring buffer */
3707 		ring->wptr = 0;
3708 		amdgpu_ring_clear_ring(ring);
3709 
3710 		mutex_lock(&adev->srbm_mutex);
3711 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3712 		gfx_v9_0_kiq_init_register(ring);
3713 		soc15_grbm_select(adev, 0, 0, 0, 0);
3714 		mutex_unlock(&adev->srbm_mutex);
3715 	} else {
3716 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3717 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3718 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3719 		mutex_lock(&adev->srbm_mutex);
3720 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3721 		gfx_v9_0_mqd_init(ring);
3722 		gfx_v9_0_kiq_init_register(ring);
3723 		soc15_grbm_select(adev, 0, 0, 0, 0);
3724 		mutex_unlock(&adev->srbm_mutex);
3725 
3726 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3727 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3728 	}
3729 
3730 	return 0;
3731 }
3732 
3733 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3734 {
3735 	struct amdgpu_device *adev = ring->adev;
3736 	struct v9_mqd *mqd = ring->mqd_ptr;
3737 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3738 
3739 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3740 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3741 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3742 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3743 		mutex_lock(&adev->srbm_mutex);
3744 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3745 		gfx_v9_0_mqd_init(ring);
3746 		soc15_grbm_select(adev, 0, 0, 0, 0);
3747 		mutex_unlock(&adev->srbm_mutex);
3748 
3749 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3750 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3751 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3752 		/* reset MQD to a clean status */
3753 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3754 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3755 
3756 		/* reset ring buffer */
3757 		ring->wptr = 0;
3758 		amdgpu_ring_clear_ring(ring);
3759 	} else {
3760 		amdgpu_ring_clear_ring(ring);
3761 	}
3762 
3763 	return 0;
3764 }
3765 
3766 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3767 {
3768 	struct amdgpu_ring *ring;
3769 	int r;
3770 
3771 	ring = &adev->gfx.kiq.ring;
3772 
3773 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3774 	if (unlikely(r != 0))
3775 		return r;
3776 
3777 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3778 	if (unlikely(r != 0))
3779 		return r;
3780 
3781 	gfx_v9_0_kiq_init_queue(ring);
3782 	amdgpu_bo_kunmap(ring->mqd_obj);
3783 	ring->mqd_ptr = NULL;
3784 	amdgpu_bo_unreserve(ring->mqd_obj);
3785 	ring->sched.ready = true;
3786 	return 0;
3787 }
3788 
3789 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3790 {
3791 	struct amdgpu_ring *ring = NULL;
3792 	int r = 0, i;
3793 
3794 	gfx_v9_0_cp_compute_enable(adev, true);
3795 
3796 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3797 		ring = &adev->gfx.compute_ring[i];
3798 
3799 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3800 		if (unlikely(r != 0))
3801 			goto done;
3802 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3803 		if (!r) {
3804 			r = gfx_v9_0_kcq_init_queue(ring);
3805 			amdgpu_bo_kunmap(ring->mqd_obj);
3806 			ring->mqd_ptr = NULL;
3807 		}
3808 		amdgpu_bo_unreserve(ring->mqd_obj);
3809 		if (r)
3810 			goto done;
3811 	}
3812 
3813 	r = gfx_v9_0_kiq_kcq_enable(adev);
3814 done:
3815 	return r;
3816 }
3817 
3818 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3819 {
3820 	int r, i;
3821 	struct amdgpu_ring *ring;
3822 
3823 	if (!(adev->flags & AMD_IS_APU))
3824 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3825 
3826 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3827 		if (adev->asic_type != CHIP_ARCTURUS) {
3828 			/* legacy firmware loading */
3829 			r = gfx_v9_0_cp_gfx_load_microcode(adev);
3830 			if (r)
3831 				return r;
3832 		}
3833 
3834 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3835 		if (r)
3836 			return r;
3837 	}
3838 
3839 	r = gfx_v9_0_kiq_resume(adev);
3840 	if (r)
3841 		return r;
3842 
3843 	if (adev->asic_type != CHIP_ARCTURUS) {
3844 		r = gfx_v9_0_cp_gfx_resume(adev);
3845 		if (r)
3846 			return r;
3847 	}
3848 
3849 	r = gfx_v9_0_kcq_resume(adev);
3850 	if (r)
3851 		return r;
3852 
3853 	if (adev->asic_type != CHIP_ARCTURUS) {
3854 		ring = &adev->gfx.gfx_ring[0];
3855 		r = amdgpu_ring_test_helper(ring);
3856 		if (r)
3857 			return r;
3858 	}
3859 
3860 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3861 		ring = &adev->gfx.compute_ring[i];
3862 		amdgpu_ring_test_helper(ring);
3863 	}
3864 
3865 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3866 
3867 	return 0;
3868 }
3869 
3870 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3871 {
3872 	if (adev->asic_type != CHIP_ARCTURUS)
3873 		gfx_v9_0_cp_gfx_enable(adev, enable);
3874 	gfx_v9_0_cp_compute_enable(adev, enable);
3875 }
3876 
3877 static int gfx_v9_0_hw_init(void *handle)
3878 {
3879 	int r;
3880 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3881 
3882 	if (!amdgpu_sriov_vf(adev))
3883 		gfx_v9_0_init_golden_registers(adev);
3884 
3885 	gfx_v9_0_constants_init(adev);
3886 
3887 	r = gfx_v9_0_csb_vram_pin(adev);
3888 	if (r)
3889 		return r;
3890 
3891 	r = adev->gfx.rlc.funcs->resume(adev);
3892 	if (r)
3893 		return r;
3894 
3895 	r = gfx_v9_0_cp_resume(adev);
3896 	if (r)
3897 		return r;
3898 
3899 	if (adev->asic_type != CHIP_ARCTURUS) {
3900 		r = gfx_v9_0_ngg_en(adev);
3901 		if (r)
3902 			return r;
3903 	}
3904 
3905 	return r;
3906 }
3907 
3908 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3909 {
3910 	int r, i;
3911 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3912 
3913 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3914 	if (r)
3915 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3916 
3917 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3918 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3919 
3920 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3921 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3922 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3923 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3924 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3925 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3926 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3927 		amdgpu_ring_write(kiq_ring, 0);
3928 		amdgpu_ring_write(kiq_ring, 0);
3929 		amdgpu_ring_write(kiq_ring, 0);
3930 	}
3931 	r = amdgpu_ring_test_helper(kiq_ring);
3932 	if (r)
3933 		DRM_ERROR("KCQ disable failed\n");
3934 
3935 	return r;
3936 }
3937 
3938 static int gfx_v9_0_hw_fini(void *handle)
3939 {
3940 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3941 
3942 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3943 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3944 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3945 
3946 	/* disable KCQ to avoid CPC touch memory not valid anymore */
3947 	gfx_v9_0_kcq_disable(adev);
3948 
3949 	if (amdgpu_sriov_vf(adev)) {
3950 		gfx_v9_0_cp_gfx_enable(adev, false);
3951 		/* must disable polling for SRIOV when hw finished, otherwise
3952 		 * CPC engine may still keep fetching WB address which is already
3953 		 * invalid after sw finished and trigger DMAR reading error in
3954 		 * hypervisor side.
3955 		 */
3956 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3957 		return 0;
3958 	}
3959 
3960 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
3961 	 * otherwise KIQ is hanging when binding back
3962 	 */
3963 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3964 		mutex_lock(&adev->srbm_mutex);
3965 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3966 				adev->gfx.kiq.ring.pipe,
3967 				adev->gfx.kiq.ring.queue, 0);
3968 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3969 		soc15_grbm_select(adev, 0, 0, 0, 0);
3970 		mutex_unlock(&adev->srbm_mutex);
3971 	}
3972 
3973 	gfx_v9_0_cp_enable(adev, false);
3974 	adev->gfx.rlc.funcs->stop(adev);
3975 
3976 	gfx_v9_0_csb_vram_unpin(adev);
3977 
3978 	return 0;
3979 }
3980 
3981 static int gfx_v9_0_suspend(void *handle)
3982 {
3983 	return gfx_v9_0_hw_fini(handle);
3984 }
3985 
3986 static int gfx_v9_0_resume(void *handle)
3987 {
3988 	return gfx_v9_0_hw_init(handle);
3989 }
3990 
3991 static bool gfx_v9_0_is_idle(void *handle)
3992 {
3993 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3994 
3995 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3996 				GRBM_STATUS, GUI_ACTIVE))
3997 		return false;
3998 	else
3999 		return true;
4000 }
4001 
4002 static int gfx_v9_0_wait_for_idle(void *handle)
4003 {
4004 	unsigned i;
4005 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4006 
4007 	for (i = 0; i < adev->usec_timeout; i++) {
4008 		if (gfx_v9_0_is_idle(handle))
4009 			return 0;
4010 		udelay(1);
4011 	}
4012 	return -ETIMEDOUT;
4013 }
4014 
4015 static int gfx_v9_0_soft_reset(void *handle)
4016 {
4017 	u32 grbm_soft_reset = 0;
4018 	u32 tmp;
4019 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4020 
4021 	/* GRBM_STATUS */
4022 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4023 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4024 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4025 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4026 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4027 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4028 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4029 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4030 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4031 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4032 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4033 	}
4034 
4035 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4036 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4037 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4038 	}
4039 
4040 	/* GRBM_STATUS2 */
4041 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4042 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4043 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4044 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4045 
4046 
4047 	if (grbm_soft_reset) {
4048 		/* stop the rlc */
4049 		adev->gfx.rlc.funcs->stop(adev);
4050 
4051 		if (adev->asic_type != CHIP_ARCTURUS)
4052 			/* Disable GFX parsing/prefetching */
4053 			gfx_v9_0_cp_gfx_enable(adev, false);
4054 
4055 		/* Disable MEC parsing/prefetching */
4056 		gfx_v9_0_cp_compute_enable(adev, false);
4057 
4058 		if (grbm_soft_reset) {
4059 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4060 			tmp |= grbm_soft_reset;
4061 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4062 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4063 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4064 
4065 			udelay(50);
4066 
4067 			tmp &= ~grbm_soft_reset;
4068 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4069 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4070 		}
4071 
4072 		/* Wait a little for things to settle down */
4073 		udelay(50);
4074 	}
4075 	return 0;
4076 }
4077 
4078 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4079 {
4080 	uint64_t clock;
4081 
4082 	mutex_lock(&adev->gfx.gpu_clock_mutex);
4083 	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4084 	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4085 		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4086 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
4087 	return clock;
4088 }
4089 
4090 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4091 					  uint32_t vmid,
4092 					  uint32_t gds_base, uint32_t gds_size,
4093 					  uint32_t gws_base, uint32_t gws_size,
4094 					  uint32_t oa_base, uint32_t oa_size)
4095 {
4096 	struct amdgpu_device *adev = ring->adev;
4097 
4098 	/* GDS Base */
4099 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4100 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4101 				   gds_base);
4102 
4103 	/* GDS Size */
4104 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4105 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4106 				   gds_size);
4107 
4108 	/* GWS */
4109 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4110 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4111 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4112 
4113 	/* OA */
4114 	gfx_v9_0_write_data_to_reg(ring, 0, false,
4115 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4116 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
4117 }
4118 
4119 static const u32 vgpr_init_compute_shader[] =
4120 {
4121 	0xb07c0000, 0xbe8000ff,
4122 	0x000000f8, 0xbf110800,
4123 	0x7e000280, 0x7e020280,
4124 	0x7e040280, 0x7e060280,
4125 	0x7e080280, 0x7e0a0280,
4126 	0x7e0c0280, 0x7e0e0280,
4127 	0x80808800, 0xbe803200,
4128 	0xbf84fff5, 0xbf9c0000,
4129 	0xd28c0001, 0x0001007f,
4130 	0xd28d0001, 0x0002027e,
4131 	0x10020288, 0xb8810904,
4132 	0xb7814000, 0xd1196a01,
4133 	0x00000301, 0xbe800087,
4134 	0xbefc00c1, 0xd89c4000,
4135 	0x00020201, 0xd89cc080,
4136 	0x00040401, 0x320202ff,
4137 	0x00000800, 0x80808100,
4138 	0xbf84fff8, 0x7e020280,
4139 	0xbf810000, 0x00000000,
4140 };
4141 
4142 static const u32 sgpr_init_compute_shader[] =
4143 {
4144 	0xb07c0000, 0xbe8000ff,
4145 	0x0000005f, 0xbee50080,
4146 	0xbe812c65, 0xbe822c65,
4147 	0xbe832c65, 0xbe842c65,
4148 	0xbe852c65, 0xb77c0005,
4149 	0x80808500, 0xbf84fff8,
4150 	0xbe800080, 0xbf810000,
4151 };
4152 
4153 static const struct soc15_reg_entry vgpr_init_regs[] = {
4154    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4155    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4156    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4157    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4158    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4159    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4160    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4161    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4162    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
4163    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4164 };
4165 
4166 static const struct soc15_reg_entry sgpr_init_regs[] = {
4167    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4168    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4169    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4170    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4171    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4172    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4173    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4174    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4175    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
4176    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4177 };
4178 
4179 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4180    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4181    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4182    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4183    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4184    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4185    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4186    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4187    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4188    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4189    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4190    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4191    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4192    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4193    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4194    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4195    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4196    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4197    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4198    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4199    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4200    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4201    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4202    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4203    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4204    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4205    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4206    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4207    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4208    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4209    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4210    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4211    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4212 };
4213 
4214 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4215 {
4216 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4217 	int i, r;
4218 
4219 	r = amdgpu_ring_alloc(ring, 7);
4220 	if (r) {
4221 		DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4222 			ring->name, r);
4223 		return r;
4224 	}
4225 
4226 	WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4227 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4228 
4229 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4230 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4231 				PACKET3_DMA_DATA_DST_SEL(1) |
4232 				PACKET3_DMA_DATA_SRC_SEL(2) |
4233 				PACKET3_DMA_DATA_ENGINE(0)));
4234 	amdgpu_ring_write(ring, 0);
4235 	amdgpu_ring_write(ring, 0);
4236 	amdgpu_ring_write(ring, 0);
4237 	amdgpu_ring_write(ring, 0);
4238 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4239 				adev->gds.gds_size);
4240 
4241 	amdgpu_ring_commit(ring);
4242 
4243 	for (i = 0; i < adev->usec_timeout; i++) {
4244 		if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4245 			break;
4246 		udelay(1);
4247 	}
4248 
4249 	if (i >= adev->usec_timeout)
4250 		r = -ETIMEDOUT;
4251 
4252 	WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4253 
4254 	return r;
4255 }
4256 
4257 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4258 {
4259 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4260 	struct amdgpu_ib ib;
4261 	struct dma_fence *f = NULL;
4262 	int r, i, j, k;
4263 	unsigned total_size, vgpr_offset, sgpr_offset;
4264 	u64 gpu_addr;
4265 
4266 	/* only support when RAS is enabled */
4267 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4268 		return 0;
4269 
4270 	/* bail if the compute ring is not ready */
4271 	if (!ring->sched.ready)
4272 		return 0;
4273 
4274 	total_size =
4275 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4276 	total_size +=
4277 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4278 	total_size = ALIGN(total_size, 256);
4279 	vgpr_offset = total_size;
4280 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4281 	sgpr_offset = total_size;
4282 	total_size += sizeof(sgpr_init_compute_shader);
4283 
4284 	/* allocate an indirect buffer to put the commands in */
4285 	memset(&ib, 0, sizeof(ib));
4286 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4287 	if (r) {
4288 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4289 		return r;
4290 	}
4291 
4292 	/* load the compute shaders */
4293 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4294 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4295 
4296 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4297 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4298 
4299 	/* init the ib length to 0 */
4300 	ib.length_dw = 0;
4301 
4302 	/* VGPR */
4303 	/* write the register state for the compute dispatch */
4304 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4305 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4306 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4307 								- PACKET3_SET_SH_REG_START;
4308 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4309 	}
4310 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4311 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4312 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4313 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4314 							- PACKET3_SET_SH_REG_START;
4315 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4316 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4317 
4318 	/* write dispatch packet */
4319 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4320 	ib.ptr[ib.length_dw++] = 128; /* x */
4321 	ib.ptr[ib.length_dw++] = 1; /* y */
4322 	ib.ptr[ib.length_dw++] = 1; /* z */
4323 	ib.ptr[ib.length_dw++] =
4324 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4325 
4326 	/* write CS partial flush packet */
4327 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4328 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4329 
4330 	/* SGPR */
4331 	/* write the register state for the compute dispatch */
4332 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4333 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4334 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4335 								- PACKET3_SET_SH_REG_START;
4336 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4337 	}
4338 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4339 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4340 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4341 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4342 							- PACKET3_SET_SH_REG_START;
4343 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4344 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4345 
4346 	/* write dispatch packet */
4347 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4348 	ib.ptr[ib.length_dw++] = 128; /* x */
4349 	ib.ptr[ib.length_dw++] = 1; /* y */
4350 	ib.ptr[ib.length_dw++] = 1; /* z */
4351 	ib.ptr[ib.length_dw++] =
4352 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4353 
4354 	/* write CS partial flush packet */
4355 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4356 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4357 
4358 	/* shedule the ib on the ring */
4359 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4360 	if (r) {
4361 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4362 		goto fail;
4363 	}
4364 
4365 	/* wait for the GPU to finish processing the IB */
4366 	r = dma_fence_wait(f, false);
4367 	if (r) {
4368 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4369 		goto fail;
4370 	}
4371 
4372 	/* read back registers to clear the counters */
4373 	mutex_lock(&adev->grbm_idx_mutex);
4374 	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4375 		for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4376 			for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4377 				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4378 				RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4379 			}
4380 		}
4381 	}
4382 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4383 	mutex_unlock(&adev->grbm_idx_mutex);
4384 
4385 fail:
4386 	amdgpu_ib_free(adev, &ib, NULL);
4387 	dma_fence_put(f);
4388 
4389 	return r;
4390 }
4391 
4392 static int gfx_v9_0_early_init(void *handle)
4393 {
4394 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4395 
4396 	if (adev->asic_type == CHIP_ARCTURUS)
4397 		adev->gfx.num_gfx_rings = 0;
4398 	else
4399 		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4400 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4401 	gfx_v9_0_set_ring_funcs(adev);
4402 	gfx_v9_0_set_irq_funcs(adev);
4403 	gfx_v9_0_set_gds_init(adev);
4404 	gfx_v9_0_set_rlc_funcs(adev);
4405 
4406 	return 0;
4407 }
4408 
4409 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4410 		struct ras_err_data *err_data,
4411 		struct amdgpu_iv_entry *entry);
4412 
4413 static int gfx_v9_0_ecc_late_init(void *handle)
4414 {
4415 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4416 	struct ras_common_if **ras_if = &adev->gfx.ras_if;
4417 	struct ras_ih_if ih_info = {
4418 		.cb = gfx_v9_0_process_ras_data_cb,
4419 	};
4420 	struct ras_fs_if fs_info = {
4421 		.sysfs_name = "gfx_err_count",
4422 		.debugfs_name = "gfx_err_inject",
4423 	};
4424 	struct ras_common_if ras_block = {
4425 		.block = AMDGPU_RAS_BLOCK__GFX,
4426 		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4427 		.sub_block_index = 0,
4428 		.name = "gfx",
4429 	};
4430 	int r;
4431 
4432 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4433 		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4434 		return 0;
4435 	}
4436 
4437 	r = gfx_v9_0_do_edc_gds_workarounds(adev);
4438 	if (r)
4439 		return r;
4440 
4441 	/* requires IBs so do in late init after IB pool is initialized */
4442 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4443 	if (r)
4444 		return r;
4445 
4446 	/* handle resume path. */
4447 	if (*ras_if) {
4448 		/* resend ras TA enable cmd during resume.
4449 		 * prepare to handle failure.
4450 		 */
4451 		ih_info.head = **ras_if;
4452 		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4453 		if (r) {
4454 			if (r == -EAGAIN) {
4455 				/* request a gpu reset. will run again. */
4456 				amdgpu_ras_request_reset_on_boot(adev,
4457 						AMDGPU_RAS_BLOCK__GFX);
4458 				return 0;
4459 			}
4460 			/* fail to enable ras, cleanup all. */
4461 			goto irq;
4462 		}
4463 		/* enable successfully. continue. */
4464 		goto resume;
4465 	}
4466 
4467 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4468 	if (!*ras_if)
4469 		return -ENOMEM;
4470 
4471 	**ras_if = ras_block;
4472 
4473 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4474 	if (r) {
4475 		if (r == -EAGAIN) {
4476 			amdgpu_ras_request_reset_on_boot(adev,
4477 					AMDGPU_RAS_BLOCK__GFX);
4478 			r = 0;
4479 		}
4480 		goto feature;
4481 	}
4482 
4483 	ih_info.head = **ras_if;
4484 	fs_info.head = **ras_if;
4485 
4486 	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4487 	if (r)
4488 		goto interrupt;
4489 
4490 	amdgpu_ras_debugfs_create(adev, &fs_info);
4491 
4492 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
4493 	if (r)
4494 		goto sysfs;
4495 resume:
4496 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4497 	if (r)
4498 		goto irq;
4499 
4500 	return 0;
4501 irq:
4502 	amdgpu_ras_sysfs_remove(adev, *ras_if);
4503 sysfs:
4504 	amdgpu_ras_debugfs_remove(adev, *ras_if);
4505 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4506 interrupt:
4507 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
4508 feature:
4509 	kfree(*ras_if);
4510 	*ras_if = NULL;
4511 	return r;
4512 }
4513 
4514 static int gfx_v9_0_late_init(void *handle)
4515 {
4516 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4517 	int r;
4518 
4519 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4520 	if (r)
4521 		return r;
4522 
4523 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4524 	if (r)
4525 		return r;
4526 
4527 	r = gfx_v9_0_ecc_late_init(handle);
4528 	if (r)
4529 		return r;
4530 
4531 	return 0;
4532 }
4533 
4534 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4535 {
4536 	uint32_t rlc_setting;
4537 
4538 	/* if RLC is not enabled, do nothing */
4539 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4540 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4541 		return false;
4542 
4543 	return true;
4544 }
4545 
4546 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4547 {
4548 	uint32_t data;
4549 	unsigned i;
4550 
4551 	data = RLC_SAFE_MODE__CMD_MASK;
4552 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4553 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4554 
4555 	/* wait for RLC_SAFE_MODE */
4556 	for (i = 0; i < adev->usec_timeout; i++) {
4557 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4558 			break;
4559 		udelay(1);
4560 	}
4561 }
4562 
4563 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4564 {
4565 	uint32_t data;
4566 
4567 	data = RLC_SAFE_MODE__CMD_MASK;
4568 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4569 }
4570 
4571 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4572 						bool enable)
4573 {
4574 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4575 
4576 	if (is_support_sw_smu(adev) && !enable)
4577 		smu_set_gfx_cgpg(&adev->smu, enable);
4578 
4579 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4580 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4581 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4582 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4583 	} else {
4584 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4585 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4586 	}
4587 
4588 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4589 }
4590 
4591 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4592 						bool enable)
4593 {
4594 	/* TODO: double check if we need to perform under safe mode */
4595 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
4596 
4597 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4598 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4599 	else
4600 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4601 
4602 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4603 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4604 	else
4605 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4606 
4607 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
4608 }
4609 
4610 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4611 						      bool enable)
4612 {
4613 	uint32_t data, def;
4614 
4615 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4616 
4617 	/* It is disabled by HW by default */
4618 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4619 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
4620 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4621 
4622 		if (adev->asic_type != CHIP_VEGA12)
4623 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4624 
4625 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4626 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4627 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4628 
4629 		/* only for Vega10 & Raven1 */
4630 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4631 
4632 		if (def != data)
4633 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4634 
4635 		/* MGLS is a global flag to control all MGLS in GFX */
4636 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4637 			/* 2 - RLC memory Light sleep */
4638 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4639 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4640 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4641 				if (def != data)
4642 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4643 			}
4644 			/* 3 - CP memory Light sleep */
4645 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4646 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4647 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4648 				if (def != data)
4649 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4650 			}
4651 		}
4652 	} else {
4653 		/* 1 - MGCG_OVERRIDE */
4654 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4655 
4656 		if (adev->asic_type != CHIP_VEGA12)
4657 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4658 
4659 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4660 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4661 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4662 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4663 
4664 		if (def != data)
4665 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4666 
4667 		/* 2 - disable MGLS in RLC */
4668 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4669 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4670 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4671 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4672 		}
4673 
4674 		/* 3 - disable MGLS in CP */
4675 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4676 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4677 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4678 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4679 		}
4680 	}
4681 
4682 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4683 }
4684 
4685 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4686 					   bool enable)
4687 {
4688 	uint32_t data, def;
4689 
4690 	if (adev->asic_type == CHIP_ARCTURUS)
4691 		return;
4692 
4693 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4694 
4695 	/* Enable 3D CGCG/CGLS */
4696 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4697 		/* write cmd to clear cgcg/cgls ov */
4698 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4699 		/* unset CGCG override */
4700 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4701 		/* update CGCG and CGLS override bits */
4702 		if (def != data)
4703 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4704 
4705 		/* enable 3Dcgcg FSM(0x0000363f) */
4706 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4707 
4708 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4709 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4710 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4711 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4712 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4713 		if (def != data)
4714 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4715 
4716 		/* set IDLE_POLL_COUNT(0x00900100) */
4717 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4718 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4719 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4720 		if (def != data)
4721 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4722 	} else {
4723 		/* Disable CGCG/CGLS */
4724 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4725 		/* disable cgcg, cgls should be disabled */
4726 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4727 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4728 		/* disable cgcg and cgls in FSM */
4729 		if (def != data)
4730 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4731 	}
4732 
4733 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4734 }
4735 
4736 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4737 						      bool enable)
4738 {
4739 	uint32_t def, data;
4740 
4741 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4742 
4743 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4744 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4745 		/* unset CGCG override */
4746 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4747 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4748 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4749 		else
4750 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4751 		/* update CGCG and CGLS override bits */
4752 		if (def != data)
4753 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4754 
4755 		/* enable cgcg FSM(0x0000363F) */
4756 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4757 
4758 		if (adev->asic_type == CHIP_ARCTURUS)
4759 			data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4760 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4761 		else
4762 			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4763 				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4764 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4765 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4766 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4767 		if (def != data)
4768 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4769 
4770 		/* set IDLE_POLL_COUNT(0x00900100) */
4771 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4772 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4773 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4774 		if (def != data)
4775 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4776 	} else {
4777 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4778 		/* reset CGCG/CGLS bits */
4779 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4780 		/* disable cgcg and cgls in FSM */
4781 		if (def != data)
4782 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4783 	}
4784 
4785 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4786 }
4787 
4788 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4789 					    bool enable)
4790 {
4791 	if (enable) {
4792 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4793 		 * ===  MGCG + MGLS ===
4794 		 */
4795 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4796 		/* ===  CGCG /CGLS for GFX 3D Only === */
4797 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4798 		/* ===  CGCG + CGLS === */
4799 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4800 	} else {
4801 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4802 		 * ===  CGCG + CGLS ===
4803 		 */
4804 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4805 		/* ===  CGCG /CGLS for GFX 3D Only === */
4806 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4807 		/* ===  MGCG + MGLS === */
4808 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4809 	}
4810 	return 0;
4811 }
4812 
4813 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4814 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4815 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4816 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4817 	.init = gfx_v9_0_rlc_init,
4818 	.get_csb_size = gfx_v9_0_get_csb_size,
4819 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4820 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4821 	.resume = gfx_v9_0_rlc_resume,
4822 	.stop = gfx_v9_0_rlc_stop,
4823 	.reset = gfx_v9_0_rlc_reset,
4824 	.start = gfx_v9_0_rlc_start
4825 };
4826 
4827 static int gfx_v9_0_set_powergating_state(void *handle,
4828 					  enum amd_powergating_state state)
4829 {
4830 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4831 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
4832 
4833 	switch (adev->asic_type) {
4834 	case CHIP_RAVEN:
4835 	case CHIP_RENOIR:
4836 		if (!enable) {
4837 			amdgpu_gfx_off_ctrl(adev, false);
4838 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4839 		}
4840 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4841 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4842 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4843 		} else {
4844 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4845 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4846 		}
4847 
4848 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4849 			gfx_v9_0_enable_cp_power_gating(adev, true);
4850 		else
4851 			gfx_v9_0_enable_cp_power_gating(adev, false);
4852 
4853 		/* update gfx cgpg state */
4854 		if (is_support_sw_smu(adev) && enable)
4855 			smu_set_gfx_cgpg(&adev->smu, enable);
4856 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4857 
4858 		/* update mgcg state */
4859 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4860 
4861 		if (enable)
4862 			amdgpu_gfx_off_ctrl(adev, true);
4863 		break;
4864 	case CHIP_VEGA12:
4865 		if (!enable) {
4866 			amdgpu_gfx_off_ctrl(adev, false);
4867 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4868 		} else {
4869 			amdgpu_gfx_off_ctrl(adev, true);
4870 		}
4871 		break;
4872 	default:
4873 		break;
4874 	}
4875 
4876 	return 0;
4877 }
4878 
4879 static int gfx_v9_0_set_clockgating_state(void *handle,
4880 					  enum amd_clockgating_state state)
4881 {
4882 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4883 
4884 	if (amdgpu_sriov_vf(adev))
4885 		return 0;
4886 
4887 	switch (adev->asic_type) {
4888 	case CHIP_VEGA10:
4889 	case CHIP_VEGA12:
4890 	case CHIP_VEGA20:
4891 	case CHIP_RAVEN:
4892 	case CHIP_ARCTURUS:
4893 	case CHIP_RENOIR:
4894 		gfx_v9_0_update_gfx_clock_gating(adev,
4895 						 state == AMD_CG_STATE_GATE ? true : false);
4896 		break;
4897 	default:
4898 		break;
4899 	}
4900 	return 0;
4901 }
4902 
4903 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4904 {
4905 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4906 	int data;
4907 
4908 	if (amdgpu_sriov_vf(adev))
4909 		*flags = 0;
4910 
4911 	/* AMD_CG_SUPPORT_GFX_MGCG */
4912 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4913 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4914 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4915 
4916 	/* AMD_CG_SUPPORT_GFX_CGCG */
4917 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4918 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4919 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4920 
4921 	/* AMD_CG_SUPPORT_GFX_CGLS */
4922 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4923 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4924 
4925 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4926 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4927 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4928 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4929 
4930 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4931 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4932 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4933 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4934 
4935 	if (adev->asic_type != CHIP_ARCTURUS) {
4936 		/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4937 		data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4938 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4939 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4940 
4941 		/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4942 		if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4943 			*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4944 	}
4945 }
4946 
4947 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4948 {
4949 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
4950 }
4951 
4952 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4953 {
4954 	struct amdgpu_device *adev = ring->adev;
4955 	u64 wptr;
4956 
4957 	/* XXX check if swapping is necessary on BE */
4958 	if (ring->use_doorbell) {
4959 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4960 	} else {
4961 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4962 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4963 	}
4964 
4965 	return wptr;
4966 }
4967 
4968 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4969 {
4970 	struct amdgpu_device *adev = ring->adev;
4971 
4972 	if (ring->use_doorbell) {
4973 		/* XXX check if swapping is necessary on BE */
4974 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4975 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4976 	} else {
4977 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4978 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4979 	}
4980 }
4981 
4982 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4983 {
4984 	struct amdgpu_device *adev = ring->adev;
4985 	u32 ref_and_mask, reg_mem_engine;
4986 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4987 
4988 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4989 		switch (ring->me) {
4990 		case 1:
4991 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4992 			break;
4993 		case 2:
4994 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4995 			break;
4996 		default:
4997 			return;
4998 		}
4999 		reg_mem_engine = 0;
5000 	} else {
5001 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5002 		reg_mem_engine = 1; /* pfp */
5003 	}
5004 
5005 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5006 			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
5007 			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
5008 			      ref_and_mask, ref_and_mask, 0x20);
5009 }
5010 
5011 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5012 					struct amdgpu_job *job,
5013 					struct amdgpu_ib *ib,
5014 					uint32_t flags)
5015 {
5016 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5017 	u32 header, control = 0;
5018 
5019 	if (ib->flags & AMDGPU_IB_FLAG_CE)
5020 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5021 	else
5022 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5023 
5024 	control |= ib->length_dw | (vmid << 24);
5025 
5026 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5027 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5028 
5029 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
5030 			gfx_v9_0_ring_emit_de_meta(ring);
5031 	}
5032 
5033 	amdgpu_ring_write(ring, header);
5034 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5035 	amdgpu_ring_write(ring,
5036 #ifdef __BIG_ENDIAN
5037 		(2 << 0) |
5038 #endif
5039 		lower_32_bits(ib->gpu_addr));
5040 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5041 	amdgpu_ring_write(ring, control);
5042 }
5043 
5044 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5045 					  struct amdgpu_job *job,
5046 					  struct amdgpu_ib *ib,
5047 					  uint32_t flags)
5048 {
5049 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5050 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5051 
5052 	/* Currently, there is a high possibility to get wave ID mismatch
5053 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5054 	 * different wave IDs than the GDS expects. This situation happens
5055 	 * randomly when at least 5 compute pipes use GDS ordered append.
5056 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5057 	 * Those are probably bugs somewhere else in the kernel driver.
5058 	 *
5059 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5060 	 * GDS to 0 for this ring (me/pipe).
5061 	 */
5062 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5063 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5064 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5065 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5066 	}
5067 
5068 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5069 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5070 	amdgpu_ring_write(ring,
5071 #ifdef __BIG_ENDIAN
5072 				(2 << 0) |
5073 #endif
5074 				lower_32_bits(ib->gpu_addr));
5075 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5076 	amdgpu_ring_write(ring, control);
5077 }
5078 
5079 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5080 				     u64 seq, unsigned flags)
5081 {
5082 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5083 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5084 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5085 
5086 	/* RELEASE_MEM - flush caches, send int */
5087 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5088 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5089 					       EOP_TC_NC_ACTION_EN) :
5090 					      (EOP_TCL1_ACTION_EN |
5091 					       EOP_TC_ACTION_EN |
5092 					       EOP_TC_WB_ACTION_EN |
5093 					       EOP_TC_MD_ACTION_EN)) |
5094 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5095 				 EVENT_INDEX(5)));
5096 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5097 
5098 	/*
5099 	 * the address should be Qword aligned if 64bit write, Dword
5100 	 * aligned if only send 32bit data low (discard data high)
5101 	 */
5102 	if (write64bit)
5103 		BUG_ON(addr & 0x7);
5104 	else
5105 		BUG_ON(addr & 0x3);
5106 	amdgpu_ring_write(ring, lower_32_bits(addr));
5107 	amdgpu_ring_write(ring, upper_32_bits(addr));
5108 	amdgpu_ring_write(ring, lower_32_bits(seq));
5109 	amdgpu_ring_write(ring, upper_32_bits(seq));
5110 	amdgpu_ring_write(ring, 0);
5111 }
5112 
5113 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5114 {
5115 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5116 	uint32_t seq = ring->fence_drv.sync_seq;
5117 	uint64_t addr = ring->fence_drv.gpu_addr;
5118 
5119 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5120 			      lower_32_bits(addr), upper_32_bits(addr),
5121 			      seq, 0xffffffff, 4);
5122 }
5123 
5124 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5125 					unsigned vmid, uint64_t pd_addr)
5126 {
5127 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5128 
5129 	/* compute doesn't have PFP */
5130 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5131 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5132 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5133 		amdgpu_ring_write(ring, 0x0);
5134 	}
5135 }
5136 
5137 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5138 {
5139 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5140 }
5141 
5142 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5143 {
5144 	u64 wptr;
5145 
5146 	/* XXX check if swapping is necessary on BE */
5147 	if (ring->use_doorbell)
5148 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5149 	else
5150 		BUG();
5151 	return wptr;
5152 }
5153 
5154 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5155 					   bool acquire)
5156 {
5157 	struct amdgpu_device *adev = ring->adev;
5158 	int pipe_num, tmp, reg;
5159 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5160 
5161 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5162 
5163 	/* first me only has 2 entries, GFX and HP3D */
5164 	if (ring->me > 0)
5165 		pipe_num -= 2;
5166 
5167 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5168 	tmp = RREG32(reg);
5169 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5170 	WREG32(reg, tmp);
5171 }
5172 
5173 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5174 					    struct amdgpu_ring *ring,
5175 					    bool acquire)
5176 {
5177 	int i, pipe;
5178 	bool reserve;
5179 	struct amdgpu_ring *iring;
5180 
5181 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
5182 	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5183 	if (acquire)
5184 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5185 	else
5186 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5187 
5188 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5189 		/* Clear all reservations - everyone reacquires all resources */
5190 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5191 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5192 						       true);
5193 
5194 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5195 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5196 						       true);
5197 	} else {
5198 		/* Lower all pipes without a current reservation */
5199 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5200 			iring = &adev->gfx.gfx_ring[i];
5201 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5202 							   iring->me,
5203 							   iring->pipe,
5204 							   0);
5205 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5206 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5207 		}
5208 
5209 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5210 			iring = &adev->gfx.compute_ring[i];
5211 			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5212 							   iring->me,
5213 							   iring->pipe,
5214 							   0);
5215 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5216 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5217 		}
5218 	}
5219 
5220 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5221 }
5222 
5223 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5224 				      struct amdgpu_ring *ring,
5225 				      bool acquire)
5226 {
5227 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5228 	uint32_t queue_priority = acquire ? 0xf : 0x0;
5229 
5230 	mutex_lock(&adev->srbm_mutex);
5231 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5232 
5233 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5234 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5235 
5236 	soc15_grbm_select(adev, 0, 0, 0, 0);
5237 	mutex_unlock(&adev->srbm_mutex);
5238 }
5239 
5240 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5241 					       enum drm_sched_priority priority)
5242 {
5243 	struct amdgpu_device *adev = ring->adev;
5244 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5245 
5246 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5247 		return;
5248 
5249 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5250 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5251 }
5252 
5253 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5254 {
5255 	struct amdgpu_device *adev = ring->adev;
5256 
5257 	/* XXX check if swapping is necessary on BE */
5258 	if (ring->use_doorbell) {
5259 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5260 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5261 	} else{
5262 		BUG(); /* only DOORBELL method supported on gfx9 now */
5263 	}
5264 }
5265 
5266 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5267 					 u64 seq, unsigned int flags)
5268 {
5269 	struct amdgpu_device *adev = ring->adev;
5270 
5271 	/* we only allocate 32bit for each seq wb address */
5272 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5273 
5274 	/* write fence seq to the "addr" */
5275 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5276 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5277 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5278 	amdgpu_ring_write(ring, lower_32_bits(addr));
5279 	amdgpu_ring_write(ring, upper_32_bits(addr));
5280 	amdgpu_ring_write(ring, lower_32_bits(seq));
5281 
5282 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5283 		/* set register to trigger INT */
5284 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5285 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5286 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5287 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5288 		amdgpu_ring_write(ring, 0);
5289 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5290 	}
5291 }
5292 
5293 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5294 {
5295 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5296 	amdgpu_ring_write(ring, 0);
5297 }
5298 
5299 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5300 {
5301 	struct v9_ce_ib_state ce_payload = {0};
5302 	uint64_t csa_addr;
5303 	int cnt;
5304 
5305 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5306 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5307 
5308 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5309 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5310 				 WRITE_DATA_DST_SEL(8) |
5311 				 WR_CONFIRM) |
5312 				 WRITE_DATA_CACHE_POLICY(0));
5313 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5314 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5315 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5316 }
5317 
5318 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5319 {
5320 	struct v9_de_ib_state de_payload = {0};
5321 	uint64_t csa_addr, gds_addr;
5322 	int cnt;
5323 
5324 	csa_addr = amdgpu_csa_vaddr(ring->adev);
5325 	gds_addr = csa_addr + 4096;
5326 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5327 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5328 
5329 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5330 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5331 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5332 				 WRITE_DATA_DST_SEL(8) |
5333 				 WR_CONFIRM) |
5334 				 WRITE_DATA_CACHE_POLICY(0));
5335 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5336 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5337 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5338 }
5339 
5340 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5341 {
5342 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5343 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
5344 }
5345 
5346 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5347 {
5348 	uint32_t dw2 = 0;
5349 
5350 	if (amdgpu_sriov_vf(ring->adev))
5351 		gfx_v9_0_ring_emit_ce_meta(ring);
5352 
5353 	gfx_v9_0_ring_emit_tmz(ring, true);
5354 
5355 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5356 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5357 		/* set load_global_config & load_global_uconfig */
5358 		dw2 |= 0x8001;
5359 		/* set load_cs_sh_regs */
5360 		dw2 |= 0x01000000;
5361 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5362 		dw2 |= 0x10002;
5363 
5364 		/* set load_ce_ram if preamble presented */
5365 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5366 			dw2 |= 0x10000000;
5367 	} else {
5368 		/* still load_ce_ram if this is the first time preamble presented
5369 		 * although there is no context switch happens.
5370 		 */
5371 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5372 			dw2 |= 0x10000000;
5373 	}
5374 
5375 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5376 	amdgpu_ring_write(ring, dw2);
5377 	amdgpu_ring_write(ring, 0);
5378 }
5379 
5380 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5381 {
5382 	unsigned ret;
5383 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5384 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5385 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5386 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5387 	ret = ring->wptr & ring->buf_mask;
5388 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5389 	return ret;
5390 }
5391 
5392 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5393 {
5394 	unsigned cur;
5395 	BUG_ON(offset > ring->buf_mask);
5396 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
5397 
5398 	cur = (ring->wptr & ring->buf_mask) - 1;
5399 	if (likely(cur > offset))
5400 		ring->ring[offset] = cur - offset;
5401 	else
5402 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5403 }
5404 
5405 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5406 {
5407 	struct amdgpu_device *adev = ring->adev;
5408 
5409 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5410 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5411 				(5 << 8) |	/* dst: memory */
5412 				(1 << 20));	/* write confirm */
5413 	amdgpu_ring_write(ring, reg);
5414 	amdgpu_ring_write(ring, 0);
5415 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5416 				adev->virt.reg_val_offs * 4));
5417 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5418 				adev->virt.reg_val_offs * 4));
5419 }
5420 
5421 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5422 				    uint32_t val)
5423 {
5424 	uint32_t cmd = 0;
5425 
5426 	switch (ring->funcs->type) {
5427 	case AMDGPU_RING_TYPE_GFX:
5428 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5429 		break;
5430 	case AMDGPU_RING_TYPE_KIQ:
5431 		cmd = (1 << 16); /* no inc addr */
5432 		break;
5433 	default:
5434 		cmd = WR_CONFIRM;
5435 		break;
5436 	}
5437 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5438 	amdgpu_ring_write(ring, cmd);
5439 	amdgpu_ring_write(ring, reg);
5440 	amdgpu_ring_write(ring, 0);
5441 	amdgpu_ring_write(ring, val);
5442 }
5443 
5444 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5445 					uint32_t val, uint32_t mask)
5446 {
5447 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5448 }
5449 
5450 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5451 						  uint32_t reg0, uint32_t reg1,
5452 						  uint32_t ref, uint32_t mask)
5453 {
5454 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5455 	struct amdgpu_device *adev = ring->adev;
5456 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5457 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5458 
5459 	if (fw_version_ok)
5460 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5461 				      ref, mask, 0x20);
5462 	else
5463 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5464 							   ref, mask);
5465 }
5466 
5467 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5468 {
5469 	struct amdgpu_device *adev = ring->adev;
5470 	uint32_t value = 0;
5471 
5472 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5473 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5474 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5475 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5476 	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5477 }
5478 
5479 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5480 						 enum amdgpu_interrupt_state state)
5481 {
5482 	switch (state) {
5483 	case AMDGPU_IRQ_STATE_DISABLE:
5484 	case AMDGPU_IRQ_STATE_ENABLE:
5485 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5486 			       TIME_STAMP_INT_ENABLE,
5487 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5488 		break;
5489 	default:
5490 		break;
5491 	}
5492 }
5493 
5494 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5495 						     int me, int pipe,
5496 						     enum amdgpu_interrupt_state state)
5497 {
5498 	u32 mec_int_cntl, mec_int_cntl_reg;
5499 
5500 	/*
5501 	 * amdgpu controls only the first MEC. That's why this function only
5502 	 * handles the setting of interrupts for this specific MEC. All other
5503 	 * pipes' interrupts are set by amdkfd.
5504 	 */
5505 
5506 	if (me == 1) {
5507 		switch (pipe) {
5508 		case 0:
5509 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5510 			break;
5511 		case 1:
5512 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5513 			break;
5514 		case 2:
5515 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5516 			break;
5517 		case 3:
5518 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5519 			break;
5520 		default:
5521 			DRM_DEBUG("invalid pipe %d\n", pipe);
5522 			return;
5523 		}
5524 	} else {
5525 		DRM_DEBUG("invalid me %d\n", me);
5526 		return;
5527 	}
5528 
5529 	switch (state) {
5530 	case AMDGPU_IRQ_STATE_DISABLE:
5531 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5532 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5533 					     TIME_STAMP_INT_ENABLE, 0);
5534 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5535 		break;
5536 	case AMDGPU_IRQ_STATE_ENABLE:
5537 		mec_int_cntl = RREG32(mec_int_cntl_reg);
5538 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5539 					     TIME_STAMP_INT_ENABLE, 1);
5540 		WREG32(mec_int_cntl_reg, mec_int_cntl);
5541 		break;
5542 	default:
5543 		break;
5544 	}
5545 }
5546 
5547 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5548 					     struct amdgpu_irq_src *source,
5549 					     unsigned type,
5550 					     enum amdgpu_interrupt_state state)
5551 {
5552 	switch (state) {
5553 	case AMDGPU_IRQ_STATE_DISABLE:
5554 	case AMDGPU_IRQ_STATE_ENABLE:
5555 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5556 			       PRIV_REG_INT_ENABLE,
5557 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5558 		break;
5559 	default:
5560 		break;
5561 	}
5562 
5563 	return 0;
5564 }
5565 
5566 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5567 					      struct amdgpu_irq_src *source,
5568 					      unsigned type,
5569 					      enum amdgpu_interrupt_state state)
5570 {
5571 	switch (state) {
5572 	case AMDGPU_IRQ_STATE_DISABLE:
5573 	case AMDGPU_IRQ_STATE_ENABLE:
5574 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5575 			       PRIV_INSTR_INT_ENABLE,
5576 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5577 	default:
5578 		break;
5579 	}
5580 
5581 	return 0;
5582 }
5583 
5584 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
5585 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5586 			CP_ECC_ERROR_INT_ENABLE, 1)
5587 
5588 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
5589 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5590 			CP_ECC_ERROR_INT_ENABLE, 0)
5591 
5592 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5593 					      struct amdgpu_irq_src *source,
5594 					      unsigned type,
5595 					      enum amdgpu_interrupt_state state)
5596 {
5597 	switch (state) {
5598 	case AMDGPU_IRQ_STATE_DISABLE:
5599 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5600 				CP_ECC_ERROR_INT_ENABLE, 0);
5601 		DISABLE_ECC_ON_ME_PIPE(1, 0);
5602 		DISABLE_ECC_ON_ME_PIPE(1, 1);
5603 		DISABLE_ECC_ON_ME_PIPE(1, 2);
5604 		DISABLE_ECC_ON_ME_PIPE(1, 3);
5605 		break;
5606 
5607 	case AMDGPU_IRQ_STATE_ENABLE:
5608 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5609 				CP_ECC_ERROR_INT_ENABLE, 1);
5610 		ENABLE_ECC_ON_ME_PIPE(1, 0);
5611 		ENABLE_ECC_ON_ME_PIPE(1, 1);
5612 		ENABLE_ECC_ON_ME_PIPE(1, 2);
5613 		ENABLE_ECC_ON_ME_PIPE(1, 3);
5614 		break;
5615 	default:
5616 		break;
5617 	}
5618 
5619 	return 0;
5620 }
5621 
5622 
5623 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5624 					    struct amdgpu_irq_src *src,
5625 					    unsigned type,
5626 					    enum amdgpu_interrupt_state state)
5627 {
5628 	switch (type) {
5629 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5630 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5631 		break;
5632 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5633 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5634 		break;
5635 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5636 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5637 		break;
5638 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5639 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5640 		break;
5641 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5642 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5643 		break;
5644 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5645 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5646 		break;
5647 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5648 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5649 		break;
5650 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5651 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5652 		break;
5653 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5654 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5655 		break;
5656 	default:
5657 		break;
5658 	}
5659 	return 0;
5660 }
5661 
5662 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5663 			    struct amdgpu_irq_src *source,
5664 			    struct amdgpu_iv_entry *entry)
5665 {
5666 	int i;
5667 	u8 me_id, pipe_id, queue_id;
5668 	struct amdgpu_ring *ring;
5669 
5670 	DRM_DEBUG("IH: CP EOP\n");
5671 	me_id = (entry->ring_id & 0x0c) >> 2;
5672 	pipe_id = (entry->ring_id & 0x03) >> 0;
5673 	queue_id = (entry->ring_id & 0x70) >> 4;
5674 
5675 	switch (me_id) {
5676 	case 0:
5677 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5678 		break;
5679 	case 1:
5680 	case 2:
5681 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5682 			ring = &adev->gfx.compute_ring[i];
5683 			/* Per-queue interrupt is supported for MEC starting from VI.
5684 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5685 			  */
5686 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5687 				amdgpu_fence_process(ring);
5688 		}
5689 		break;
5690 	}
5691 	return 0;
5692 }
5693 
5694 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5695 			   struct amdgpu_iv_entry *entry)
5696 {
5697 	u8 me_id, pipe_id, queue_id;
5698 	struct amdgpu_ring *ring;
5699 	int i;
5700 
5701 	me_id = (entry->ring_id & 0x0c) >> 2;
5702 	pipe_id = (entry->ring_id & 0x03) >> 0;
5703 	queue_id = (entry->ring_id & 0x70) >> 4;
5704 
5705 	switch (me_id) {
5706 	case 0:
5707 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5708 		break;
5709 	case 1:
5710 	case 2:
5711 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5712 			ring = &adev->gfx.compute_ring[i];
5713 			if (ring->me == me_id && ring->pipe == pipe_id &&
5714 			    ring->queue == queue_id)
5715 				drm_sched_fault(&ring->sched);
5716 		}
5717 		break;
5718 	}
5719 }
5720 
5721 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5722 				 struct amdgpu_irq_src *source,
5723 				 struct amdgpu_iv_entry *entry)
5724 {
5725 	DRM_ERROR("Illegal register access in command stream\n");
5726 	gfx_v9_0_fault(adev, entry);
5727 	return 0;
5728 }
5729 
5730 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5731 				  struct amdgpu_irq_src *source,
5732 				  struct amdgpu_iv_entry *entry)
5733 {
5734 	DRM_ERROR("Illegal instruction in command stream\n");
5735 	gfx_v9_0_fault(adev, entry);
5736 	return 0;
5737 }
5738 
5739 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5740 		struct ras_err_data *err_data,
5741 		struct amdgpu_iv_entry *entry)
5742 {
5743 	/* TODO ue will trigger an interrupt. */
5744 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5745 	if (adev->gfx.funcs->query_ras_error_count)
5746 		adev->gfx.funcs->query_ras_error_count(adev, err_data);
5747 	amdgpu_ras_reset_gpu(adev, 0);
5748 	return AMDGPU_RAS_SUCCESS;
5749 }
5750 
5751 static const struct {
5752 	const char *name;
5753 	uint32_t ip;
5754 	uint32_t inst;
5755 	uint32_t seg;
5756 	uint32_t reg_offset;
5757 	uint32_t per_se_instance;
5758 	int32_t num_instance;
5759 	uint32_t sec_count_mask;
5760 	uint32_t ded_count_mask;
5761 } gfx_ras_edc_regs[] = {
5762 	{ "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5763 	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5764 	  REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5765 	{ "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5766 	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5767 	  REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5768 	{ "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5769 	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5770 	{ "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5771 	  REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5772 	{ "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5773 	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5774 	  REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5775 	{ "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5776 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5777 	{ "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5778 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5779 	  REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5780 	{ "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5781 	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5782 	  REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5783 	{ "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5784 	  REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5785 	{ "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5786 	  REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5787 	{ "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5788 	  REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5789 	{ "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5790 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5791 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5792 	{ "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5793 	  REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5794 	{ "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5795 	  0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5796 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5797 	{ "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5798 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5799 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5800 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5801 	{ "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5802 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5803 	  REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5804 	{ "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5805 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5806 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5807 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5808 	{ "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5809 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5810 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5811 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5812 	{ "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5813 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5814 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5815 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5816 	{ "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5817 	  SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5818 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5819 	  REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5820 	{ "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5821 	  REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5822 	{ "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5823 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5824 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5825 	{ "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5826 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5827 	{ "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5828 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5829 	{ "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5830 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5831 	{ "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5832 	  REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5833 	{ "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5834 	  REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5835 	{ "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5836 	  REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5837 	{ "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5838 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5839 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5840 	{ "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5841 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5842 	  REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5843 	{ "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5844 	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5845 	  REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5846 	{ "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5847 	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5848 	  REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5849 	{ "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5850 	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5851 	  REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5852 	{ "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5853 	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5854 	{ "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5855 	  REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5856 	{ "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5857 	  REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5858 	{ "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5859 	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5860 	{ "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5861 	  REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5862 	{ "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5863 	  REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5864 	{ "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5865 	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5866 	{ "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5867 	  REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5868 	{ "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5869 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5870 	{ "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5871 	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5872 	  0 },
5873 	{ "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5874 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5875 	{ "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5876 	  0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5877 	  0 },
5878 	{ "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5879 	  16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5880 	{ "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5881 	  REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5882 	{ "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5883 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5884 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5885 	{ "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5886 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5887 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5888 	{ "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5889 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5890 	{ "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5891 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5892 	{ "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5893 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5894 	{ "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5895 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5896 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5897 	{ "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5898 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5899 	  REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5900 	{ "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5901 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5902 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5903 	{ "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5904 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5905 	  REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5906 	{ "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5907 	  REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5908 	{ "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5909 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5910 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5911 	{ "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5912 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5913 	  REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5914 	{ "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5915 	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5916 	  REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5917 	{ "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5918 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5919 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5920 	{ "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5921 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5922 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5923 	{ "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5924 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5925 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5926 	{ "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5927 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5928 	  REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5929 	{ "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5930 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5931 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5932 	{ "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5933 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5934 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5935 	{ "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5936 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5937 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5938 	{ "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5939 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5940 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5941 	{ "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5942 	  1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5943 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5944 	{ "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5945 	  6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5946 	  REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5947 	{ "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5948 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5949 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5950 	{ "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5951 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5952 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5953 	{ "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5954 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5955 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5956 	{ "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5957 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5958 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5959 	{ "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5960 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5961 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5962 	  0 },
5963 	{ "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5964 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5965 	{ "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5966 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5967 	{ "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5968 	  6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5969 	{ "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5970 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5971 	  REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5972 	{ "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5973 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5974 	  REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5975 	{ "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5976 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5977 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5978 	{ "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5979 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5980 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5981 	{ "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5982 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5983 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5984 	{ "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5985 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5986 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5987 	{ "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5988 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5989 	  REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5990 	  0 },
5991 	{ "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5992 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5993 	{ "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5994 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
5995 	{ "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5996 	  6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
5997 	{ "SQC_DATA_BANKB_DIRTY_BIT_RAM",
5998 	  SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5999 	  REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
6000 	{ "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6001 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6002 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
6003 	{ "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6004 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6005 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
6006 	{ "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6007 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6008 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
6009 	{ "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6010 	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6011 	  REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
6012 	{ "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6013 	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6014 	  REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
6015 	{ "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6016 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
6017 	{ "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6018 	  REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
6019 	{ "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6020 	  REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
6021 	{ "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6022 	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
6023 	{ "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6024 	  REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
6025 	{ "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6026 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6027 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
6028 	{ "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6029 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6030 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
6031 	{ "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6032 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6033 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
6034 	{ "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6035 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
6036 	{ "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6037 	  REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
6038 	{ "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6039 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
6040 	{ "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6041 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
6042 	{ "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6043 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
6044 	{ "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6045 	  REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
6046 };
6047 
6048 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6049 				     void *inject_if)
6050 {
6051 	struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6052 	int ret;
6053 	struct ta_ras_trigger_error_input block_info = { 0 };
6054 
6055 	if (adev->asic_type != CHIP_VEGA20)
6056 		return -EINVAL;
6057 
6058 	if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6059 		return -EINVAL;
6060 
6061 	if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6062 		return -EPERM;
6063 
6064 	if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6065 	      info->head.type)) {
6066 		DRM_ERROR("GFX Subblock %s, hardware do not support type 0x%x\n",
6067 			ras_gfx_subblocks[info->head.sub_block_index].name,
6068 			info->head.type);
6069 		return -EPERM;
6070 	}
6071 
6072 	if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6073 	      info->head.type)) {
6074 		DRM_ERROR("GFX Subblock %s, driver do not support type 0x%x\n",
6075 			ras_gfx_subblocks[info->head.sub_block_index].name,
6076 			info->head.type);
6077 		return -EPERM;
6078 	}
6079 
6080 	block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6081 	block_info.sub_block_index =
6082 		ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6083 	block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6084 	block_info.address = info->address;
6085 	block_info.value = info->value;
6086 
6087 	mutex_lock(&adev->grbm_idx_mutex);
6088 	ret = psp_ras_trigger_error(&adev->psp, &block_info);
6089 	mutex_unlock(&adev->grbm_idx_mutex);
6090 
6091 	return ret;
6092 }
6093 
6094 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6095 					  void *ras_error_status)
6096 {
6097 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6098 	uint32_t sec_count, ded_count;
6099 	uint32_t i;
6100 	uint32_t reg_value;
6101 	uint32_t se_id, instance_id;
6102 
6103 	if (adev->asic_type != CHIP_VEGA20)
6104 		return -EINVAL;
6105 
6106 	err_data->ue_count = 0;
6107 	err_data->ce_count = 0;
6108 
6109 	mutex_lock(&adev->grbm_idx_mutex);
6110 	for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6111 		for (instance_id = 0; instance_id < 256; instance_id++) {
6112 			for (i = 0;
6113 			     i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]);
6114 			     i++) {
6115 				if (se_id != 0 &&
6116 				    !gfx_ras_edc_regs[i].per_se_instance)
6117 					continue;
6118 				if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6119 					continue;
6120 
6121 				gfx_v9_0_select_se_sh(adev, se_id, 0,
6122 						      instance_id);
6123 
6124 				reg_value = RREG32(
6125 					adev->reg_offset[gfx_ras_edc_regs[i].ip]
6126 							[gfx_ras_edc_regs[i].inst]
6127 							[gfx_ras_edc_regs[i].seg] +
6128 					gfx_ras_edc_regs[i].reg_offset);
6129 				sec_count = reg_value &
6130 					    gfx_ras_edc_regs[i].sec_count_mask;
6131 				ded_count = reg_value &
6132 					    gfx_ras_edc_regs[i].ded_count_mask;
6133 				if (sec_count) {
6134 					DRM_INFO(
6135 						"Instance[%d][%d]: SubBlock %s, SEC %d\n",
6136 						se_id, instance_id,
6137 						gfx_ras_edc_regs[i].name,
6138 						sec_count);
6139 					err_data->ce_count++;
6140 				}
6141 
6142 				if (ded_count) {
6143 					DRM_INFO(
6144 						"Instance[%d][%d]: SubBlock %s, DED %d\n",
6145 						se_id, instance_id,
6146 						gfx_ras_edc_regs[i].name,
6147 						ded_count);
6148 					err_data->ue_count++;
6149 				}
6150 			}
6151 		}
6152 	}
6153 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6154 	mutex_unlock(&adev->grbm_idx_mutex);
6155 
6156 	return 0;
6157 }
6158 
6159 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6160 				  struct amdgpu_irq_src *source,
6161 				  struct amdgpu_iv_entry *entry)
6162 {
6163 	struct ras_common_if *ras_if = adev->gfx.ras_if;
6164 	struct ras_dispatch_if ih_data = {
6165 		.entry = entry,
6166 	};
6167 
6168 	if (!ras_if)
6169 		return 0;
6170 
6171 	ih_data.head = *ras_if;
6172 
6173 	DRM_ERROR("CP ECC ERROR IRQ\n");
6174 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6175 	return 0;
6176 }
6177 
6178 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6179 	.name = "gfx_v9_0",
6180 	.early_init = gfx_v9_0_early_init,
6181 	.late_init = gfx_v9_0_late_init,
6182 	.sw_init = gfx_v9_0_sw_init,
6183 	.sw_fini = gfx_v9_0_sw_fini,
6184 	.hw_init = gfx_v9_0_hw_init,
6185 	.hw_fini = gfx_v9_0_hw_fini,
6186 	.suspend = gfx_v9_0_suspend,
6187 	.resume = gfx_v9_0_resume,
6188 	.is_idle = gfx_v9_0_is_idle,
6189 	.wait_for_idle = gfx_v9_0_wait_for_idle,
6190 	.soft_reset = gfx_v9_0_soft_reset,
6191 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
6192 	.set_powergating_state = gfx_v9_0_set_powergating_state,
6193 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
6194 };
6195 
6196 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6197 	.type = AMDGPU_RING_TYPE_GFX,
6198 	.align_mask = 0xff,
6199 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6200 	.support_64bit_ptrs = true,
6201 	.vmhub = AMDGPU_GFXHUB_0,
6202 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6203 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6204 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6205 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
6206 		5 +  /* COND_EXEC */
6207 		7 +  /* PIPELINE_SYNC */
6208 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6209 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6210 		2 + /* VM_FLUSH */
6211 		8 +  /* FENCE for VM_FLUSH */
6212 		20 + /* GDS switch */
6213 		4 + /* double SWITCH_BUFFER,
6214 		       the first COND_EXEC jump to the place just
6215 			   prior to this double SWITCH_BUFFER  */
6216 		5 + /* COND_EXEC */
6217 		7 +	 /*	HDP_flush */
6218 		4 +	 /*	VGT_flush */
6219 		14 + /*	CE_META */
6220 		31 + /*	DE_META */
6221 		3 + /* CNTX_CTRL */
6222 		5 + /* HDP_INVL */
6223 		8 + 8 + /* FENCE x2 */
6224 		2, /* SWITCH_BUFFER */
6225 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
6226 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6227 	.emit_fence = gfx_v9_0_ring_emit_fence,
6228 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6229 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6230 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6231 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6232 	.test_ring = gfx_v9_0_ring_test_ring,
6233 	.test_ib = gfx_v9_0_ring_test_ib,
6234 	.insert_nop = amdgpu_ring_insert_nop,
6235 	.pad_ib = amdgpu_ring_generic_pad_ib,
6236 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
6237 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6238 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6239 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6240 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
6241 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6242 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6243 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6244 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
6245 };
6246 
6247 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6248 	.type = AMDGPU_RING_TYPE_COMPUTE,
6249 	.align_mask = 0xff,
6250 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6251 	.support_64bit_ptrs = true,
6252 	.vmhub = AMDGPU_GFXHUB_0,
6253 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6254 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6255 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6256 	.emit_frame_size =
6257 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6258 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6259 		5 + /* hdp invalidate */
6260 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6261 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6262 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6263 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6264 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6265 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6266 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
6267 	.emit_fence = gfx_v9_0_ring_emit_fence,
6268 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6269 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6270 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6271 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6272 	.test_ring = gfx_v9_0_ring_test_ring,
6273 	.test_ib = gfx_v9_0_ring_test_ib,
6274 	.insert_nop = amdgpu_ring_insert_nop,
6275 	.pad_ib = amdgpu_ring_generic_pad_ib,
6276 	.set_priority = gfx_v9_0_ring_set_priority_compute,
6277 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6278 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6279 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6280 };
6281 
6282 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6283 	.type = AMDGPU_RING_TYPE_KIQ,
6284 	.align_mask = 0xff,
6285 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6286 	.support_64bit_ptrs = true,
6287 	.vmhub = AMDGPU_GFXHUB_0,
6288 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
6289 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
6290 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
6291 	.emit_frame_size =
6292 		20 + /* gfx_v9_0_ring_emit_gds_switch */
6293 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
6294 		5 + /* hdp invalidate */
6295 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6296 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6297 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6298 		2 + /* gfx_v9_0_ring_emit_vm_flush */
6299 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6300 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
6301 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6302 	.test_ring = gfx_v9_0_ring_test_ring,
6303 	.insert_nop = amdgpu_ring_insert_nop,
6304 	.pad_ib = amdgpu_ring_generic_pad_ib,
6305 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
6306 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
6307 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6308 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6309 };
6310 
6311 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6312 {
6313 	int i;
6314 
6315 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6316 
6317 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6318 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6319 
6320 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6321 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6322 }
6323 
6324 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6325 	.set = gfx_v9_0_set_eop_interrupt_state,
6326 	.process = gfx_v9_0_eop_irq,
6327 };
6328 
6329 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6330 	.set = gfx_v9_0_set_priv_reg_fault_state,
6331 	.process = gfx_v9_0_priv_reg_irq,
6332 };
6333 
6334 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6335 	.set = gfx_v9_0_set_priv_inst_fault_state,
6336 	.process = gfx_v9_0_priv_inst_irq,
6337 };
6338 
6339 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6340 	.set = gfx_v9_0_set_cp_ecc_error_state,
6341 	.process = gfx_v9_0_cp_ecc_error_irq,
6342 };
6343 
6344 
6345 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6346 {
6347 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6348 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6349 
6350 	adev->gfx.priv_reg_irq.num_types = 1;
6351 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6352 
6353 	adev->gfx.priv_inst_irq.num_types = 1;
6354 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6355 
6356 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
6357 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6358 }
6359 
6360 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6361 {
6362 	switch (adev->asic_type) {
6363 	case CHIP_VEGA10:
6364 	case CHIP_VEGA12:
6365 	case CHIP_VEGA20:
6366 	case CHIP_RAVEN:
6367 	case CHIP_ARCTURUS:
6368 	case CHIP_RENOIR:
6369 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6370 		break;
6371 	default:
6372 		break;
6373 	}
6374 }
6375 
6376 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6377 {
6378 	/* init asci gds info */
6379 	switch (adev->asic_type) {
6380 	case CHIP_VEGA10:
6381 	case CHIP_VEGA12:
6382 	case CHIP_VEGA20:
6383 		adev->gds.gds_size = 0x10000;
6384 		break;
6385 	case CHIP_RAVEN:
6386 	case CHIP_ARCTURUS:
6387 		adev->gds.gds_size = 0x1000;
6388 		break;
6389 	default:
6390 		adev->gds.gds_size = 0x10000;
6391 		break;
6392 	}
6393 
6394 	switch (adev->asic_type) {
6395 	case CHIP_VEGA10:
6396 	case CHIP_VEGA20:
6397 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6398 		break;
6399 	case CHIP_VEGA12:
6400 		adev->gds.gds_compute_max_wave_id = 0x27f;
6401 		break;
6402 	case CHIP_RAVEN:
6403 		if (adev->rev_id >= 0x8)
6404 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6405 		else
6406 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6407 		break;
6408 	case CHIP_ARCTURUS:
6409 		adev->gds.gds_compute_max_wave_id = 0xfff;
6410 		break;
6411 	default:
6412 		/* this really depends on the chip */
6413 		adev->gds.gds_compute_max_wave_id = 0x7ff;
6414 		break;
6415 	}
6416 
6417 	adev->gds.gws_size = 64;
6418 	adev->gds.oa_size = 16;
6419 }
6420 
6421 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6422 						 u32 bitmap)
6423 {
6424 	u32 data;
6425 
6426 	if (!bitmap)
6427 		return;
6428 
6429 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6430 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6431 
6432 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6433 }
6434 
6435 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6436 {
6437 	u32 data, mask;
6438 
6439 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6440 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6441 
6442 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6443 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6444 
6445 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6446 
6447 	return (~data) & mask;
6448 }
6449 
6450 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6451 				 struct amdgpu_cu_info *cu_info)
6452 {
6453 	int i, j, k, counter, active_cu_number = 0;
6454 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6455 	unsigned disable_masks[4 * 4];
6456 
6457 	if (!adev || !cu_info)
6458 		return -EINVAL;
6459 
6460 	/*
6461 	 * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs
6462 	 */
6463 	if (adev->gfx.config.max_shader_engines *
6464 		adev->gfx.config.max_sh_per_se > 16)
6465 		return -EINVAL;
6466 
6467 	amdgpu_gfx_parse_disable_cu(disable_masks,
6468 				    adev->gfx.config.max_shader_engines,
6469 				    adev->gfx.config.max_sh_per_se);
6470 
6471 	mutex_lock(&adev->grbm_idx_mutex);
6472 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6473 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6474 			mask = 1;
6475 			ao_bitmap = 0;
6476 			counter = 0;
6477 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6478 			gfx_v9_0_set_user_cu_inactive_bitmap(
6479 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6480 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6481 
6482 			/*
6483 			 * The bitmap(and ao_cu_bitmap) in cu_info structure is
6484 			 * 4x4 size array, and it's usually suitable for Vega
6485 			 * ASICs which has 4*2 SE/SH layout.
6486 			 * But for Arcturus, SE/SH layout is changed to 8*1.
6487 			 * To mostly reduce the impact, we make it compatible
6488 			 * with current bitmap array as below:
6489 			 *    SE4,SH0 --> bitmap[0][1]
6490 			 *    SE5,SH0 --> bitmap[1][1]
6491 			 *    SE6,SH0 --> bitmap[2][1]
6492 			 *    SE7,SH0 --> bitmap[3][1]
6493 			 */
6494 			cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6495 
6496 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
6497 				if (bitmap & mask) {
6498 					if (counter < adev->gfx.config.max_cu_per_sh)
6499 						ao_bitmap |= mask;
6500 					counter ++;
6501 				}
6502 				mask <<= 1;
6503 			}
6504 			active_cu_number += counter;
6505 			if (i < 2 && j < 2)
6506 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6507 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6508 		}
6509 	}
6510 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6511 	mutex_unlock(&adev->grbm_idx_mutex);
6512 
6513 	cu_info->number = active_cu_number;
6514 	cu_info->ao_cu_mask = ao_cu_mask;
6515 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6516 
6517 	return 0;
6518 }
6519 
6520 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6521 {
6522 	.type = AMD_IP_BLOCK_TYPE_GFX,
6523 	.major = 9,
6524 	.minor = 0,
6525 	.rev = 0,
6526 	.funcs = &gfx_v9_0_ip_funcs,
6527 };
6528