xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "imu_v11_0.h"
33 #include "soc21.h"
34 #include "nvd.h"
35 
36 #include "gc/gc_11_0_0_offset.h"
37 #include "gc/gc_11_0_0_sh_mask.h"
38 #include "smuio/smuio_13_0_6_offset.h"
39 #include "smuio/smuio_13_0_6_sh_mask.h"
40 #include "navi10_enum.h"
41 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
42 
43 #include "soc15.h"
44 #include "clearstate_gfx11.h"
45 #include "v11_structs.h"
46 #include "gfx_v11_0.h"
47 #include "gfx_v11_0_cleaner_shader.h"
48 #include "gfx_v11_0_3.h"
49 #include "nbio_v4_3.h"
50 #include "mes_v11_0.h"
51 #include "mes_userqueue.h"
52 #include "amdgpu_userq_fence.h"
53 
/* Sizing constants local to the GFX11 code. */
#define GFX11_NUM_GFX_RINGS		1
#define GFX11_MEC_HPD_SIZE	2048

#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388

/*
 * Register offsets defined locally, each paired with its *_BASE_IDX.
 * The _gc_11_0_1 / _smu_15_0_0 suffixes presumably mark offsets that are
 * specific to those IP versions -- confirm against the register headers.
 */
#define regCGTT_WD_CLK_CTRL		0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX	1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1	0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
#define regPC_CONFIG_CNTL_1		0x194d
#define regPC_CONFIG_CNTL_1_BASE_IDX	1

#define regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0               0x0030
#define regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0_BASE_IDX      1
#define regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0               0x0031
#define regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0_BASE_IDX      1

/* *_DEFAULT values for the CP gfx queue registers (users are not in this part of the file). */
#define regCP_GFX_MQD_CONTROL_DEFAULT                                             0x00000100
#define regCP_GFX_HQD_VMID_DEFAULT                                                0x00000000
#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT                                      0x00000000
#define regCP_GFX_HQD_QUANTUM_DEFAULT                                             0x00000a01
#define regCP_GFX_HQD_CNTL_DEFAULT                                                0x00a00000
#define regCP_RB_DOORBELL_CONTROL_DEFAULT                                         0x00000000
#define regCP_GFX_HQD_RPTR_DEFAULT                                                0x00000000

/* *_DEFAULT values for the CP compute queue (HQD/MQD) registers. */
#define regCP_HQD_EOP_CONTROL_DEFAULT                                             0x00000006
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT                                     0x00000000
#define regCP_MQD_CONTROL_DEFAULT                                                 0x00000100
#define regCP_HQD_PQ_CONTROL_DEFAULT                                              0x00308509
#define regCP_HQD_PQ_RPTR_DEFAULT                                                 0x00000000
#define regCP_HQD_PERSISTENT_STATE_DEFAULT                                        0x0be05501
#define regCP_HQD_IB_CONTROL_DEFAULT                                              0x00300000
88 
/*
 * Firmware file names declared for this driver, one pfp/me/mec/rlc set per
 * GC IP version (11.0.0 - 11.0.4 and 11.5.0 - 11.5.6).  gc_11_0_0 additionally
 * lists the rlc_kicker, rlc_1 and toc images.  The suffixes match the
 * "amdgpu/%s_<part>.bin" names requested by the microcode init code in this file.
 */
89 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
90 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
91 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
92 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
93 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
94 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
95 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
96 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
98 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
99 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
102 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
103 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
104 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
105 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
106 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
107 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
110 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
111 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
112 MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
113 MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
114 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
115 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
116 MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
118 MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
119 MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
120 MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
121 MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
122 MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
123 MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
124 MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
125 MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
126 MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
127 MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");
128 MODULE_FIRMWARE("amdgpu/gc_11_5_4_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/gc_11_5_4_me.bin");
130 MODULE_FIRMWARE("amdgpu/gc_11_5_4_mec.bin");
131 MODULE_FIRMWARE("amdgpu/gc_11_5_4_rlc.bin");
132 MODULE_FIRMWARE("amdgpu/gc_11_5_6_pfp.bin");
133 MODULE_FIRMWARE("amdgpu/gc_11_5_6_me.bin");
134 MODULE_FIRMWARE("amdgpu/gc_11_5_6_mec.bin");
135 MODULE_FIRMWARE("amdgpu/gc_11_5_6_rlc.bin");
136 
/*
 * General GC register list: GRBM/CP status, ring-buffer and IB pointers,
 * UTCL1 and protection-fault status, CP instruction pointers, the MES header
 * dump register and the per-SE GRBM status registers.
 * NOTE(review): SOC15_REG_ENTRY_STR presumably records each register's name
 * next to its offset for register dumps; the consumers of this table are not
 * in this part of the file -- confirm.
 */
137 static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
138 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
139 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
140 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
141 	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
142 	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
143 	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
144 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
145 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
146 	SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
147 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
148 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
149 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
150 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
151 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
152 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
153 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
154 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
155 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
156 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
157 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
158 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
159 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
160 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
161 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
162 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
163 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
164 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
165 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
166 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
167 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
168 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
169 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
170 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
171 	SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
172 	SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
173 	SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
174 	SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
175 	SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
176 	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
177 	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
178 	SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
179 	SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
180 	SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
181 	SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
182 	SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
183 	SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
184 	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
185 	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
186 	SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
187 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
188 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
189 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
190 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
191 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
192 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
193 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
194 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
195 	/*
	 * cp header registers: the same MES header dump register is listed
	 * eight times (presumably each read returns the next header entry --
	 * confirm against the hardware documentation)
	 */
196 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
197 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
198 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
199 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
200 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
201 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
202 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
203 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
204 	/* SE status registers */
205 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
206 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
207 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
208 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
209 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
210 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
211 };
212 
/*
 * Compute queue register list: the CP_HQD_* registers followed by eight
 * entries for the ME header dump register.
 * NOTE(review): the consumers of this table are not in this part of the
 * file; presumably it is read per compute queue for register dumps -- confirm.
 */
213 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
214 	/* compute registers */
215 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
216 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
217 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
218 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
219 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
220 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
221 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
222 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
223 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
224 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
225 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
226 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
227 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
228 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
229 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
230 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
231 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
232 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
233 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
234 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
235 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
236 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
237 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
238 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
239 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
240 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
241 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
242 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
243 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
244 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
245 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
246 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
247 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
248 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
249 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
250 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
251 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
252 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
253 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
254 	/* cp header registers: ME header dump register, listed eight times */
255 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
256 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
257 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
258 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
259 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
260 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
261 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
262 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
263 };
264 
/*
 * Gfx queue register list: CP_GFX_HQD_* / CP_GFX_MQD_* registers, ring-buffer
 * and IB1 pointers, then eight entries each for the PFP and ME header dump
 * registers.
 * NOTE(review): the consumers of this table are not in this part of the
 * file; presumably it is read per gfx queue for register dumps -- confirm.
 */
265 static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
266 	/* gfx queue registers */
267 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
268 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
269 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
270 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
271 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
272 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
273 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
274 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
275 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
276 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
277 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
278 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
279 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
280 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
281 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
282 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
283 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
284 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
285 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
286 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
287 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
288 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
289 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
290 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
291 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
292 	/* cp header registers: PFP header dump x8, then ME header dump x8 */
293 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
294 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
295 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
296 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
297 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
298 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
299 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
300 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
301 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
302 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
303 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
304 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
305 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
306 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
307 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
308 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
309 };
310 
/*
 * Golden register settings programmed by gfx_v11_0_init_golden_registers()
 * for every GC 11 variant (skipped on SR-IOV virtual functions).
 * NOTE(review): the two hex arguments are presumably a mask and a value as
 * interpreted by soc15_program_register_sequence() -- confirm against the
 * SOC15_REG_GOLDEN_VALUE definition.
 */
311 static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
312 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
313 };
314 
/*
 * Additional golden register settings that gfx_v11_0_init_golden_registers()
 * programs only for GC IP versions 11.0.1 and 11.0.4, before the common
 * golden_settings_gc_11_0 table.
 */
315 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
316 {
317 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
318 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
319 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
320 	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
321 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
322 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
323 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
324 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
325 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
326 };
327 
/*
 * Default SH_MEM_CONFIG register value: 64-bit address mode, unaligned
 * alignment mode, and the INITIAL_INST_PREFETCH field set to 3.
 */
328 #define DEFAULT_SH_MEM_CONFIG \
329 	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
330 	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
331 	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
332 
/*
 * Forward declarations of file-local (static) functions.  Their definitions
 * are not in this part of the file; some are already called by the code
 * below (e.g. gfx_v11_0_ring_emit_wreg, gfx_v11_0_ring_invalidate_tlbs).
 */
333 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
334 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
335 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
336 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
337 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
338 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
339 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
340 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
341                                  struct amdgpu_cu_info *cu_info);
342 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
343 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
344 				   u32 sh_num, u32 instance, int xcc_id);
345 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
346 
/* Ring packet emitters and RLC helpers. */
347 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
348 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
349 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
350 				     uint32_t val);
351 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
352 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
353 					   uint16_t pasid, uint32_t flush_type,
354 					   bool all_hub, uint8_t dst_sel);
355 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
356 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
357 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
358 				      bool enable);
359 
360 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
361 {
362 	struct amdgpu_device *adev = kiq_ring->adev;
363 	u64 shader_mc_addr;
364 
365 	/* Cleaner shader MC address */
366 	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
367 
368 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
369 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
370 			  PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
371 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
372 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
373 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
374 	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
375 	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
376 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
377 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
378 }
379 
380 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
381 				 struct amdgpu_ring *ring)
382 {
383 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
384 	uint64_t wptr_addr = ring->wptr_gpu_addr;
385 	uint32_t me = 0, eng_sel = 0;
386 
387 	switch (ring->funcs->type) {
388 	case AMDGPU_RING_TYPE_COMPUTE:
389 		me = 1;
390 		eng_sel = 0;
391 		break;
392 	case AMDGPU_RING_TYPE_GFX:
393 		me = 0;
394 		eng_sel = 4;
395 		break;
396 	case AMDGPU_RING_TYPE_MES:
397 		me = 2;
398 		eng_sel = 5;
399 		break;
400 	default:
401 		WARN_ON(1);
402 	}
403 
404 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
405 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
406 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
407 			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
408 			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
409 			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
410 			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
411 			  PACKET3_MAP_QUEUES_ME((me)) |
412 			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
413 			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
414 			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
415 			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
416 	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
417 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
418 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
419 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
420 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
421 }
422 
423 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
424 				   struct amdgpu_ring *ring,
425 				   enum amdgpu_unmap_queues_action action,
426 				   u64 gpu_addr, u64 seq)
427 {
428 	struct amdgpu_device *adev = kiq_ring->adev;
429 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
430 
431 	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
432 		amdgpu_mes_unmap_legacy_queue(adev, ring, action,
433 					      gpu_addr, seq, 0);
434 		return;
435 	}
436 
437 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
438 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
439 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
440 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
441 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
442 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
443 	amdgpu_ring_write(kiq_ring,
444 		  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
445 
446 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
447 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
448 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
449 		amdgpu_ring_write(kiq_ring, seq);
450 	} else {
451 		amdgpu_ring_write(kiq_ring, 0);
452 		amdgpu_ring_write(kiq_ring, 0);
453 		amdgpu_ring_write(kiq_ring, 0);
454 	}
455 }
456 
457 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
458 				   struct amdgpu_ring *ring,
459 				   u64 addr,
460 				   u64 seq)
461 {
462 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
463 
464 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
465 	amdgpu_ring_write(kiq_ring,
466 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
467 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
468 			  PACKET3_QUERY_STATUS_COMMAND(2));
469 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
470 			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
471 			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
472 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
473 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
474 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
475 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
476 }
477 
478 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
479 				uint16_t pasid, uint32_t flush_type,
480 				bool all_hub)
481 {
482 	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
483 }
484 
/*
 * KIQ PM4 callbacks for GFX11, installed by gfx_v11_0_set_kiq_pm4_funcs().
 * The *_size fields for set_resources, map_queues, unmap_queues and
 * query_status equal the number of dwords the corresponding emitter above
 * writes with amdgpu_ring_write() (packet header included).
 * invalidate_tlbs_size covers gfx_v11_0_ring_invalidate_tlbs(), whose body is
 * not in this part of the file.
 */
485 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
486 	.kiq_set_resources = gfx11_kiq_set_resources,
487 	.kiq_map_queues = gfx11_kiq_map_queues,
488 	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
489 	.kiq_query_status = gfx11_kiq_query_status,
490 	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
491 	.set_resources_size = 8,
492 	.map_queues_size = 7,
493 	.unmap_queues_size = 6,
494 	.query_status_size = 7,
495 	.invalidate_tlbs_size = 2,
496 };
497 
498 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
499 {
500 	adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
501 }
502 
503 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
504 {
505 	if (amdgpu_sriov_vf(adev))
506 		return;
507 
508 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
509 	case IP_VERSION(11, 0, 1):
510 	case IP_VERSION(11, 0, 4):
511 		soc15_program_register_sequence(adev,
512 						golden_settings_gc_11_0_1,
513 						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
514 		break;
515 	default:
516 		break;
517 	}
518 	soc15_program_register_sequence(adev,
519 					golden_settings_gc_11_0,
520 					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
521 
522 }
523 
524 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
525 				       bool wc, uint32_t reg, uint32_t val)
526 {
527 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
528 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
529 			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
530 	amdgpu_ring_write(ring, reg);
531 	amdgpu_ring_write(ring, 0);
532 	amdgpu_ring_write(ring, val);
533 }
534 
535 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
536 				  int mem_space, int opt, uint32_t addr0,
537 				  uint32_t addr1, uint32_t ref, uint32_t mask,
538 				  uint32_t inv)
539 {
540 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
541 	amdgpu_ring_write(ring,
542 			  /* memory (1) or register (0) */
543 			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
544 			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
545 			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
546 			   WAIT_REG_MEM_ENGINE(eng_sel)));
547 
548 	if (mem_space)
549 		BUG_ON(addr0 & 0x3); /* Dword align */
550 	amdgpu_ring_write(ring, addr0);
551 	amdgpu_ring_write(ring, addr1);
552 	amdgpu_ring_write(ring, ref);
553 	amdgpu_ring_write(ring, mask);
554 	amdgpu_ring_write(ring, inv); /* poll interval */
555 }
556 
557 static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
558 {
559 	/* Header itself is a NOP packet */
560 	if (num_nop == 1) {
561 		amdgpu_ring_write(ring, ring->funcs->nop);
562 		return;
563 	}
564 
565 	/* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
566 	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
567 
568 	/* Header is at index 0, followed by num_nops - 1 NOP packet's */
569 	amdgpu_ring_insert_nop(ring, num_nop - 1);
570 }
571 
572 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
573 {
574 	struct amdgpu_device *adev = ring->adev;
575 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
576 	uint32_t tmp = 0;
577 	unsigned i;
578 	int r;
579 
580 	WREG32(scratch, 0xCAFEDEAD);
581 	r = amdgpu_ring_alloc(ring, 5);
582 	if (r) {
583 		drm_err(adev_to_drm(adev), "cp failed to lock ring %d (%d).\n",
584 			ring->idx, r);
585 		return r;
586 	}
587 
588 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
589 		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
590 	} else {
591 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
592 		amdgpu_ring_write(ring, scratch -
593 				  PACKET3_SET_UCONFIG_REG_START);
594 		amdgpu_ring_write(ring, 0xDEADBEEF);
595 	}
596 	amdgpu_ring_commit(ring);
597 
598 	for (i = 0; i < adev->usec_timeout; i++) {
599 		tmp = RREG32(scratch);
600 		if (tmp == 0xDEADBEEF)
601 			break;
602 		if (amdgpu_emu_mode == 1)
603 			msleep(1);
604 		else
605 			udelay(1);
606 	}
607 
608 	if (i >= adev->usec_timeout)
609 		r = -ETIMEDOUT;
610 	return r;
611 }
612 
613 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
614 {
615 	struct amdgpu_device *adev = ring->adev;
616 	struct amdgpu_ib ib;
617 	struct dma_fence *f = NULL;
618 	unsigned index;
619 	uint64_t gpu_addr;
620 	uint32_t *cpu_ptr;
621 	long r;
622 
623 	/* MES KIQ fw hasn't indirect buffer support for now */
624 	if (adev->enable_mes_kiq &&
625 	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
626 		return 0;
627 
628 	memset(&ib, 0, sizeof(ib));
629 
630 	r = amdgpu_device_wb_get(adev, &index);
631 	if (r)
632 		return r;
633 
634 	gpu_addr = adev->wb.gpu_addr + (index * 4);
635 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
636 	cpu_ptr = &adev->wb.wb[index];
637 
638 	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
639 	if (r) {
640 		drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r);
641 		goto err1;
642 	}
643 
644 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
645 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
646 	ib.ptr[2] = lower_32_bits(gpu_addr);
647 	ib.ptr[3] = upper_32_bits(gpu_addr);
648 	ib.ptr[4] = 0xDEADBEEF;
649 	ib.length_dw = 5;
650 
651 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
652 	if (r)
653 		goto err2;
654 
655 	r = dma_fence_wait_timeout(f, false, timeout);
656 	if (r == 0) {
657 		r = -ETIMEDOUT;
658 		goto err2;
659 	} else if (r < 0) {
660 		goto err2;
661 	}
662 
663 	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
664 		r = 0;
665 	else
666 		r = -EINVAL;
667 err2:
668 	amdgpu_ib_free(&ib, NULL);
669 	dma_fence_put(f);
670 err1:
671 	amdgpu_device_wb_free(adev, index);
672 	return r;
673 }
674 
675 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
676 {
677 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
678 	amdgpu_ucode_release(&adev->gfx.me_fw);
679 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
680 	amdgpu_ucode_release(&adev->gfx.mec_fw);
681 
682 	kfree(adev->gfx.rlc.register_list_format);
683 }
684 
685 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
686 {
687 	const struct psp_firmware_header_v1_0 *toc_hdr;
688 	int err = 0;
689 
690 	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
691 				   AMDGPU_UCODE_REQUIRED,
692 				   "amdgpu/%s_toc.bin", ucode_prefix);
693 	if (err)
694 		goto out;
695 
696 	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
697 	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
698 	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
699 	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
700 	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
701 				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
702 	return 0;
703 out:
704 	amdgpu_ucode_release(&adev->psp.toc_fw);
705 	return err;
706 }
707 
708 static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
709 {
710 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
711 	case IP_VERSION(11, 0, 0):
712 	case IP_VERSION(11, 0, 2):
713 	case IP_VERSION(11, 0, 3):
714 		if ((adev->gfx.me_fw_version >= 1505) &&
715 		    (adev->gfx.pfp_fw_version >= 1600) &&
716 		    (adev->gfx.mec_fw_version >= 512)) {
717 			if (amdgpu_sriov_vf(adev))
718 				adev->gfx.cp_gfx_shadow = true;
719 			else
720 				adev->gfx.cp_gfx_shadow = false;
721 		}
722 		break;
723 	default:
724 		adev->gfx.cp_gfx_shadow = false;
725 		break;
726 	}
727 }
728 
729 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
730 {
731 	char ucode_prefix[25];
732 	int err;
733 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
734 	uint16_t version_major;
735 	uint16_t version_minor;
736 
737 	DRM_DEBUG("\n");
738 
739 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
740 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
741 				   AMDGPU_UCODE_REQUIRED,
742 				   "amdgpu/%s_pfp.bin", ucode_prefix);
743 	if (err)
744 		goto out;
745 	/* check pfp fw hdr version to decide if enable rs64 for gfx11.*/
746 	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
747 				(union amdgpu_firmware_header *)
748 				adev->gfx.pfp_fw->data, 2, 0);
749 	if (adev->gfx.rs64_enable) {
750 		dev_info(adev->dev, "CP RS64 enable\n");
751 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
752 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
753 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
754 	} else {
755 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
756 	}
757 
758 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
759 				   AMDGPU_UCODE_REQUIRED,
760 				   "amdgpu/%s_me.bin", ucode_prefix);
761 	if (err)
762 		goto out;
763 	if (adev->gfx.rs64_enable) {
764 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
765 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
766 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
767 	} else {
768 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
769 	}
770 
771 	if (!amdgpu_sriov_vf(adev)) {
772 		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
773 		    adev->pdev->revision == 0xCE)
774 			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
775 						   AMDGPU_UCODE_REQUIRED,
776 						   "amdgpu/gc_11_0_0_rlc_1.bin");
777 		else if (amdgpu_is_kicker_fw(adev))
778 			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
779 						   AMDGPU_UCODE_REQUIRED,
780 						   "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
781 		else
782 			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
783 						   AMDGPU_UCODE_REQUIRED,
784 						   "amdgpu/%s_rlc.bin", ucode_prefix);
785 		if (err)
786 			goto out;
787 		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
788 		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
789 		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
790 		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
791 		if (err)
792 			goto out;
793 	}
794 
795 	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
796 				   AMDGPU_UCODE_REQUIRED,
797 				   "amdgpu/%s_mec.bin", ucode_prefix);
798 	if (err)
799 		goto out;
800 	if (adev->gfx.rs64_enable) {
801 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
802 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
803 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
804 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
805 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
806 	} else {
807 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
808 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
809 	}
810 
811 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
812 		err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
813 
814 	/* only one MEC for gfx 11.0.0. */
815 	adev->gfx.mec2_fw = NULL;
816 
817 	gfx_v11_0_check_fw_cp_gfx_shadow(adev);
818 
819 	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
820 		err = adev->gfx.imu.funcs->init_microcode(adev);
821 		if (err)
822 			DRM_ERROR("Failed to init imu firmware!\n");
823 		return err;
824 	}
825 
826 out:
827 	if (err) {
828 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
829 		amdgpu_ucode_release(&adev->gfx.me_fw);
830 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
831 		amdgpu_ucode_release(&adev->gfx.mec_fw);
832 	}
833 
834 	return err;
835 }
836 
837 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
838 {
839 	u32 count = 0;
840 	const struct cs_section_def *sect = NULL;
841 	const struct cs_extent_def *ext = NULL;
842 
843 	/* begin clear state */
844 	count += 2;
845 	/* context control state */
846 	count += 3;
847 
848 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
849 		for (ext = sect->section; ext->extent != NULL; ++ext) {
850 			if (sect->id == SECT_CONTEXT)
851 				count += 2 + ext->reg_count;
852 			else
853 				return 0;
854 		}
855 	}
856 
857 	/* set PA_SC_TILE_STEERING_OVERRIDE */
858 	count += 3;
859 	/* end clear state */
860 	count += 2;
861 	/* clear state */
862 	count += 2;
863 
864 	return count;
865 }
866 
867 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
868 {
869 	u32 count = 0;
870 	int ctx_reg_offset;
871 
872 	if (adev->gfx.rlc.cs_data == NULL)
873 		return;
874 	if (buffer == NULL)
875 		return;
876 
877 	count = amdgpu_gfx_csb_preamble_start(buffer);
878 	count = amdgpu_gfx_csb_data_parser(adev, buffer, count);
879 
880 	ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
881 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
882 	buffer[count++] = cpu_to_le32(ctx_reg_offset);
883 	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
884 
885 	amdgpu_gfx_csb_preamble_end(buffer, count);
886 }
887 
/*
 * Free the RLC kernel buffer objects: the clear state block and the CP jump
 * table block.  Each call is handed the BO, its GPU address and its CPU
 * pointer.
 */
static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			&adev->gfx.rlc.clear_state_gpu_addr,
			(void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			&adev->gfx.rlc.cp_table_gpu_addr,
			(void **)&adev->gfx.rlc.cp_table_ptr);
}
900 
901 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
902 {
903 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
904 
905 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
906 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
907 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
908 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
909 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
910 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
911 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
912 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
913 	adev->gfx.rlc.rlcg_reg_access_supported = true;
914 }
915 
916 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
917 {
918 	const struct cs_section_def *cs_data;
919 	int r;
920 
921 	adev->gfx.rlc.cs_data = gfx11_cs_data;
922 
923 	cs_data = adev->gfx.rlc.cs_data;
924 
925 	if (cs_data) {
926 		/* init clear state block */
927 		r = amdgpu_gfx_rlc_init_csb(adev);
928 		if (r)
929 			return r;
930 	}
931 
932 	/* init spm vmid with 0xf */
933 	if (adev->gfx.rlc.funcs->update_spm_vmid)
934 		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf);
935 
936 	return 0;
937 }
938 
/*
 * Free the MEC kernel buffer objects: the HPD EOP buffer and the two MEC
 * firmware buffers.  No GPU address or CPU pointer is passed, only the BOs.
 */
static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}
945 
/*
 * Reset the ME queue bitmap and then let amdgpu_gfx_graphics_queue_acquire()
 * claim the graphics queues.
 */
static void gfx_v11_0_me_init(struct amdgpu_device *adev)
{
	/* start from an empty bitmap before queues are acquired */
	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);
}
952 
953 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
954 {
955 	int r;
956 	u32 *hpd;
957 	size_t mec_hpd_size;
958 
959 	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
960 
961 	/* take ownership of the relevant compute queues */
962 	amdgpu_gfx_compute_queue_acquire(adev);
963 	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
964 
965 	if (mec_hpd_size) {
966 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
967 					      AMDGPU_GEM_DOMAIN_GTT,
968 					      &adev->gfx.mec.hpd_eop_obj,
969 					      &adev->gfx.mec.hpd_eop_gpu_addr,
970 					      (void **)&hpd);
971 		if (r) {
972 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
973 			gfx_v11_0_mec_fini(adev);
974 			return r;
975 		}
976 
977 		memset(hpd, 0, mec_hpd_size);
978 
979 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
980 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
981 	}
982 
983 	return 0;
984 }
985 
986 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
987 {
988 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
989 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
990 		(address << SQ_IND_INDEX__INDEX__SHIFT));
991 	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
992 }
993 
994 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
995 			   uint32_t thread, uint32_t regno,
996 			   uint32_t num, uint32_t *out)
997 {
998 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
999 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1000 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1001 		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
1002 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1003 	while (num--)
1004 		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
1005 }
1006 
1007 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1008 {
1009 	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
1010 	 * field when performing a select_se_sh so it should be
1011 	 * zero here */
1012 	WARN_ON(simd != 0);
1013 
1014 	/* type 3 wave data */
1015 	dst[(*no_fields)++] = 3;
1016 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
1017 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
1018 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
1019 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
1020 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
1021 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
1022 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
1023 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
1024 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
1025 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
1026 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
1027 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
1028 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
1029 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
1030 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
1031 }
1032 
1033 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1034 				     uint32_t wave, uint32_t start,
1035 				     uint32_t size, uint32_t *dst)
1036 {
1037 	WARN_ON(simd != 0);
1038 
1039 	wave_read_regs(
1040 		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
1041 		dst);
1042 }
1043 
1044 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1045 				      uint32_t wave, uint32_t thread,
1046 				      uint32_t start, uint32_t size,
1047 				      uint32_t *dst)
1048 {
1049 	wave_read_regs(
1050 		adev, wave, thread,
1051 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1052 }
1053 
/*
 * Select the given ME/pipe/queue/VMID by forwarding to soc21_grbm_select().
 * @xcc_id is accepted but not used here.
 */
static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
					u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
	soc21_grbm_select(adev, me, pipe, q, vm);
}
1059 
/*
 * Sizes and alignments reported through gfx_v11_0_get_gfx_shadow_info_nocheck()
 * as the gfx shadow_* and csa_* values.
 * all sizes are in bytes
 */
#define MQD_SHADOW_BASE_SIZE      73728
#define MQD_SHADOW_BASE_ALIGNMENT 256
#define MQD_FWWORKAREA_SIZE       484
#define MQD_FWWORKAREA_ALIGNMENT  256
1065 
1066 static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
1067 					 struct amdgpu_gfx_shadow_info *shadow_info)
1068 {
1069 	/* for gfx */
1070 	shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
1071 	shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
1072 	shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
1073 	shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
1074 	/* for compute */
1075 	shadow_info->eop_size = GFX11_MEC_HPD_SIZE;
1076 	shadow_info->eop_alignment = 256;
1077 }
1078 
1079 static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
1080 					 struct amdgpu_gfx_shadow_info *shadow_info,
1081 					 bool skip_check)
1082 {
1083 	if (adev->gfx.cp_gfx_shadow || skip_check) {
1084 		gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
1085 		return 0;
1086 	} else {
1087 		memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
1088 		return -ENOTSUPP;
1089 	}
1090 }
1091 
/*
 * GFX callback table for gfx11.  Most entries point at gfx_v11_0_* functions
 * from this file; get_hdp_flush_mask uses the common amdgpu_gfx helper.
 */
static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v11_0_select_se_sh,
	.read_wave_data = &gfx_v11_0_read_wave_data,
	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
	.get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
	.get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask,
};
1103 
1104 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
1105 {
1106 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1107 	case IP_VERSION(11, 0, 0):
1108 	case IP_VERSION(11, 0, 2):
1109 		adev->gfx.config.max_hw_contexts = 8;
1110 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1111 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1112 		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
1113 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1114 		break;
1115 	case IP_VERSION(11, 0, 3):
1116 		adev->gfx.ras = &gfx_v11_0_3_ras;
1117 		adev->gfx.config.max_hw_contexts = 8;
1118 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1119 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1120 		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
1121 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1122 		break;
1123 	case IP_VERSION(11, 0, 1):
1124 	case IP_VERSION(11, 0, 4):
1125 	case IP_VERSION(11, 5, 0):
1126 	case IP_VERSION(11, 5, 1):
1127 	case IP_VERSION(11, 5, 2):
1128 	case IP_VERSION(11, 5, 3):
1129 	case IP_VERSION(11, 5, 4):
1130 	case IP_VERSION(11, 5, 6):
1131 		adev->gfx.config.max_hw_contexts = 8;
1132 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1133 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1134 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1135 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
1136 		break;
1137 	default:
1138 		BUG();
1139 		break;
1140 	}
1141 
1142 	return 0;
1143 }
1144 
1145 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
1146 				   int me, int pipe, int queue)
1147 {
1148 	struct amdgpu_ring *ring;
1149 	unsigned int irq_type;
1150 	unsigned int hw_prio;
1151 
1152 	ring = &adev->gfx.gfx_ring[ring_id];
1153 
1154 	ring->me = me;
1155 	ring->pipe = pipe;
1156 	ring->queue = queue;
1157 
1158 	ring->ring_obj = NULL;
1159 	ring->use_doorbell = true;
1160 	if (adev->gfx.disable_kq) {
1161 		ring->no_scheduler = true;
1162 		ring->no_user_submission = true;
1163 	}
1164 
1165 	if (!ring_id)
1166 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1167 	else
1168 		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
1169 	ring->vm_hub = AMDGPU_GFXHUB(0);
1170 	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1171 
1172 	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
1173 	hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
1174 		AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
1175 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1176 				hw_prio, NULL);
1177 }
1178 
1179 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1180 				       int mec, int pipe, int queue)
1181 {
1182 	int r;
1183 	unsigned irq_type;
1184 	struct amdgpu_ring *ring;
1185 	unsigned int hw_prio;
1186 
1187 	ring = &adev->gfx.compute_ring[ring_id];
1188 
1189 	/* mec0 is me1 */
1190 	ring->me = mec + 1;
1191 	ring->pipe = pipe;
1192 	ring->queue = queue;
1193 
1194 	ring->ring_obj = NULL;
1195 	ring->use_doorbell = true;
1196 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1197 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1198 				+ (ring_id * GFX11_MEC_HPD_SIZE);
1199 	ring->vm_hub = AMDGPU_GFXHUB(0);
1200 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1201 
1202 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1203 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1204 		+ ring->pipe;
1205 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1206 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
1207 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1208 	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1209 			     hw_prio, NULL);
1210 	if (r)
1211 		return r;
1212 
1213 	return 0;
1214 }
1215 
/*
 * Per-firmware layout of the RLC autoload buffer, indexed by
 * SOC21_FIRMWARE_ID.  Filled by gfx_v11_0_parse_rlc_toc(), which stores
 * offset and size as the TOC values multiplied by 4.
 */
static struct {
	SOC21_FIRMWARE_ID	id;
	unsigned int		offset;
	unsigned int		size;
} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
1221 
1222 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
1223 {
1224 	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
1225 
1226 	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
1227 			(ucode->id < SOC21_FIRMWARE_ID_MAX)) {
1228 		rlc_autoload_info[ucode->id].id = ucode->id;
1229 		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
1230 		rlc_autoload_info[ucode->id].size = ucode->size * 4;
1231 
1232 		ucode++;
1233 	}
1234 }
1235 
1236 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
1237 {
1238 	uint32_t total_size = 0;
1239 	SOC21_FIRMWARE_ID id;
1240 
1241 	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
1242 
1243 	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
1244 		total_size += rlc_autoload_info[id].size;
1245 
1246 	/* In case the offset in rlc toc ucode is aligned */
1247 	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
1248 		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
1249 			rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
1250 
1251 	return total_size;
1252 }
1253 
1254 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
1255 {
1256 	int r;
1257 	uint32_t total_size;
1258 
1259 	total_size = gfx_v11_0_calc_toc_total_size(adev);
1260 
1261 	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
1262 				      AMDGPU_GEM_DOMAIN_VRAM |
1263 				      AMDGPU_GEM_DOMAIN_GTT,
1264 				      &adev->gfx.rlc.rlc_autoload_bo,
1265 				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
1266 				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1267 
1268 	if (r) {
1269 		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
1270 		return r;
1271 	}
1272 
1273 	return 0;
1274 }
1275 
1276 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1277 					      SOC21_FIRMWARE_ID id,
1278 			    		      const void *fw_data,
1279 					      uint32_t fw_size,
1280 					      uint32_t *fw_autoload_mask)
1281 {
1282 	uint32_t toc_offset;
1283 	uint32_t toc_fw_size;
1284 	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1285 
1286 	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
1287 		return;
1288 
1289 	toc_offset = rlc_autoload_info[id].offset;
1290 	toc_fw_size = rlc_autoload_info[id].size;
1291 
1292 	if (fw_size == 0)
1293 		fw_size = toc_fw_size;
1294 
1295 	if (fw_size > toc_fw_size)
1296 		fw_size = toc_fw_size;
1297 
1298 	memcpy(ptr + toc_offset, fw_data, fw_size);
1299 
1300 	if (fw_size < toc_fw_size)
1301 		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1302 
1303 	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
1304 		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
1305 }
1306 
1307 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
1308 							uint32_t *fw_autoload_mask)
1309 {
1310 	void *data;
1311 	uint32_t size;
1312 	uint64_t *toc_ptr;
1313 
1314 	*(uint64_t *)fw_autoload_mask |= 0x1;
1315 
1316 	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
1317 
1318 	data = adev->psp.toc.start_addr;
1319 	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
1320 
1321 	toc_ptr = (uint64_t *)data + size / 8 - 1;
1322 	*toc_ptr = *(uint64_t *)fw_autoload_mask;
1323 
1324 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
1325 					data, size, fw_autoload_mask);
1326 }
1327 
1328 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1329 							uint32_t *fw_autoload_mask)
1330 {
1331 	const __le32 *fw_data;
1332 	uint32_t fw_size;
1333 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1334 	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1335 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1336 	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1337 	uint16_t version_major, version_minor;
1338 
1339 	if (adev->gfx.rs64_enable) {
1340 		/* pfp ucode */
1341 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1342 			adev->gfx.pfp_fw->data;
1343 		/* instruction */
1344 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1345 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1346 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1347 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1348 						fw_data, fw_size, fw_autoload_mask);
1349 		/* data */
1350 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1351 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1352 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1353 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1354 						fw_data, fw_size, fw_autoload_mask);
1355 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1356 						fw_data, fw_size, fw_autoload_mask);
1357 		/* me ucode */
1358 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1359 			adev->gfx.me_fw->data;
1360 		/* instruction */
1361 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1362 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1363 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1364 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1365 						fw_data, fw_size, fw_autoload_mask);
1366 		/* data */
1367 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1368 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1369 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1370 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1371 						fw_data, fw_size, fw_autoload_mask);
1372 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1373 						fw_data, fw_size, fw_autoload_mask);
1374 		/* mec ucode */
1375 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1376 			adev->gfx.mec_fw->data;
1377 		/* instruction */
1378 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1379 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1380 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1381 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1382 						fw_data, fw_size, fw_autoload_mask);
1383 		/* data */
1384 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1385 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1386 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1387 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1388 						fw_data, fw_size, fw_autoload_mask);
1389 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1390 						fw_data, fw_size, fw_autoload_mask);
1391 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1392 						fw_data, fw_size, fw_autoload_mask);
1393 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1394 						fw_data, fw_size, fw_autoload_mask);
1395 	} else {
1396 		/* pfp ucode */
1397 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1398 			adev->gfx.pfp_fw->data;
1399 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1400 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1401 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1402 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1403 						fw_data, fw_size, fw_autoload_mask);
1404 
1405 		/* me ucode */
1406 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1407 			adev->gfx.me_fw->data;
1408 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1409 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1410 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1411 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1412 						fw_data, fw_size, fw_autoload_mask);
1413 
1414 		/* mec ucode */
1415 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1416 			adev->gfx.mec_fw->data;
1417 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1418 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1419 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1420 			cp_hdr->jt_size * 4;
1421 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1422 						fw_data, fw_size, fw_autoload_mask);
1423 	}
1424 
1425 	/* rlc ucode */
1426 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1427 		adev->gfx.rlc_fw->data;
1428 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1429 			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1430 	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1431 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1432 					fw_data, fw_size, fw_autoload_mask);
1433 
1434 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1435 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1436 	if (version_major == 2) {
1437 		if (version_minor >= 2) {
1438 			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1439 
1440 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1441 					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1442 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1443 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1444 					fw_data, fw_size, fw_autoload_mask);
1445 
1446 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1447 					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1448 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1449 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1450 					fw_data, fw_size, fw_autoload_mask);
1451 		}
1452 	}
1453 }
1454 
1455 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1456 							uint32_t *fw_autoload_mask)
1457 {
1458 	const __le32 *fw_data;
1459 	uint32_t fw_size;
1460 	const struct sdma_firmware_header_v2_0 *sdma_hdr;
1461 
1462 	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1463 		adev->sdma.instance[0].fw->data;
1464 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1465 			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1466 	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1467 
1468 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1469 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1470 
1471 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1472 			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1473 	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1474 
1475 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1476 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1477 }
1478 
1479 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1480 							uint32_t *fw_autoload_mask)
1481 {
1482 	const __le32 *fw_data;
1483 	unsigned fw_size;
1484 	const struct mes_firmware_header_v1_0 *mes_hdr;
1485 	int pipe, ucode_id, data_id;
1486 
1487 	for (pipe = 0; pipe < 2; pipe++) {
1488 		if (pipe==0) {
1489 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1490 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1491 		} else {
1492 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1493 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1494 		}
1495 
1496 		mes_hdr = (const struct mes_firmware_header_v1_0 *)
1497 			adev->mes.fw[pipe]->data;
1498 
1499 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1500 				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1501 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1502 
1503 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1504 				ucode_id, fw_data, fw_size, fw_autoload_mask);
1505 
1506 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1507 				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1508 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1509 
1510 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1511 				data_id, fw_data, fw_size, fw_autoload_mask);
1512 	}
1513 }
1514 
/*
 * Run the RLC backdoor autoload sequence: copy all firmware images into the
 * autoload buffer, point the IMU bootloader registers at the RLC G ucode
 * inside that buffer, load/set up/start the IMU through the optional imu
 * callbacks, and finally disable GPA mode.
 *
 * Always returns 0; nothing is done with what the IMU callbacks return.
 */
static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
{
	uint32_t rlc_g_offset, rlc_g_size;
	uint64_t gpu_addr;
	/* two dwords; the copy helpers access them as one 64-bit mask */
	uint32_t autoload_fw_id[2];

	memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);

	/* RLC autoload sequence 2: copy ucode */
	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
	/* TOC goes last: it embeds the mask accumulated by the copies above */
	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);

	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;

	/* tell the IMU where the RLC G ucode lives and how large it is */
	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));

	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);

	/* NOTE(review): imu.funcs itself is not NULL-checked here - confirm callers guarantee it */
	/* RLC autoload sequence 3: load IMU fw */
	if (adev->gfx.imu.funcs->load_microcode)
		adev->gfx.imu.funcs->load_microcode(adev);
	/* RLC autoload sequence 4 init IMU fw */
	if (adev->gfx.imu.funcs->setup_imu)
		adev->gfx.imu.funcs->setup_imu(adev);
	if (adev->gfx.imu.funcs->start_imu)
		adev->gfx.imu.funcs->start_imu(adev);

	/* RLC autoload sequence 5 disable gpa mode */
	gfx_v11_0_disable_gpa_mode(adev);

	return 0;
}
1552 
1553 static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
1554 {
1555 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
1556 	uint32_t *ptr;
1557 	uint32_t inst;
1558 
1559 	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
1560 	if (!ptr) {
1561 		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
1562 		adev->gfx.ip_dump_core = NULL;
1563 	} else {
1564 		adev->gfx.ip_dump_core = ptr;
1565 	}
1566 
1567 	/* Allocate memory for compute queue registers for all the instances */
1568 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
1569 	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
1570 		adev->gfx.mec.num_queue_per_pipe;
1571 
1572 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1573 	if (!ptr) {
1574 		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
1575 		adev->gfx.ip_dump_compute_queues = NULL;
1576 	} else {
1577 		adev->gfx.ip_dump_compute_queues = ptr;
1578 	}
1579 
1580 	/* Allocate memory for gfx queue registers for all the instances */
1581 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
1582 	inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
1583 		adev->gfx.me.num_queue_per_pipe;
1584 
1585 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1586 	if (!ptr) {
1587 		DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
1588 		adev->gfx.ip_dump_gfx_queues = NULL;
1589 	} else {
1590 		adev->gfx.ip_dump_gfx_queues = ptr;
1591 	}
1592 }
1593 
1594 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
1595 {
1596 	int i, j, k, r, ring_id;
1597 	int xcc_id = 0;
1598 	struct amdgpu_device *adev = ip_block->adev;
1599 	int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
1600 
1601 	INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
1602 
1603 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1604 	case IP_VERSION(11, 0, 0):
1605 	case IP_VERSION(11, 0, 1):
1606 	case IP_VERSION(11, 0, 2):
1607 	case IP_VERSION(11, 0, 3):
1608 	case IP_VERSION(11, 0, 4):
1609 	case IP_VERSION(11, 5, 0):
1610 	case IP_VERSION(11, 5, 1):
1611 	case IP_VERSION(11, 5, 2):
1612 	case IP_VERSION(11, 5, 3):
1613 	case IP_VERSION(11, 5, 4):
1614 	case IP_VERSION(11, 5, 6):
1615 		adev->gfx.me.num_me = 1;
1616 		adev->gfx.me.num_pipe_per_me = 1;
1617 		adev->gfx.me.num_queue_per_pipe = 2;
1618 		adev->gfx.mec.num_mec = 1;
1619 		adev->gfx.mec.num_pipe_per_mec = 4;
1620 		adev->gfx.mec.num_queue_per_pipe = 4;
1621 		break;
1622 	default:
1623 		adev->gfx.me.num_me = 1;
1624 		adev->gfx.me.num_pipe_per_me = 1;
1625 		adev->gfx.me.num_queue_per_pipe = 1;
1626 		adev->gfx.mec.num_mec = 1;
1627 		adev->gfx.mec.num_pipe_per_mec = 4;
1628 		adev->gfx.mec.num_queue_per_pipe = 8;
1629 		break;
1630 	}
1631 
1632 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1633 	case IP_VERSION(11, 0, 0):
1634 	case IP_VERSION(11, 0, 2):
1635 	case IP_VERSION(11, 0, 3):
1636 		if (!adev->gfx.disable_uq &&
1637 		    adev->gfx.me_fw_version  >= 2420 &&
1638 		    adev->gfx.pfp_fw_version >= 2580 &&
1639 		    adev->gfx.mec_fw_version >= 2650 &&
1640 		    adev->mes.fw_version[0] >= 120) {
1641 			adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
1642 			adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
1643 		}
1644 		break;
1645 	case IP_VERSION(11, 0, 1):
1646 	case IP_VERSION(11, 0, 4):
1647 	case IP_VERSION(11, 5, 0):
1648 	case IP_VERSION(11, 5, 1):
1649 	case IP_VERSION(11, 5, 2):
1650 	case IP_VERSION(11, 5, 3):
1651 		/* add firmware version checks here */
1652 		if (0 && !adev->gfx.disable_uq) {
1653 			adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
1654 			adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
1655 		}
1656 		break;
1657 	default:
1658 		break;
1659 	}
1660 
1661 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1662 	case IP_VERSION(11, 0, 0):
1663 	case IP_VERSION(11, 0, 2):
1664 	case IP_VERSION(11, 0, 3):
1665 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1666 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1667 		if (adev->gfx.me_fw_version  >= 2280 &&
1668 		    adev->gfx.pfp_fw_version >= 2370 &&
1669 		    adev->gfx.mec_fw_version >= 2450  &&
1670 		    adev->mes.fw_version[0] >= 99) {
1671 			adev->gfx.enable_cleaner_shader = true;
1672 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1673 			if (r) {
1674 				adev->gfx.enable_cleaner_shader = false;
1675 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1676 			}
1677 		}
1678 		break;
1679 	case IP_VERSION(11, 0, 1):
1680 	case IP_VERSION(11, 0, 4):
1681 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1682 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1683 		if (adev->gfx.pfp_fw_version >= 102 &&
1684 		    adev->gfx.mec_fw_version >= 66 &&
1685 		    adev->mes.fw_version[0] >= 128) {
1686 			adev->gfx.enable_cleaner_shader = true;
1687 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1688 			if (r) {
1689 				adev->gfx.enable_cleaner_shader = false;
1690 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1691 			}
1692 		}
1693 		break;
1694 	case IP_VERSION(11, 5, 0):
1695 	case IP_VERSION(11, 5, 1):
1696 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1697 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1698 		if (adev->gfx.mec_fw_version >= 26 &&
1699 		    adev->mes.fw_version[0] >= 114) {
1700 			adev->gfx.enable_cleaner_shader = true;
1701 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1702 			if (r) {
1703 				adev->gfx.enable_cleaner_shader = false;
1704 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1705 			}
1706 		}
1707 		break;
1708 	case IP_VERSION(11, 5, 2):
1709 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1710 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1711 		if (adev->gfx.me_fw_version  >= 12 &&
1712 		    adev->gfx.pfp_fw_version >= 15 &&
1713 		    adev->gfx.mec_fw_version >= 15) {
1714 			adev->gfx.enable_cleaner_shader = true;
1715 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1716 			if (r) {
1717 				adev->gfx.enable_cleaner_shader = false;
1718 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1719 			}
1720 		}
1721 		break;
1722 	case IP_VERSION(11, 5, 3):
1723 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1724 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1725 		if (adev->gfx.me_fw_version  >= 7 &&
1726 		    adev->gfx.pfp_fw_version >= 8 &&
1727 		    adev->gfx.mec_fw_version >= 8) {
1728 			adev->gfx.enable_cleaner_shader = true;
1729 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1730 			if (r) {
1731 				adev->gfx.enable_cleaner_shader = false;
1732 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1733 			}
1734 		}
1735 		break;
1736 	case IP_VERSION(11, 5, 4):
1737 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1738 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1739 		if (adev->gfx.me_fw_version  >= 4 &&
1740 		    adev->gfx.pfp_fw_version >= 7 &&
1741 		    adev->gfx.mec_fw_version >= 5) {
1742 			adev->gfx.enable_cleaner_shader = true;
1743 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1744 			if (r) {
1745 				adev->gfx.enable_cleaner_shader = false;
1746 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1747 			}
1748 		}
1749 		break;
1750 	default:
1751 		adev->gfx.enable_cleaner_shader = false;
1752 		break;
1753 	}
1754 
1755 	/* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */
1756 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) &&
1757 	    amdgpu_sriov_is_pp_one_vf(adev))
1758 		adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
1759 
1760 	/* EOP Event */
1761 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1762 			      GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1763 			      &adev->gfx.eop_irq);
1764 	if (r)
1765 		return r;
1766 
1767 	/* Bad opcode Event */
1768 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1769 			      GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR,
1770 			      &adev->gfx.bad_op_irq);
1771 	if (r)
1772 		return r;
1773 
1774 	/* Privileged reg */
1775 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1776 			      GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1777 			      &adev->gfx.priv_reg_irq);
1778 	if (r)
1779 		return r;
1780 
1781 	/* Privileged inst */
1782 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1783 			      GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1784 			      &adev->gfx.priv_inst_irq);
1785 	if (r)
1786 		return r;
1787 
1788 	/* FED error */
1789 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
1790 				  GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
1791 				  &adev->gfx.rlc_gc_fed_irq);
1792 	if (r)
1793 		return r;
1794 
1795 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1796 
1797 	gfx_v11_0_me_init(adev);
1798 
1799 	r = gfx_v11_0_rlc_init(adev);
1800 	if (r) {
1801 		DRM_ERROR("Failed to init rlc BOs!\n");
1802 		return r;
1803 	}
1804 
1805 	r = gfx_v11_0_mec_init(adev);
1806 	if (r) {
1807 		DRM_ERROR("Failed to init MEC BOs!\n");
1808 		return r;
1809 	}
1810 
1811 	if (adev->gfx.num_gfx_rings) {
1812 		ring_id = 0;
1813 		/* set up the gfx ring */
1814 		for (i = 0; i < adev->gfx.me.num_me; i++) {
1815 			for (j = 0; j < num_queue_per_pipe; j++) {
1816 				for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1817 					if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1818 						continue;
1819 
1820 					r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1821 								    i, k, j);
1822 					if (r)
1823 						return r;
1824 					ring_id++;
1825 				}
1826 			}
1827 		}
1828 	}
1829 
1830 	if (adev->gfx.num_compute_rings) {
1831 		ring_id = 0;
1832 		/* set up the compute queues - allocate horizontally across pipes */
1833 		for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1834 			for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1835 				for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1836 					if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
1837 									     k, j))
1838 						continue;
1839 
1840 					r = gfx_v11_0_compute_ring_init(adev, ring_id,
1841 									i, k, j);
1842 					if (r)
1843 						return r;
1844 
1845 					ring_id++;
1846 				}
1847 			}
1848 		}
1849 	}
1850 
1851 	adev->gfx.gfx_supported_reset =
1852 		amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
1853 	adev->gfx.compute_supported_reset =
1854 		amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
1855 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1856 	case IP_VERSION(11, 0, 0):
1857 	case IP_VERSION(11, 0, 2):
1858 	case IP_VERSION(11, 0, 3):
1859 		if ((adev->gfx.me_fw_version >= 2280) &&
1860 		    (adev->gfx.mec_fw_version >= 2410) &&
1861 		    !amdgpu_sriov_vf(adev) &&
1862 		    !adev->debug_disable_gpu_ring_reset) {
1863 			adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1864 			adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1865 		}
1866 		break;
1867 	default:
1868 		if (!amdgpu_sriov_vf(adev) &&
1869 		    !adev->debug_disable_gpu_ring_reset) {
1870 			adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1871 			adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1872 		}
1873 		break;
1874 	}
1875 
1876 	if (!adev->enable_mes_kiq) {
1877 		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
1878 		if (r) {
1879 			DRM_ERROR("Failed to init KIQ BOs!\n");
1880 			return r;
1881 		}
1882 
1883 		r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
1884 		if (r)
1885 			return r;
1886 	}
1887 
1888 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
1889 	if (r)
1890 		return r;
1891 
1892 	/* allocate visible FB for rlc auto-loading fw */
1893 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1894 		r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1895 		if (r)
1896 			return r;
1897 	}
1898 
1899 	r = gfx_v11_0_gpu_early_init(adev);
1900 	if (r)
1901 		return r;
1902 
1903 	if (amdgpu_gfx_ras_sw_init(adev)) {
1904 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
1905 		return -EINVAL;
1906 	}
1907 
1908 	gfx_v11_0_alloc_ip_dump(adev);
1909 
1910 	r = amdgpu_gfx_sysfs_init(adev);
1911 	if (r)
1912 		return r;
1913 
1914 	return 0;
1915 }
1916 
1917 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1918 {
1919 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1920 			      &adev->gfx.pfp.pfp_fw_gpu_addr,
1921 			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
1922 
1923 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1924 			      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1925 			      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1926 }
1927 
1928 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1929 {
1930 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1931 			      &adev->gfx.me.me_fw_gpu_addr,
1932 			      (void **)&adev->gfx.me.me_fw_ptr);
1933 
1934 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1935 			       &adev->gfx.me.me_fw_data_gpu_addr,
1936 			       (void **)&adev->gfx.me.me_fw_data_ptr);
1937 }
1938 
1939 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1940 {
1941 	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1942 			&adev->gfx.rlc.rlc_autoload_gpu_addr,
1943 			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
1944 }
1945 
1946 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
1947 {
1948 	int i;
1949 	struct amdgpu_device *adev = ip_block->adev;
1950 
1951 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1952 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1953 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1954 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1955 
1956 	amdgpu_gfx_mqd_sw_fini(adev, 0);
1957 
1958 	if (!adev->enable_mes_kiq) {
1959 		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
1960 		amdgpu_gfx_kiq_fini(adev, 0);
1961 	}
1962 
1963 	amdgpu_gfx_cleaner_shader_sw_fini(adev);
1964 
1965 	gfx_v11_0_pfp_fini(adev);
1966 	gfx_v11_0_me_fini(adev);
1967 	gfx_v11_0_rlc_fini(adev);
1968 	gfx_v11_0_mec_fini(adev);
1969 
1970 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1971 		gfx_v11_0_rlc_autoload_buffer_fini(adev);
1972 
1973 	gfx_v11_0_free_microcode(adev);
1974 
1975 	amdgpu_gfx_sysfs_fini(adev);
1976 
1977 	kfree(adev->gfx.ip_dump_core);
1978 	kfree(adev->gfx.ip_dump_compute_queues);
1979 	kfree(adev->gfx.ip_dump_gfx_queues);
1980 
1981 	return 0;
1982 }
1983 
1984 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1985 				   u32 sh_num, u32 instance, int xcc_id)
1986 {
1987 	u32 data;
1988 
1989 	if (instance == 0xffffffff)
1990 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1991 				     INSTANCE_BROADCAST_WRITES, 1);
1992 	else
1993 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1994 				     instance);
1995 
1996 	if (se_num == 0xffffffff)
1997 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1998 				     1);
1999 	else
2000 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2001 
2002 	if (sh_num == 0xffffffff)
2003 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
2004 				     1);
2005 	else
2006 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
2007 
2008 	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
2009 }
2010 
2011 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
2012 {
2013 	u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
2014 
2015 	gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
2016 	gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
2017 					   CC_GC_SA_UNIT_DISABLE,
2018 					   SA_DISABLE);
2019 	gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE);
2020 	gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
2021 						 GC_USER_SA_UNIT_DISABLE,
2022 						 SA_DISABLE);
2023 	sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
2024 					    adev->gfx.config.max_shader_engines);
2025 
2026 	return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
2027 }
2028 
2029 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2030 {
2031 	u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
2032 	u32 rb_mask;
2033 
2034 	gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
2035 	gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
2036 					    CC_RB_BACKEND_DISABLE,
2037 					    BACKEND_DISABLE);
2038 	gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
2039 	gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
2040 						 GC_USER_RB_BACKEND_DISABLE,
2041 						 BACKEND_DISABLE);
2042 	rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
2043 					    adev->gfx.config.max_shader_engines);
2044 
2045 	return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
2046 }
2047 
2048 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
2049 {
2050 	u32 rb_bitmap_per_sa;
2051 	u32 rb_bitmap_width_per_sa;
2052 	u32 max_sa;
2053 	u32 active_sa_bitmap;
2054 	u32 global_active_rb_bitmap;
2055 	u32 active_rb_bitmap = 0;
2056 	u32 i;
2057 
2058 	/* query sa bitmap from SA_UNIT_DISABLE registers */
2059 	active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
2060 	/* query rb bitmap from RB_BACKEND_DISABLE registers */
2061 	global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
2062 
2063 	/* generate active rb bitmap according to active sa bitmap */
2064 	max_sa = adev->gfx.config.max_shader_engines *
2065 		 adev->gfx.config.max_sh_per_se;
2066 	rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
2067 				 adev->gfx.config.max_sh_per_se;
2068 	rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
2069 
2070 	for (i = 0; i < max_sa; i++) {
2071 		if (active_sa_bitmap & (1 << i))
2072 			active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
2073 	}
2074 
2075 	active_rb_bitmap &= global_active_rb_bitmap;
2076 	adev->gfx.config.backend_enable_mask = active_rb_bitmap;
2077 	adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
2078 }
2079 
2080 #define DEFAULT_SH_MEM_BASES	(0x6000)
2081 #define LDS_APP_BASE           0x1
2082 #define SCRATCH_APP_BASE       0x2
2083 
2084 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
2085 {
2086 	int i;
2087 	uint32_t sh_mem_bases;
2088 	uint32_t data;
2089 
2090 	/*
2091 	 * Configure apertures:
2092 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2093 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2094 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2095 	 */
2096 	sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
2097 			SCRATCH_APP_BASE;
2098 
2099 	mutex_lock(&adev->srbm_mutex);
2100 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2101 		soc21_grbm_select(adev, 0, 0, 0, i);
2102 		/* CP and shaders */
2103 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
2104 		WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
2105 
2106 		/* Enable trap for each kfd vmid. */
2107 		data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
2108 		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
2109 		WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
2110 	}
2111 	soc21_grbm_select(adev, 0, 0, 0, 0);
2112 	mutex_unlock(&adev->srbm_mutex);
2113 
2114 	/*
2115 	 * Initialize all compute VMIDs to have no GDS, GWS, or OA
2116 	 * access. These should be enabled by FW for target VMIDs.
2117 	 */
2118 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2119 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
2120 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
2121 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
2122 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
2123 	}
2124 }
2125 
2126 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
2127 {
2128 	int vmid;
2129 
2130 	/*
2131 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2132 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2133 	 * the driver can enable them for graphics. VMID0 should maintain
2134 	 * access so that HWS firmware can save/restore entries.
2135 	 */
2136 	for (vmid = 1; vmid < 16; vmid++) {
2137 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
2138 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
2139 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
2140 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
2141 	}
2142 }
2143 
/* Placeholder for TCP harvesting; currently does nothing. */
static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
{
	/* TODO: harvest feature to be added later. */
}
2148 
2149 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
2150 {
2151 	/* TCCs are global (not instanced). */
2152 	uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
2153 			       RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
2154 
2155 	adev->gfx.config.tcc_disabled_mask =
2156 		REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
2157 		(REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
2158 }
2159 
2160 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
2161 {
2162 	u32 tmp;
2163 	int i;
2164 
2165 	if (!amdgpu_sriov_vf(adev))
2166 		WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2167 
2168 	gfx_v11_0_setup_rb(adev);
2169 	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
2170 	gfx_v11_0_get_tcc_info(adev);
2171 	adev->gfx.config.pa_sc_tile_steering_override = 0;
2172 
2173 	/* Set whether texture coordinate truncation is conformant. */
2174 	tmp = RREG32_SOC15(GC, 0, regTA_CNTL2);
2175 	adev->gfx.config.ta_cntl2_truncate_coord_mode =
2176 		REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE);
2177 
2178 	/* XXX SH_MEM regs */
2179 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2180 	mutex_lock(&adev->srbm_mutex);
2181 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2182 		soc21_grbm_select(adev, 0, 0, 0, i);
2183 		/* CP and shaders */
2184 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
2185 		if (i != 0) {
2186 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2187 				(adev->gmc.private_aperture_start >> 48));
2188 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2189 				(adev->gmc.shared_aperture_start >> 48));
2190 			WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
2191 		}
2192 	}
2193 	soc21_grbm_select(adev, 0, 0, 0, 0);
2194 
2195 	mutex_unlock(&adev->srbm_mutex);
2196 
2197 	gfx_v11_0_init_compute_vmid(adev);
2198 	gfx_v11_0_init_gds_vmid(adev);
2199 }
2200 
2201 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev,
2202 				      int me, int pipe)
2203 {
2204 	if (me != 0)
2205 		return 0;
2206 
2207 	switch (pipe) {
2208 	case 0:
2209 		return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
2210 	case 1:
2211 		return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
2212 	default:
2213 		return 0;
2214 	}
2215 }
2216 
2217 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev,
2218 				      int me, int pipe)
2219 {
2220 	/*
2221 	 * amdgpu controls only the first MEC. That's why this function only
2222 	 * handles the setting of interrupts for this specific MEC. All other
2223 	 * pipes' interrupts are set by amdkfd.
2224 	 */
2225 	if (me != 1)
2226 		return 0;
2227 
2228 	switch (pipe) {
2229 	case 0:
2230 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
2231 	case 1:
2232 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
2233 	case 2:
2234 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
2235 	case 3:
2236 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
2237 	default:
2238 		return 0;
2239 	}
2240 }
2241 
2242 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2243 					       bool enable)
2244 {
2245 	u32 tmp, cp_int_cntl_reg;
2246 	int i, j;
2247 
2248 	if (amdgpu_sriov_vf(adev))
2249 		return;
2250 
2251 	for (i = 0; i < adev->gfx.me.num_me; i++) {
2252 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
2253 			cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
2254 
2255 			if (cp_int_cntl_reg) {
2256 				tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
2257 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
2258 						    enable ? 1 : 0);
2259 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
2260 						    enable ? 1 : 0);
2261 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
2262 						    enable ? 1 : 0);
2263 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
2264 						    enable ? 1 : 0);
2265 				WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
2266 			}
2267 		}
2268 	}
2269 }
2270 
2271 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
2272 {
2273 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2274 
2275 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
2276 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2277 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
2278 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2279 	WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
2280 
2281 	return 0;
2282 }
2283 
2284 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
2285 {
2286 	u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
2287 
2288 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2289 	WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
2290 }
2291 
2292 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
2293 {
2294 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2295 	udelay(50);
2296 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2297 	udelay(50);
2298 }
2299 
2300 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
2301 					     bool enable)
2302 {
2303 	uint32_t rlc_pg_cntl;
2304 
2305 	rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
2306 
2307 	if (!enable) {
2308 		/* RLC_PG_CNTL[23] = 0 (default)
2309 		 * RLC will wait for handshake acks with SMU
2310 		 * GFXOFF will be enabled
2311 		 * RLC_PG_CNTL[23] = 1
2312 		 * RLC will not issue any message to SMU
2313 		 * hence no handshake between SMU & RLC
2314 		 * GFXOFF will be disabled
2315 		 */
2316 		rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
2317 	} else
2318 		rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
2319 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
2320 }
2321 
2322 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
2323 {
2324 	/* TODO: enable rlc & smu handshake until smu
2325 	 * and gfxoff feature works as expected */
2326 	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
2327 		gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
2328 
2329 	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2330 	udelay(50);
2331 }
2332 
2333 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
2334 {
2335 	uint32_t tmp;
2336 
2337 	/* enable Save Restore Machine */
2338 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
2339 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2340 	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
2341 	WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
2342 }
2343 
2344 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
2345 {
2346 	const struct rlc_firmware_header_v2_0 *hdr;
2347 	const __le32 *fw_data;
2348 	unsigned i, fw_size;
2349 
2350 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2351 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2352 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2353 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2354 
2355 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
2356 		     RLCG_UCODE_LOADING_START_ADDRESS);
2357 
2358 	for (i = 0; i < fw_size; i++)
2359 		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
2360 			     le32_to_cpup(fw_data++));
2361 
2362 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2363 }
2364 
2365 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
2366 {
2367 	const struct rlc_firmware_header_v2_2 *hdr;
2368 	const __le32 *fw_data;
2369 	unsigned i, fw_size;
2370 	u32 tmp;
2371 
2372 	hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
2373 
2374 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2375 			le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
2376 	fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
2377 
2378 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
2379 
2380 	for (i = 0; i < fw_size; i++) {
2381 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2382 			msleep(1);
2383 		WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
2384 				le32_to_cpup(fw_data++));
2385 	}
2386 
2387 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
2388 
2389 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2390 			le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
2391 	fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
2392 
2393 	WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
2394 	for (i = 0; i < fw_size; i++) {
2395 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2396 			msleep(1);
2397 		WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
2398 				le32_to_cpup(fw_data++));
2399 	}
2400 
2401 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
2402 
2403 	tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
2404 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
2405 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
2406 	WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
2407 }
2408 
2409 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
2410 {
2411 	const struct rlc_firmware_header_v2_3 *hdr;
2412 	const __le32 *fw_data;
2413 	unsigned i, fw_size;
2414 	u32 tmp;
2415 
2416 	hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
2417 
2418 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2419 			le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
2420 	fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
2421 
2422 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
2423 
2424 	for (i = 0; i < fw_size; i++) {
2425 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2426 			msleep(1);
2427 		WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
2428 				le32_to_cpup(fw_data++));
2429 	}
2430 
2431 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
2432 
2433 	tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
2434 	tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
2435 	WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
2436 
2437 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2438 			le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
2439 	fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
2440 
2441 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
2442 
2443 	for (i = 0; i < fw_size; i++) {
2444 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2445 			msleep(1);
2446 		WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
2447 				le32_to_cpup(fw_data++));
2448 	}
2449 
2450 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
2451 
2452 	tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
2453 	tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
2454 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
2455 }
2456 
2457 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
2458 {
2459 	const struct rlc_firmware_header_v2_0 *hdr;
2460 	uint16_t version_major;
2461 	uint16_t version_minor;
2462 
2463 	if (!adev->gfx.rlc_fw)
2464 		return -EINVAL;
2465 
2466 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2467 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2468 
2469 	version_major = le16_to_cpu(hdr->header.header_version_major);
2470 	version_minor = le16_to_cpu(hdr->header.header_version_minor);
2471 
2472 	if (version_major == 2) {
2473 		gfx_v11_0_load_rlcg_microcode(adev);
2474 		if (amdgpu_dpm == 1) {
2475 			if (version_minor >= 2)
2476 				gfx_v11_0_load_rlc_iram_dram_microcode(adev);
2477 			if (version_minor == 3)
2478 				gfx_v11_0_load_rlcp_rlcv_microcode(adev);
2479 		}
2480 
2481 		return 0;
2482 	}
2483 
2484 	return -EINVAL;
2485 }
2486 
2487 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
2488 {
2489 	int r;
2490 
2491 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
2492 		gfx_v11_0_init_csb(adev);
2493 
2494 		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
2495 			gfx_v11_0_rlc_enable_srm(adev);
2496 	} else {
2497 		if (amdgpu_sriov_vf(adev)) {
2498 			gfx_v11_0_init_csb(adev);
2499 			return 0;
2500 		}
2501 
2502 		adev->gfx.rlc.funcs->stop(adev);
2503 
2504 		/* disable CG */
2505 		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
2506 
2507 		/* disable PG */
2508 		WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
2509 
2510 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2511 			/* legacy rlc firmware loading */
2512 			r = gfx_v11_0_rlc_load_microcode(adev);
2513 			if (r)
2514 				return r;
2515 		}
2516 
2517 		gfx_v11_0_init_csb(adev);
2518 
2519 		adev->gfx.rlc.funcs->start(adev);
2520 	}
2521 	return 0;
2522 }
2523 
2524 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
2525 {
2526 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2527 	uint32_t tmp;
2528 	int i;
2529 
2530 	/* Trigger an invalidation of the L1 instruction caches */
2531 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2532 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2533 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2534 
2535 	/* Wait for invalidation complete */
2536 	for (i = 0; i < usec_timeout; i++) {
2537 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2538 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2539 					INVALIDATE_CACHE_COMPLETE))
2540 			break;
2541 		udelay(1);
2542 	}
2543 
2544 	if (i >= usec_timeout) {
2545 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2546 		return -EINVAL;
2547 	}
2548 
2549 	if (amdgpu_emu_mode == 1)
2550 		amdgpu_device_flush_hdp(adev, NULL);
2551 
2552 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2553 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2554 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2555 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2556 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2557 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2558 
2559 	/* Program me ucode address into intruction cache address register */
2560 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2561 			lower_32_bits(addr) & 0xFFFFF000);
2562 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2563 			upper_32_bits(addr));
2564 
2565 	return 0;
2566 }
2567 
2568 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
2569 {
2570 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2571 	uint32_t tmp;
2572 	int i;
2573 
2574 	/* Trigger an invalidation of the L1 instruction caches */
2575 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2576 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2577 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2578 
2579 	/* Wait for invalidation complete */
2580 	for (i = 0; i < usec_timeout; i++) {
2581 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2582 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2583 					INVALIDATE_CACHE_COMPLETE))
2584 			break;
2585 		udelay(1);
2586 	}
2587 
2588 	if (i >= usec_timeout) {
2589 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2590 		return -EINVAL;
2591 	}
2592 
2593 	if (amdgpu_emu_mode == 1)
2594 		amdgpu_device_flush_hdp(adev, NULL);
2595 
2596 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2597 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2598 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2599 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2600 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2601 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2602 
2603 	/* Program pfp ucode address into intruction cache address register */
2604 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2605 			lower_32_bits(addr) & 0xFFFFF000);
2606 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2607 			upper_32_bits(addr));
2608 
2609 	return 0;
2610 }
2611 
2612 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2613 {
2614 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2615 	uint32_t tmp;
2616 	int i;
2617 
2618 	/* Trigger an invalidation of the L1 instruction caches */
2619 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2620 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2621 
2622 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2623 
2624 	/* Wait for invalidation complete */
2625 	for (i = 0; i < usec_timeout; i++) {
2626 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2627 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2628 					INVALIDATE_CACHE_COMPLETE))
2629 			break;
2630 		udelay(1);
2631 	}
2632 
2633 	if (i >= usec_timeout) {
2634 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2635 		return -EINVAL;
2636 	}
2637 
2638 	if (amdgpu_emu_mode == 1)
2639 		amdgpu_device_flush_hdp(adev, NULL);
2640 
2641 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2642 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2643 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2644 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2645 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2646 
2647 	/* Program mec1 ucode address into intruction cache address register */
2648 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2649 			lower_32_bits(addr) & 0xFFFFF000);
2650 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2651 			upper_32_bits(addr));
2652 
2653 	return 0;
2654 }
2655 
2656 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2657 {
2658 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2659 	uint32_t tmp;
2660 	unsigned i, pipe_id;
2661 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2662 
2663 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2664 		adev->gfx.pfp_fw->data;
2665 
2666 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2667 		lower_32_bits(addr));
2668 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2669 		upper_32_bits(addr));
2670 
2671 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2672 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2673 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2674 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2675 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2676 
2677 	/*
2678 	 * Programming any of the CP_PFP_IC_BASE registers
2679 	 * forces invalidation of the ME L1 I$. Wait for the
2680 	 * invalidation complete
2681 	 */
2682 	for (i = 0; i < usec_timeout; i++) {
2683 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2684 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2685 			INVALIDATE_CACHE_COMPLETE))
2686 			break;
2687 		udelay(1);
2688 	}
2689 
2690 	if (i >= usec_timeout) {
2691 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2692 		return -EINVAL;
2693 	}
2694 
2695 	/* Prime the L1 instruction caches */
2696 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2697 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2698 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2699 	/* Waiting for cache primed*/
2700 	for (i = 0; i < usec_timeout; i++) {
2701 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2702 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2703 			ICACHE_PRIMED))
2704 			break;
2705 		udelay(1);
2706 	}
2707 
2708 	if (i >= usec_timeout) {
2709 		dev_err(adev->dev, "failed to prime instruction cache\n");
2710 		return -EINVAL;
2711 	}
2712 
2713 	mutex_lock(&adev->srbm_mutex);
2714 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2715 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2716 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2717 			(pfp_hdr->ucode_start_addr_hi << 30) |
2718 			(pfp_hdr->ucode_start_addr_lo >> 2));
2719 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2720 			pfp_hdr->ucode_start_addr_hi >> 2);
2721 
2722 		/*
2723 		 * Program CP_ME_CNTL to reset given PIPE to take
2724 		 * effect of CP_PFP_PRGRM_CNTR_START.
2725 		 */
2726 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2727 		if (pipe_id == 0)
2728 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2729 					PFP_PIPE0_RESET, 1);
2730 		else
2731 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2732 					PFP_PIPE1_RESET, 1);
2733 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2734 
2735 		/* Clear pfp pipe0 reset bit. */
2736 		if (pipe_id == 0)
2737 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2738 					PFP_PIPE0_RESET, 0);
2739 		else
2740 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2741 					PFP_PIPE1_RESET, 0);
2742 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2743 
2744 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2745 			lower_32_bits(addr2));
2746 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2747 			upper_32_bits(addr2));
2748 	}
2749 	soc21_grbm_select(adev, 0, 0, 0, 0);
2750 	mutex_unlock(&adev->srbm_mutex);
2751 
2752 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2753 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2754 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2755 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2756 
2757 	/* Invalidate the data caches */
2758 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2759 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2760 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2761 
2762 	for (i = 0; i < usec_timeout; i++) {
2763 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2764 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2765 			INVALIDATE_DCACHE_COMPLETE))
2766 			break;
2767 		udelay(1);
2768 	}
2769 
2770 	if (i >= usec_timeout) {
2771 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2772 		return -EINVAL;
2773 	}
2774 
2775 	return 0;
2776 }
2777 
2778 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2779 {
2780 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2781 	uint32_t tmp;
2782 	unsigned i, pipe_id;
2783 	const struct gfx_firmware_header_v2_0 *me_hdr;
2784 
2785 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2786 		adev->gfx.me_fw->data;
2787 
2788 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2789 		lower_32_bits(addr));
2790 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2791 		upper_32_bits(addr));
2792 
2793 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2794 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2795 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2796 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2797 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2798 
2799 	/*
2800 	 * Programming any of the CP_ME_IC_BASE registers
2801 	 * forces invalidation of the ME L1 I$. Wait for the
2802 	 * invalidation complete
2803 	 */
2804 	for (i = 0; i < usec_timeout; i++) {
2805 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2806 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2807 			INVALIDATE_CACHE_COMPLETE))
2808 			break;
2809 		udelay(1);
2810 	}
2811 
2812 	if (i >= usec_timeout) {
2813 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2814 		return -EINVAL;
2815 	}
2816 
2817 	/* Prime the instruction caches */
2818 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2819 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2820 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2821 
2822 	/* Waiting for instruction cache primed*/
2823 	for (i = 0; i < usec_timeout; i++) {
2824 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2825 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2826 			ICACHE_PRIMED))
2827 			break;
2828 		udelay(1);
2829 	}
2830 
2831 	if (i >= usec_timeout) {
2832 		dev_err(adev->dev, "failed to prime instruction cache\n");
2833 		return -EINVAL;
2834 	}
2835 
2836 	mutex_lock(&adev->srbm_mutex);
2837 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2838 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2839 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2840 			(me_hdr->ucode_start_addr_hi << 30) |
2841 			(me_hdr->ucode_start_addr_lo >> 2) );
2842 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2843 			me_hdr->ucode_start_addr_hi>>2);
2844 
2845 		/*
2846 		 * Program CP_ME_CNTL to reset given PIPE to take
2847 		 * effect of CP_PFP_PRGRM_CNTR_START.
2848 		 */
2849 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2850 		if (pipe_id == 0)
2851 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2852 					ME_PIPE0_RESET, 1);
2853 		else
2854 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2855 					ME_PIPE1_RESET, 1);
2856 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2857 
2858 		/* Clear pfp pipe0 reset bit. */
2859 		if (pipe_id == 0)
2860 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2861 					ME_PIPE0_RESET, 0);
2862 		else
2863 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2864 					ME_PIPE1_RESET, 0);
2865 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2866 
2867 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2868 			lower_32_bits(addr2));
2869 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2870 			upper_32_bits(addr2));
2871 	}
2872 	soc21_grbm_select(adev, 0, 0, 0, 0);
2873 	mutex_unlock(&adev->srbm_mutex);
2874 
2875 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2876 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2877 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2878 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2879 
2880 	/* Invalidate the data caches */
2881 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2882 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2883 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2884 
2885 	for (i = 0; i < usec_timeout; i++) {
2886 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2887 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2888 			INVALIDATE_DCACHE_COMPLETE))
2889 			break;
2890 		udelay(1);
2891 	}
2892 
2893 	if (i >= usec_timeout) {
2894 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2895 		return -EINVAL;
2896 	}
2897 
2898 	return 0;
2899 }
2900 
2901 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2902 {
2903 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2904 	uint32_t tmp;
2905 	unsigned i;
2906 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2907 
2908 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2909 		adev->gfx.mec_fw->data;
2910 
2911 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2912 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2913 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2914 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2915 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2916 
2917 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2918 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2919 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2920 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2921 
2922 	mutex_lock(&adev->srbm_mutex);
2923 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2924 		soc21_grbm_select(adev, 1, i, 0, 0);
2925 
2926 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
2927 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2928 		     upper_32_bits(addr2));
2929 
2930 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2931 					mec_hdr->ucode_start_addr_lo >> 2 |
2932 					mec_hdr->ucode_start_addr_hi << 30);
2933 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2934 					mec_hdr->ucode_start_addr_hi >> 2);
2935 
2936 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
2937 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2938 		     upper_32_bits(addr));
2939 	}
2940 	mutex_unlock(&adev->srbm_mutex);
2941 	soc21_grbm_select(adev, 0, 0, 0, 0);
2942 
2943 	/* Trigger an invalidation of the L1 instruction caches */
2944 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2945 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2946 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2947 
2948 	/* Wait for invalidation complete */
2949 	for (i = 0; i < usec_timeout; i++) {
2950 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2951 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2952 				       INVALIDATE_DCACHE_COMPLETE))
2953 			break;
2954 		udelay(1);
2955 	}
2956 
2957 	if (i >= usec_timeout) {
2958 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2959 		return -EINVAL;
2960 	}
2961 
2962 	/* Trigger an invalidation of the L1 instruction caches */
2963 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2964 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2965 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2966 
2967 	/* Wait for invalidation complete */
2968 	for (i = 0; i < usec_timeout; i++) {
2969 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2970 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2971 				       INVALIDATE_CACHE_COMPLETE))
2972 			break;
2973 		udelay(1);
2974 	}
2975 
2976 	if (i >= usec_timeout) {
2977 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2978 		return -EINVAL;
2979 	}
2980 
2981 	return 0;
2982 }
2983 
2984 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2985 {
2986 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2987 	const struct gfx_firmware_header_v2_0 *me_hdr;
2988 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2989 	uint32_t pipe_id, tmp;
2990 
2991 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2992 		adev->gfx.mec_fw->data;
2993 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2994 		adev->gfx.me_fw->data;
2995 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2996 		adev->gfx.pfp_fw->data;
2997 
2998 	/* config pfp program start addr */
2999 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
3000 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3001 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
3002 			(pfp_hdr->ucode_start_addr_hi << 30) |
3003 			(pfp_hdr->ucode_start_addr_lo >> 2));
3004 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
3005 			pfp_hdr->ucode_start_addr_hi >> 2);
3006 	}
3007 	soc21_grbm_select(adev, 0, 0, 0, 0);
3008 
3009 	/* reset pfp pipe */
3010 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3011 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
3012 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
3013 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3014 
3015 	/* clear pfp pipe reset */
3016 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
3017 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
3018 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3019 
3020 	/* config me program start addr */
3021 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
3022 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3023 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
3024 			(me_hdr->ucode_start_addr_hi << 30) |
3025 			(me_hdr->ucode_start_addr_lo >> 2) );
3026 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
3027 			me_hdr->ucode_start_addr_hi>>2);
3028 	}
3029 	soc21_grbm_select(adev, 0, 0, 0, 0);
3030 
3031 	/* reset me pipe */
3032 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3033 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
3034 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
3035 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3036 
3037 	/* clear me pipe reset */
3038 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
3039 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
3040 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3041 
3042 	/* config mec program start addr */
3043 	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
3044 		soc21_grbm_select(adev, 1, pipe_id, 0, 0);
3045 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3046 					mec_hdr->ucode_start_addr_lo >> 2 |
3047 					mec_hdr->ucode_start_addr_hi << 30);
3048 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3049 					mec_hdr->ucode_start_addr_hi >> 2);
3050 	}
3051 	soc21_grbm_select(adev, 0, 0, 0, 0);
3052 
3053 	/* reset mec pipe */
3054 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3055 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
3056 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
3057 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
3058 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
3059 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
3060 
3061 	/* clear mec pipe reset */
3062 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
3063 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
3064 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
3065 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
3066 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
3067 }
3068 
3069 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
3070 {
3071 	uint32_t cp_status;
3072 	uint32_t bootload_status;
3073 	int i, r;
3074 	uint64_t addr, addr2;
3075 
3076 	for (i = 0; i < adev->usec_timeout; i++) {
3077 		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
3078 
3079 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
3080 			    IP_VERSION(11, 0, 1) ||
3081 		    amdgpu_ip_version(adev, GC_HWIP, 0) ==
3082 			    IP_VERSION(11, 0, 4) ||
3083 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
3084 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) ||
3085 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) ||
3086 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3) ||
3087 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 4) ||
3088 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 6))
3089 			bootload_status = RREG32_SOC15(GC, 0,
3090 					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
3091 		else
3092 			bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
3093 
3094 		if ((cp_status == 0) &&
3095 		    (REG_GET_FIELD(bootload_status,
3096 			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
3097 			break;
3098 		}
3099 		udelay(1);
3100 	}
3101 
3102 	if (i >= adev->usec_timeout) {
3103 		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
3104 		return -ETIMEDOUT;
3105 	}
3106 
3107 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
3108 		if (adev->gfx.rs64_enable) {
3109 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3110 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
3111 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
3112 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
3113 			r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
3114 			if (r)
3115 				return r;
3116 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3117 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
3118 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
3119 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
3120 			r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
3121 			if (r)
3122 				return r;
3123 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3124 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
3125 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
3126 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
3127 			r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
3128 			if (r)
3129 				return r;
3130 		} else {
3131 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3132 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
3133 			r = gfx_v11_0_config_me_cache(adev, addr);
3134 			if (r)
3135 				return r;
3136 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3137 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
3138 			r = gfx_v11_0_config_pfp_cache(adev, addr);
3139 			if (r)
3140 				return r;
3141 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3142 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
3143 			r = gfx_v11_0_config_mec_cache(adev, addr);
3144 			if (r)
3145 				return r;
3146 		}
3147 	}
3148 
3149 	return 0;
3150 }
3151 
3152 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3153 {
3154 	int i;
3155 	u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3156 
3157 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3158 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3159 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3160 
3161 	for (i = 0; i < adev->usec_timeout; i++) {
3162 		if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
3163 			break;
3164 		udelay(1);
3165 	}
3166 
3167 	if (i >= adev->usec_timeout)
3168 		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
3169 
3170 	return 0;
3171 }
3172 
3173 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
3174 {
3175 	int r;
3176 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3177 	const __le32 *fw_data;
3178 	unsigned i, fw_size;
3179 
3180 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3181 		adev->gfx.pfp_fw->data;
3182 
3183 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3184 
3185 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
3186 		le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3187 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
3188 
3189 	r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
3190 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3191 				      &adev->gfx.pfp.pfp_fw_obj,
3192 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
3193 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
3194 	if (r) {
3195 		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
3196 		gfx_v11_0_pfp_fini(adev);
3197 		return r;
3198 	}
3199 
3200 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
3201 
3202 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
3203 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
3204 
3205 	gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
3206 
3207 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
3208 
3209 	for (i = 0; i < pfp_hdr->jt_size; i++)
3210 		WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
3211 			     le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
3212 
3213 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3214 
3215 	return 0;
3216 }
3217 
3218 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
3219 {
3220 	int r;
3221 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
3222 	const __le32 *fw_ucode, *fw_data;
3223 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
3224 	uint32_t tmp;
3225 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
3226 
3227 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
3228 		adev->gfx.pfp_fw->data;
3229 
3230 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3231 
3232 	/* instruction */
3233 	fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
3234 		le32_to_cpu(pfp_hdr->ucode_offset_bytes));
3235 	fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
3236 	/* data */
3237 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
3238 		le32_to_cpu(pfp_hdr->data_offset_bytes));
3239 	fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
3240 
3241 	/* 64kb align */
3242 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3243 				      64 * 1024,
3244 				      AMDGPU_GEM_DOMAIN_VRAM |
3245 				      AMDGPU_GEM_DOMAIN_GTT,
3246 				      &adev->gfx.pfp.pfp_fw_obj,
3247 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
3248 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
3249 	if (r) {
3250 		dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
3251 		gfx_v11_0_pfp_fini(adev);
3252 		return r;
3253 	}
3254 
3255 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3256 				      64 * 1024,
3257 				      AMDGPU_GEM_DOMAIN_VRAM |
3258 				      AMDGPU_GEM_DOMAIN_GTT,
3259 				      &adev->gfx.pfp.pfp_fw_data_obj,
3260 				      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
3261 				      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
3262 	if (r) {
3263 		dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
3264 		gfx_v11_0_pfp_fini(adev);
3265 		return r;
3266 	}
3267 
3268 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
3269 	memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
3270 
3271 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
3272 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
3273 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
3274 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
3275 
3276 	if (amdgpu_emu_mode == 1)
3277 		amdgpu_device_flush_hdp(adev, NULL);
3278 
3279 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
3280 		lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
3281 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
3282 		upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
3283 
3284 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
3285 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
3286 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
3287 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
3288 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
3289 
3290 	/*
3291 	 * Programming any of the CP_PFP_IC_BASE registers
3292 	 * forces invalidation of the ME L1 I$. Wait for the
3293 	 * invalidation complete
3294 	 */
3295 	for (i = 0; i < usec_timeout; i++) {
3296 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3297 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
3298 			INVALIDATE_CACHE_COMPLETE))
3299 			break;
3300 		udelay(1);
3301 	}
3302 
3303 	if (i >= usec_timeout) {
3304 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3305 		return -EINVAL;
3306 	}
3307 
3308 	/* Prime the L1 instruction caches */
3309 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3310 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
3311 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
3312 	/* Waiting for cache primed*/
3313 	for (i = 0; i < usec_timeout; i++) {
3314 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3315 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
3316 			ICACHE_PRIMED))
3317 			break;
3318 		udelay(1);
3319 	}
3320 
3321 	if (i >= usec_timeout) {
3322 		dev_err(adev->dev, "failed to prime instruction cache\n");
3323 		return -EINVAL;
3324 	}
3325 
3326 	mutex_lock(&adev->srbm_mutex);
3327 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3328 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3329 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
3330 			(pfp_hdr->ucode_start_addr_hi << 30) |
3331 			(pfp_hdr->ucode_start_addr_lo >> 2) );
3332 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
3333 			pfp_hdr->ucode_start_addr_hi>>2);
3334 
3335 		/*
3336 		 * Program CP_ME_CNTL to reset given PIPE to take
3337 		 * effect of CP_PFP_PRGRM_CNTR_START.
3338 		 */
3339 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3340 		if (pipe_id == 0)
3341 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3342 					PFP_PIPE0_RESET, 1);
3343 		else
3344 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3345 					PFP_PIPE1_RESET, 1);
3346 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3347 
3348 		/* Clear pfp pipe0 reset bit. */
3349 		if (pipe_id == 0)
3350 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3351 					PFP_PIPE0_RESET, 0);
3352 		else
3353 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3354 					PFP_PIPE1_RESET, 0);
3355 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3356 
3357 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
3358 			lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
3359 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
3360 			upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
3361 	}
3362 	soc21_grbm_select(adev, 0, 0, 0, 0);
3363 	mutex_unlock(&adev->srbm_mutex);
3364 
3365 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3366 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3367 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3368 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3369 
3370 	/* Invalidate the data caches */
3371 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3372 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3373 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3374 
3375 	for (i = 0; i < usec_timeout; i++) {
3376 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3377 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3378 			INVALIDATE_DCACHE_COMPLETE))
3379 			break;
3380 		udelay(1);
3381 	}
3382 
3383 	if (i >= usec_timeout) {
3384 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3385 		return -EINVAL;
3386 	}
3387 
3388 	return 0;
3389 }
3390 
3391 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
3392 {
3393 	int r;
3394 	const struct gfx_firmware_header_v1_0 *me_hdr;
3395 	const __le32 *fw_data;
3396 	unsigned i, fw_size;
3397 
3398 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3399 		adev->gfx.me_fw->data;
3400 
3401 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3402 
3403 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
3404 		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3405 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
3406 
3407 	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
3408 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3409 				      &adev->gfx.me.me_fw_obj,
3410 				      &adev->gfx.me.me_fw_gpu_addr,
3411 				      (void **)&adev->gfx.me.me_fw_ptr);
3412 	if (r) {
3413 		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
3414 		gfx_v11_0_me_fini(adev);
3415 		return r;
3416 	}
3417 
3418 	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
3419 
3420 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3421 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3422 
3423 	gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
3424 
3425 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
3426 
3427 	for (i = 0; i < me_hdr->jt_size; i++)
3428 		WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
3429 			     le32_to_cpup(fw_data + me_hdr->jt_offset + i));
3430 
3431 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
3432 
3433 	return 0;
3434 }
3435 
3436 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
3437 {
3438 	int r;
3439 	const struct gfx_firmware_header_v2_0 *me_hdr;
3440 	const __le32 *fw_ucode, *fw_data;
3441 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
3442 	uint32_t tmp;
3443 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
3444 
3445 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
3446 		adev->gfx.me_fw->data;
3447 
3448 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3449 
3450 	/* instruction */
3451 	fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
3452 		le32_to_cpu(me_hdr->ucode_offset_bytes));
3453 	fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
3454 	/* data */
3455 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
3456 		le32_to_cpu(me_hdr->data_offset_bytes));
3457 	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
3458 
3459 	/* 64kb align*/
3460 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3461 				      64 * 1024,
3462 				      AMDGPU_GEM_DOMAIN_VRAM |
3463 				      AMDGPU_GEM_DOMAIN_GTT,
3464 				      &adev->gfx.me.me_fw_obj,
3465 				      &adev->gfx.me.me_fw_gpu_addr,
3466 				      (void **)&adev->gfx.me.me_fw_ptr);
3467 	if (r) {
3468 		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
3469 		gfx_v11_0_me_fini(adev);
3470 		return r;
3471 	}
3472 
3473 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3474 				      64 * 1024,
3475 				      AMDGPU_GEM_DOMAIN_VRAM |
3476 				      AMDGPU_GEM_DOMAIN_GTT,
3477 				      &adev->gfx.me.me_fw_data_obj,
3478 				      &adev->gfx.me.me_fw_data_gpu_addr,
3479 				      (void **)&adev->gfx.me.me_fw_data_ptr);
3480 	if (r) {
3481 		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
3482 		gfx_v11_0_pfp_fini(adev);
3483 		return r;
3484 	}
3485 
3486 	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
3487 	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
3488 
3489 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3490 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
3491 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3492 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
3493 
3494 	if (amdgpu_emu_mode == 1)
3495 		amdgpu_device_flush_hdp(adev, NULL);
3496 
3497 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
3498 		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
3499 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
3500 		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
3501 
3502 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
3503 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
3504 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
3505 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
3506 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
3507 
3508 	/*
3509 	 * Programming any of the CP_ME_IC_BASE registers
3510 	 * forces invalidation of the ME L1 I$. Wait for the
3511 	 * invalidation complete
3512 	 */
3513 	for (i = 0; i < usec_timeout; i++) {
3514 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3515 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3516 			INVALIDATE_CACHE_COMPLETE))
3517 			break;
3518 		udelay(1);
3519 	}
3520 
3521 	if (i >= usec_timeout) {
3522 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3523 		return -EINVAL;
3524 	}
3525 
3526 	/* Prime the instruction caches */
3527 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3528 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
3529 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
3530 
3531 	/* Waiting for instruction cache primed*/
3532 	for (i = 0; i < usec_timeout; i++) {
3533 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3534 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3535 			ICACHE_PRIMED))
3536 			break;
3537 		udelay(1);
3538 	}
3539 
3540 	if (i >= usec_timeout) {
3541 		dev_err(adev->dev, "failed to prime instruction cache\n");
3542 		return -EINVAL;
3543 	}
3544 
3545 	mutex_lock(&adev->srbm_mutex);
3546 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3547 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3548 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
3549 			(me_hdr->ucode_start_addr_hi << 30) |
3550 			(me_hdr->ucode_start_addr_lo >> 2) );
3551 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
3552 			me_hdr->ucode_start_addr_hi>>2);
3553 
3554 		/*
3555 		 * Program CP_ME_CNTL to reset given PIPE to take
3556 		 * effect of CP_PFP_PRGRM_CNTR_START.
3557 		 */
3558 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3559 		if (pipe_id == 0)
3560 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3561 					ME_PIPE0_RESET, 1);
3562 		else
3563 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3564 					ME_PIPE1_RESET, 1);
3565 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3566 
3567 		/* Clear pfp pipe0 reset bit. */
3568 		if (pipe_id == 0)
3569 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3570 					ME_PIPE0_RESET, 0);
3571 		else
3572 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3573 					ME_PIPE1_RESET, 0);
3574 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3575 
3576 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
3577 			lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3578 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
3579 			upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3580 	}
3581 	soc21_grbm_select(adev, 0, 0, 0, 0);
3582 	mutex_unlock(&adev->srbm_mutex);
3583 
3584 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3585 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3586 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3587 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3588 
3589 	/* Invalidate the data caches */
3590 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3591 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3592 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3593 
3594 	for (i = 0; i < usec_timeout; i++) {
3595 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3596 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3597 			INVALIDATE_DCACHE_COMPLETE))
3598 			break;
3599 		udelay(1);
3600 	}
3601 
3602 	if (i >= usec_timeout) {
3603 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3604 		return -EINVAL;
3605 	}
3606 
3607 	return 0;
3608 }
3609 
3610 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3611 {
3612 	int r;
3613 
3614 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3615 		return -EINVAL;
3616 
3617 	gfx_v11_0_cp_gfx_enable(adev, false);
3618 
3619 	if (adev->gfx.rs64_enable)
3620 		r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3621 	else
3622 		r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3623 	if (r) {
3624 		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3625 		return r;
3626 	}
3627 
3628 	if (adev->gfx.rs64_enable)
3629 		r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3630 	else
3631 		r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3632 	if (r) {
3633 		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3634 		return r;
3635 	}
3636 
3637 	return 0;
3638 }
3639 
3640 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3641 {
3642 	struct amdgpu_ring *ring;
3643 	const struct cs_section_def *sect = NULL;
3644 	const struct cs_extent_def *ext = NULL;
3645 	int r, i;
3646 	int ctx_reg_offset;
3647 
3648 	/* init the CP */
3649 	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3650 		     adev->gfx.config.max_hw_contexts - 1);
3651 	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3652 
3653 	if (!amdgpu_async_gfx_ring)
3654 		gfx_v11_0_cp_gfx_enable(adev, true);
3655 
3656 	ring = &adev->gfx.gfx_ring[0];
3657 	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3658 	if (r) {
3659 		drm_err(&adev->ddev, "cp failed to lock ring (%d).\n", r);
3660 		return r;
3661 	}
3662 
3663 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3664 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3665 
3666 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3667 	amdgpu_ring_write(ring, 0x80000000);
3668 	amdgpu_ring_write(ring, 0x80000000);
3669 
3670 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3671 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3672 			if (sect->id == SECT_CONTEXT) {
3673 				amdgpu_ring_write(ring,
3674 						  PACKET3(PACKET3_SET_CONTEXT_REG,
3675 							  ext->reg_count));
3676 				amdgpu_ring_write(ring, ext->reg_index -
3677 						  PACKET3_SET_CONTEXT_REG_START);
3678 				for (i = 0; i < ext->reg_count; i++)
3679 					amdgpu_ring_write(ring, ext->extent[i]);
3680 			}
3681 		}
3682 	}
3683 
3684 	ctx_reg_offset =
3685 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3686 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3687 	amdgpu_ring_write(ring, ctx_reg_offset);
3688 	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3689 
3690 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3691 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3692 
3693 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3694 	amdgpu_ring_write(ring, 0);
3695 
3696 	amdgpu_ring_commit(ring);
3697 
3698 	/* submit cs packet to copy state 0 to next available state */
3699 	if (adev->gfx.num_gfx_rings > 1) {
3700 		/* maximum supported gfx ring is 2 */
3701 		ring = &adev->gfx.gfx_ring[1];
3702 		r = amdgpu_ring_alloc(ring, 2);
3703 		if (r) {
3704 			drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
3705 			return r;
3706 		}
3707 
3708 		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3709 		amdgpu_ring_write(ring, 0);
3710 
3711 		amdgpu_ring_commit(ring);
3712 	}
3713 	return 0;
3714 }
3715 
3716 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3717 					 CP_PIPE_ID pipe)
3718 {
3719 	u32 tmp;
3720 
3721 	tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3722 	tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3723 
3724 	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3725 }
3726 
3727 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3728 					  struct amdgpu_ring *ring)
3729 {
3730 	u32 tmp;
3731 
3732 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3733 	if (ring->use_doorbell) {
3734 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3735 				    DOORBELL_OFFSET, ring->doorbell_index);
3736 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3737 				    DOORBELL_EN, 1);
3738 	} else {
3739 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3740 				    DOORBELL_EN, 0);
3741 	}
3742 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3743 
3744 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3745 			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
3746 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3747 
3748 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3749 		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3750 }
3751 
3752 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3753 {
3754 	struct amdgpu_ring *ring;
3755 	u32 tmp;
3756 	u32 rb_bufsz;
3757 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3758 
3759 	/* Set the write pointer delay */
3760 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3761 
3762 	/* set the RB to use vmid 0 */
3763 	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3764 
3765 	/* Init gfx ring 0 for pipe 0 */
3766 	mutex_lock(&adev->srbm_mutex);
3767 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3768 
3769 	/* Set ring buffer size */
3770 	ring = &adev->gfx.gfx_ring[0];
3771 	rb_bufsz = order_base_2(ring->ring_size / 8);
3772 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3773 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3774 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3775 
3776 	/* Initialize the ring buffer's write pointers */
3777 	ring->wptr = 0;
3778 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3779 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3780 
3781 	/* set the wb address whether it's enabled or not */
3782 	rptr_addr = ring->rptr_gpu_addr;
3783 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3784 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3785 		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3786 
3787 	wptr_gpu_addr = ring->wptr_gpu_addr;
3788 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3789 		     lower_32_bits(wptr_gpu_addr));
3790 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3791 		     upper_32_bits(wptr_gpu_addr));
3792 
3793 	mdelay(1);
3794 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3795 
3796 	rb_addr = ring->gpu_addr >> 8;
3797 	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3798 	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3799 
3800 	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3801 
3802 	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3803 	mutex_unlock(&adev->srbm_mutex);
3804 
3805 	/* Init gfx ring 1 for pipe 1 */
3806 	if (adev->gfx.num_gfx_rings > 1) {
3807 		mutex_lock(&adev->srbm_mutex);
3808 		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3809 		/* maximum supported gfx ring is 2 */
3810 		ring = &adev->gfx.gfx_ring[1];
3811 		rb_bufsz = order_base_2(ring->ring_size / 8);
3812 		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3813 		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3814 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3815 		/* Initialize the ring buffer's write pointers */
3816 		ring->wptr = 0;
3817 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3818 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3819 		/* Set the wb address whether it's enabled or not */
3820 		rptr_addr = ring->rptr_gpu_addr;
3821 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3822 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3823 			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3824 		wptr_gpu_addr = ring->wptr_gpu_addr;
3825 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3826 			     lower_32_bits(wptr_gpu_addr));
3827 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3828 			     upper_32_bits(wptr_gpu_addr));
3829 
3830 		mdelay(1);
3831 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3832 
3833 		rb_addr = ring->gpu_addr >> 8;
3834 		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3835 		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3836 		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3837 
3838 		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3839 		mutex_unlock(&adev->srbm_mutex);
3840 	}
3841 	/* Switch to pipe 0 */
3842 	mutex_lock(&adev->srbm_mutex);
3843 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3844 	mutex_unlock(&adev->srbm_mutex);
3845 
3846 	/* start the ring */
3847 	gfx_v11_0_cp_gfx_start(adev);
3848 
3849 	return 0;
3850 }
3851 
3852 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3853 {
3854 	u32 data;
3855 
3856 	if (adev->gfx.rs64_enable) {
3857 		data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3858 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3859 							 enable ? 0 : 1);
3860 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3861 							 enable ? 0 : 1);
3862 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3863 							 enable ? 0 : 1);
3864 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3865 							 enable ? 0 : 1);
3866 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3867 							 enable ? 0 : 1);
3868 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3869 							 enable ? 1 : 0);
3870 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3871 				                         enable ? 1 : 0);
3872 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3873 							 enable ? 1 : 0);
3874 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3875 							 enable ? 1 : 0);
3876 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3877 							 enable ? 0 : 1);
3878 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3879 	} else {
3880 		data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3881 
3882 		if (enable) {
3883 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3884 			if (!adev->enable_mes_kiq)
3885 				data = REG_SET_FIELD(data, CP_MEC_CNTL,
3886 						     MEC_ME2_HALT, 0);
3887 		} else {
3888 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3889 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3890 		}
3891 		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3892 	}
3893 
3894 	udelay(50);
3895 }
3896 
3897 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3898 {
3899 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3900 	const __le32 *fw_data;
3901 	unsigned i, fw_size;
3902 	u32 *fw = NULL;
3903 	int r;
3904 
3905 	if (!adev->gfx.mec_fw)
3906 		return -EINVAL;
3907 
3908 	gfx_v11_0_cp_compute_enable(adev, false);
3909 
3910 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3911 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3912 
3913 	fw_data = (const __le32 *)
3914 		(adev->gfx.mec_fw->data +
3915 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3916 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3917 
3918 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3919 					  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3920 					  &adev->gfx.mec.mec_fw_obj,
3921 					  &adev->gfx.mec.mec_fw_gpu_addr,
3922 					  (void **)&fw);
3923 	if (r) {
3924 		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3925 		gfx_v11_0_mec_fini(adev);
3926 		return r;
3927 	}
3928 
3929 	memcpy(fw, fw_data, fw_size);
3930 
3931 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3932 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3933 
3934 	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3935 
3936 	/* MEC1 */
3937 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3938 
3939 	for (i = 0; i < mec_hdr->jt_size; i++)
3940 		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3941 			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3942 
3943 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3944 
3945 	return 0;
3946 }
3947 
3948 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3949 {
3950 	const struct gfx_firmware_header_v2_0 *mec_hdr;
3951 	const __le32 *fw_ucode, *fw_data;
3952 	u32 tmp, fw_ucode_size, fw_data_size;
3953 	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3954 	u32 *fw_ucode_ptr, *fw_data_ptr;
3955 	int r;
3956 
3957 	if (!adev->gfx.mec_fw)
3958 		return -EINVAL;
3959 
3960 	gfx_v11_0_cp_compute_enable(adev, false);
3961 
3962 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3963 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3964 
3965 	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3966 				le32_to_cpu(mec_hdr->ucode_offset_bytes));
3967 	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3968 
3969 	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3970 				le32_to_cpu(mec_hdr->data_offset_bytes));
3971 	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3972 
3973 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3974 				      64 * 1024,
3975 				      AMDGPU_GEM_DOMAIN_VRAM |
3976 				      AMDGPU_GEM_DOMAIN_GTT,
3977 				      &adev->gfx.mec.mec_fw_obj,
3978 				      &adev->gfx.mec.mec_fw_gpu_addr,
3979 				      (void **)&fw_ucode_ptr);
3980 	if (r) {
3981 		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3982 		gfx_v11_0_mec_fini(adev);
3983 		return r;
3984 	}
3985 
3986 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3987 				      64 * 1024,
3988 				      AMDGPU_GEM_DOMAIN_VRAM |
3989 				      AMDGPU_GEM_DOMAIN_GTT,
3990 				      &adev->gfx.mec.mec_fw_data_obj,
3991 				      &adev->gfx.mec.mec_fw_data_gpu_addr,
3992 				      (void **)&fw_data_ptr);
3993 	if (r) {
3994 		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3995 		gfx_v11_0_mec_fini(adev);
3996 		return r;
3997 	}
3998 
3999 	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
4000 	memcpy(fw_data_ptr, fw_data, fw_data_size);
4001 
4002 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
4003 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
4004 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
4005 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
4006 
4007 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
4008 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
4009 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
4010 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
4011 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
4012 
4013 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
4014 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
4015 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
4016 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
4017 
4018 	mutex_lock(&adev->srbm_mutex);
4019 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
4020 		soc21_grbm_select(adev, 1, i, 0, 0);
4021 
4022 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
4023 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
4024 		     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
4025 
4026 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
4027 					mec_hdr->ucode_start_addr_lo >> 2 |
4028 					mec_hdr->ucode_start_addr_hi << 30);
4029 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
4030 					mec_hdr->ucode_start_addr_hi >> 2);
4031 
4032 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
4033 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
4034 		     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
4035 	}
4036 	mutex_unlock(&adev->srbm_mutex);
4037 	soc21_grbm_select(adev, 0, 0, 0, 0);
4038 
4039 	/* Trigger an invalidation of the L1 instruction caches */
4040 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
4041 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
4042 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
4043 
4044 	/* Wait for invalidation complete */
4045 	for (i = 0; i < usec_timeout; i++) {
4046 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
4047 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
4048 				       INVALIDATE_DCACHE_COMPLETE))
4049 			break;
4050 		udelay(1);
4051 	}
4052 
4053 	if (i >= usec_timeout) {
4054 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
4055 		return -EINVAL;
4056 	}
4057 
4058 	/* Trigger an invalidation of the L1 instruction caches */
4059 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
4060 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
4061 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
4062 
4063 	/* Wait for invalidation complete */
4064 	for (i = 0; i < usec_timeout; i++) {
4065 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
4066 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
4067 				       INVALIDATE_CACHE_COMPLETE))
4068 			break;
4069 		udelay(1);
4070 	}
4071 
4072 	if (i >= usec_timeout) {
4073 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
4074 		return -EINVAL;
4075 	}
4076 
4077 	return 0;
4078 }
4079 
4080 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
4081 {
4082 	uint32_t tmp;
4083 	struct amdgpu_device *adev = ring->adev;
4084 
4085 	/* tell RLC which is KIQ queue */
4086 	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
4087 	tmp &= 0xffffff00;
4088 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4089 	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
4090 }
4091 
4092 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
4093 {
4094 	/* set graphics engine doorbell range */
4095 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
4096 		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
4097 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
4098 		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
4099 
4100 	/* set compute engine doorbell range */
4101 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
4102 		     (adev->doorbell_index.kiq * 2) << 2);
4103 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
4104 		     (adev->doorbell_index.userqueue_end * 2) << 2);
4105 }
4106 
4107 static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
4108 					   struct v11_gfx_mqd *mqd,
4109 					   struct amdgpu_mqd_prop *prop)
4110 {
4111 	bool priority = 0;
4112 	u32 tmp;
4113 
4114 	/* set up default queue priority level
4115 	 * 0x0 = low priority, 0x1 = high priority
4116 	 */
4117 	if (prop->hqd_queue_priority == AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM)
4118 		priority = 1;
4119 
4120 	tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
4121 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
4122 	mqd->cp_gfx_hqd_queue_priority = tmp;
4123 }
4124 
4125 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
4126 				  struct amdgpu_mqd_prop *prop)
4127 {
4128 	struct v11_gfx_mqd *mqd = m;
4129 	uint64_t hqd_gpu_addr, wb_gpu_addr;
4130 	uint32_t tmp;
4131 	uint32_t rb_bufsz;
4132 
4133 	/* set up gfx hqd wptr */
4134 	mqd->cp_gfx_hqd_wptr = 0;
4135 	mqd->cp_gfx_hqd_wptr_hi = 0;
4136 
4137 	/* set the pointer to the MQD */
4138 	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
4139 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
4140 
4141 	/* set up mqd control */
4142 	tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
4143 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
4144 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
4145 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
4146 	mqd->cp_gfx_mqd_control = tmp;
4147 
4148 	/* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */
4149 	tmp = regCP_GFX_HQD_VMID_DEFAULT;
4150 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
4151 	mqd->cp_gfx_hqd_vmid = 0;
4152 
4153 	/* set up gfx queue priority */
4154 	gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);
4155 
4156 	/* set up time quantum */
4157 	tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
4158 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
4159 	mqd->cp_gfx_hqd_quantum = tmp;
4160 
4161 	/* set up gfx hqd base. this is similar as CP_RB_BASE */
4162 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
4163 	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
4164 	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
4165 
4166 	/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
4167 	wb_gpu_addr = prop->rptr_gpu_addr;
4168 	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
4169 	mqd->cp_gfx_hqd_rptr_addr_hi =
4170 		upper_32_bits(wb_gpu_addr) & 0xffff;
4171 
4172 	/* set up rb_wptr_poll addr */
4173 	wb_gpu_addr = prop->wptr_gpu_addr;
4174 	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4175 	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4176 
4177 	/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
4178 	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
4179 	tmp = regCP_GFX_HQD_CNTL_DEFAULT;
4180 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
4181 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
4182 #ifdef __BIG_ENDIAN
4183 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
4184 #endif
4185 	if (prop->tmz_queue)
4186 		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
4187 	if (!prop->kernel_queue)
4188 		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
4189 	mqd->cp_gfx_hqd_cntl = tmp;
4190 
4191 	/* set up cp_doorbell_control */
4192 	tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
4193 	if (prop->use_doorbell) {
4194 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4195 				    DOORBELL_OFFSET, prop->doorbell_index);
4196 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4197 				    DOORBELL_EN, 1);
4198 	} else
4199 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4200 				    DOORBELL_EN, 0);
4201 	mqd->cp_rb_doorbell_control = tmp;
4202 
4203 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4204 	mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
4205 
4206 	/* active the queue */
4207 	mqd->cp_gfx_hqd_active = 1;
4208 
4209 	/* set gfx UQ items */
4210 	mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
4211 	mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
4212 	mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr);
4213 	mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
4214 	mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
4215 	mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
4216 	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
4217 	mqd->fence_address_hi = upper_32_bits(prop->fence_address);
4218 
4219 	return 0;
4220 }
4221 
4222 static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
4223 {
4224 	struct amdgpu_device *adev = ring->adev;
4225 	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
4226 	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
4227 
4228 	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
4229 		memset((void *)mqd, 0, sizeof(*mqd));
4230 		mutex_lock(&adev->srbm_mutex);
4231 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4232 		amdgpu_ring_init_mqd(ring);
4233 		soc21_grbm_select(adev, 0, 0, 0, 0);
4234 		mutex_unlock(&adev->srbm_mutex);
4235 		if (adev->gfx.me.mqd_backup[mqd_idx])
4236 			memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4237 	} else {
4238 		/* restore mqd with the backup copy */
4239 		if (adev->gfx.me.mqd_backup[mqd_idx])
4240 			memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
4241 		/* reset the ring */
4242 		ring->wptr = 0;
4243 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4244 		amdgpu_ring_clear_ring(ring);
4245 	}
4246 
4247 	return 0;
4248 }
4249 
4250 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
4251 {
4252 	int r, i;
4253 
4254 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4255 		r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
4256 		if (r)
4257 			return r;
4258 	}
4259 
4260 	r = amdgpu_gfx_enable_kgq(adev, 0);
4261 	if (r)
4262 		return r;
4263 
4264 	return gfx_v11_0_cp_gfx_start(adev);
4265 }
4266 
4267 static void gfx_v11_0_compute_mqd_set_cu_mask(struct amdgpu_device *adev,
4268 					      struct v11_compute_mqd *mqd,
4269 					      struct amdgpu_mqd_prop *prop)
4270 {
4271 	uint32_t se_mask[8] = {0};
4272 	uint32_t wa_mask;
4273 	bool has_wa_flag = prop->cu_flags & (AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE |
4274 					  AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE);
4275 
4276 	if (!has_wa_flag && (!prop->cu_mask || !prop->cu_mask_count))
4277 		return;
4278 
4279 	if (has_wa_flag) {
4280 		wa_mask = (prop->cu_flags & AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE) ?
4281 			  0xffff : 0xffffffff;
4282 		mqd->compute_static_thread_mgmt_se0 = wa_mask;
4283 		mqd->compute_static_thread_mgmt_se1 = wa_mask;
4284 		mqd->compute_static_thread_mgmt_se2 = wa_mask;
4285 		mqd->compute_static_thread_mgmt_se3 = wa_mask;
4286 		return;
4287 	}
4288 
4289 	amdgpu_gfx_mqd_symmetrically_map_cu_mask(adev, prop->cu_mask,
4290 						prop->cu_mask_count, se_mask);
4291 
4292 	mqd->compute_static_thread_mgmt_se0 = se_mask[0];
4293 	mqd->compute_static_thread_mgmt_se1 = se_mask[1];
4294 	mqd->compute_static_thread_mgmt_se2 = se_mask[2];
4295 	mqd->compute_static_thread_mgmt_se3 = se_mask[3];
4296 }
4297 
4298 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
4299 				      struct amdgpu_mqd_prop *prop)
4300 {
4301 	struct v11_compute_mqd *mqd = m;
4302 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4303 	uint32_t tmp;
4304 
4305 	mqd->header = 0xC0310800;
4306 	mqd->compute_pipelinestat_enable = 0x00000001;
4307 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4308 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4309 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4310 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4311 	mqd->compute_misc_reserved = 0x00000007;
4312 
4313 	eop_base_addr = prop->eop_gpu_addr >> 8;
4314 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4315 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4316 
4317 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4318 	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
4319 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4320 			(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
4321 
4322 	mqd->cp_hqd_eop_control = tmp;
4323 
4324 	/* enable doorbell? */
4325 	tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
4326 
4327 	if (prop->use_doorbell) {
4328 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4329 				    DOORBELL_OFFSET, prop->doorbell_index);
4330 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4331 				    DOORBELL_EN, 1);
4332 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4333 				    DOORBELL_SOURCE, 0);
4334 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4335 				    DOORBELL_HIT, 0);
4336 	} else {
4337 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4338 				    DOORBELL_EN, 0);
4339 	}
4340 
4341 	mqd->cp_hqd_pq_doorbell_control = tmp;
4342 
4343 	/* disable the queue if it's active */
4344 	mqd->cp_hqd_dequeue_request = 0;
4345 	mqd->cp_hqd_pq_rptr = 0;
4346 	mqd->cp_hqd_pq_wptr_lo = 0;
4347 	mqd->cp_hqd_pq_wptr_hi = 0;
4348 
4349 	/* set the pointer to the MQD */
4350 	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
4351 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
4352 
4353 	/* set MQD vmid to 0 */
4354 	tmp = regCP_MQD_CONTROL_DEFAULT;
4355 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4356 	mqd->cp_mqd_control = tmp;
4357 
4358 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4359 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
4360 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4361 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4362 
4363 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4364 	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
4365 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4366 			    (order_base_2(prop->queue_size / 4) - 1));
4367 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4368 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4369 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
4370 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
4371 			    prop->allow_tunneling);
4372 	if (prop->kernel_queue) {
4373 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4374 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4375 	}
4376 	if (prop->tmz_queue)
4377 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
4378 	mqd->cp_hqd_pq_control = tmp;
4379 
4380 	/* set the wb address whether it's enabled or not */
4381 	wb_gpu_addr = prop->rptr_gpu_addr;
4382 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4383 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4384 		upper_32_bits(wb_gpu_addr) & 0xffff;
4385 
4386 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4387 	wb_gpu_addr = prop->wptr_gpu_addr;
4388 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4389 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4390 
4391 	tmp = 0;
4392 	/* enable the doorbell if requested */
4393 	if (prop->use_doorbell) {
4394 		tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
4395 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4396 				DOORBELL_OFFSET, prop->doorbell_index);
4397 
4398 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4399 				    DOORBELL_EN, 1);
4400 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4401 				    DOORBELL_SOURCE, 0);
4402 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4403 				    DOORBELL_HIT, 0);
4404 	}
4405 
4406 	mqd->cp_hqd_pq_doorbell_control = tmp;
4407 
4408 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4409 	mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
4410 
4411 	/* set the vmid for the queue */
4412 	mqd->cp_hqd_vmid = 0;
4413 
4414 	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
4415 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
4416 	mqd->cp_hqd_persistent_state = tmp;
4417 
4418 	/* set MIN_IB_AVAIL_SIZE */
4419 	tmp = regCP_HQD_IB_CONTROL_DEFAULT;
4420 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4421 	mqd->cp_hqd_ib_control = tmp;
4422 
4423 	/* set static priority for a compute queue/ring */
4424 	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
4425 	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
4426 
4427 	tmp = REG_SET_FIELD(0, CP_HQD_QUANTUM, QUANTUM_EN, 1);
4428 	tmp = REG_SET_FIELD(tmp, CP_HQD_QUANTUM, QUANTUM_SCALE, 1);
4429 	tmp = REG_SET_FIELD(tmp, CP_HQD_QUANTUM, QUANTUM_DURATION, 1);
4430 	mqd->cp_hqd_quantum = tmp;
4431 
4432 	mqd->cp_hqd_active = prop->hqd_active;
4433 
4434 	/* set UQ fenceaddress */
4435 	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
4436 	mqd->fence_address_hi = upper_32_bits(prop->fence_address);
4437 	/* set CU mask */
4438 	gfx_v11_0_compute_mqd_set_cu_mask(adev, mqd, prop);
4439 
4440 	return 0;
4441 }
4442 
4443 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
4444 {
4445 	struct amdgpu_device *adev = ring->adev;
4446 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4447 	int j;
4448 
4449 	/* inactivate the queue */
4450 	if (amdgpu_sriov_vf(adev))
4451 		WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
4452 
4453 	/* disable wptr polling */
4454 	WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4455 
4456 	/* write the EOP addr */
4457 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
4458 	       mqd->cp_hqd_eop_base_addr_lo);
4459 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
4460 	       mqd->cp_hqd_eop_base_addr_hi);
4461 
4462 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4463 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
4464 	       mqd->cp_hqd_eop_control);
4465 
4466 	/* enable doorbell? */
4467 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4468 	       mqd->cp_hqd_pq_doorbell_control);
4469 
4470 	/* disable the queue if it's active */
4471 	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
4472 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
4473 		for (j = 0; j < adev->usec_timeout; j++) {
4474 			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
4475 				break;
4476 			udelay(1);
4477 		}
4478 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
4479 		       mqd->cp_hqd_dequeue_request);
4480 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
4481 		       mqd->cp_hqd_pq_rptr);
4482 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4483 		       mqd->cp_hqd_pq_wptr_lo);
4484 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4485 		       mqd->cp_hqd_pq_wptr_hi);
4486 	}
4487 
4488 	/* set the pointer to the MQD */
4489 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
4490 	       mqd->cp_mqd_base_addr_lo);
4491 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
4492 	       mqd->cp_mqd_base_addr_hi);
4493 
4494 	/* set MQD vmid to 0 */
4495 	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
4496 	       mqd->cp_mqd_control);
4497 
4498 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4499 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
4500 	       mqd->cp_hqd_pq_base_lo);
4501 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
4502 	       mqd->cp_hqd_pq_base_hi);
4503 
4504 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4505 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
4506 	       mqd->cp_hqd_pq_control);
4507 
4508 	/* set the wb address whether it's enabled or not */
4509 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
4510 		mqd->cp_hqd_pq_rptr_report_addr_lo);
4511 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4512 		mqd->cp_hqd_pq_rptr_report_addr_hi);
4513 
4514 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4515 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
4516 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
4517 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4518 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4519 
4520 	/* enable the doorbell if requested */
4521 	if (ring->use_doorbell) {
4522 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
4523 			(adev->doorbell_index.kiq * 2) << 2);
4524 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
4525 			(adev->doorbell_index.userqueue_end * 2) << 2);
4526 	}
4527 
4528 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4529 	       mqd->cp_hqd_pq_doorbell_control);
4530 
4531 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4532 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4533 	       mqd->cp_hqd_pq_wptr_lo);
4534 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4535 	       mqd->cp_hqd_pq_wptr_hi);
4536 
4537 	/* set the vmid for the queue */
4538 	WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
4539 
4540 	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
4541 	       mqd->cp_hqd_persistent_state);
4542 
4543 	/* activate the queue */
4544 	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
4545 	       mqd->cp_hqd_active);
4546 
4547 	if (ring->use_doorbell)
4548 		WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4549 
4550 	return 0;
4551 }
4552 
4553 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
4554 {
4555 	struct amdgpu_device *adev = ring->adev;
4556 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4557 
4558 	gfx_v11_0_kiq_setting(ring);
4559 
4560 	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4561 		/* reset MQD to a clean status */
4562 		if (adev->gfx.kiq[0].mqd_backup)
4563 			memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
4564 
4565 		/* reset ring buffer */
4566 		ring->wptr = 0;
4567 		amdgpu_ring_clear_ring(ring);
4568 
4569 		mutex_lock(&adev->srbm_mutex);
4570 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4571 		gfx_v11_0_kiq_init_register(ring);
4572 		soc21_grbm_select(adev, 0, 0, 0, 0);
4573 		mutex_unlock(&adev->srbm_mutex);
4574 	} else {
4575 		memset((void *)mqd, 0, sizeof(*mqd));
4576 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4577 			amdgpu_ring_clear_ring(ring);
4578 		mutex_lock(&adev->srbm_mutex);
4579 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4580 		amdgpu_ring_init_mqd(ring);
4581 		gfx_v11_0_kiq_init_register(ring);
4582 		soc21_grbm_select(adev, 0, 0, 0, 0);
4583 		mutex_unlock(&adev->srbm_mutex);
4584 
4585 		if (adev->gfx.kiq[0].mqd_backup)
4586 			memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
4587 	}
4588 
4589 	return 0;
4590 }
4591 
4592 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
4593 {
4594 	struct amdgpu_device *adev = ring->adev;
4595 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4596 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4597 
4598 	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
4599 		memset((void *)mqd, 0, sizeof(*mqd));
4600 		mutex_lock(&adev->srbm_mutex);
4601 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4602 		amdgpu_ring_init_mqd(ring);
4603 		soc21_grbm_select(adev, 0, 0, 0, 0);
4604 		mutex_unlock(&adev->srbm_mutex);
4605 
4606 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4607 			memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4608 	} else {
4609 		/* restore MQD to a clean status */
4610 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4611 			memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4612 		/* reset ring buffer */
4613 		ring->wptr = 0;
4614 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4615 		amdgpu_ring_clear_ring(ring);
4616 	}
4617 
4618 	return 0;
4619 }
4620 
4621 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4622 {
4623 	gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
4624 	return 0;
4625 }
4626 
4627 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4628 {
4629 	int i, r;
4630 
4631 	if (!amdgpu_async_gfx_ring)
4632 		gfx_v11_0_cp_compute_enable(adev, true);
4633 
4634 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4635 		r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
4636 		if (r)
4637 			return r;
4638 	}
4639 
4640 	return amdgpu_gfx_enable_kcq(adev, 0);
4641 }
4642 
4643 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4644 {
4645 	int r, i;
4646 	struct amdgpu_ring *ring;
4647 
4648 	if (!(adev->flags & AMD_IS_APU))
4649 		gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4650 
4651 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4652 		/* legacy firmware loading */
4653 		r = gfx_v11_0_cp_gfx_load_microcode(adev);
4654 		if (r)
4655 			return r;
4656 
4657 		if (adev->gfx.rs64_enable)
4658 			r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4659 		else
4660 			r = gfx_v11_0_cp_compute_load_microcode(adev);
4661 		if (r)
4662 			return r;
4663 	}
4664 
4665 	gfx_v11_0_cp_set_doorbell_range(adev);
4666 
4667 	if (amdgpu_async_gfx_ring) {
4668 		gfx_v11_0_cp_compute_enable(adev, true);
4669 		gfx_v11_0_cp_gfx_enable(adev, true);
4670 	}
4671 
4672 	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4673 		r = amdgpu_mes_kiq_hw_init(adev, 0);
4674 	else
4675 		r = gfx_v11_0_kiq_resume(adev);
4676 	if (r)
4677 		return r;
4678 
4679 	r = gfx_v11_0_kcq_resume(adev);
4680 	if (r)
4681 		return r;
4682 
4683 	if (!amdgpu_async_gfx_ring) {
4684 		r = gfx_v11_0_cp_gfx_resume(adev);
4685 		if (r)
4686 			return r;
4687 	} else {
4688 		r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4689 		if (r)
4690 			return r;
4691 	}
4692 
4693 	if (adev->gfx.disable_kq) {
4694 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4695 			ring = &adev->gfx.gfx_ring[i];
4696 			/* we don't want to set ring->ready */
4697 			r = amdgpu_ring_test_ring(ring);
4698 			if (r)
4699 				return r;
4700 		}
4701 		if (amdgpu_async_gfx_ring)
4702 			amdgpu_gfx_disable_kgq(adev, 0);
4703 	} else {
4704 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4705 			ring = &adev->gfx.gfx_ring[i];
4706 			r = amdgpu_ring_test_helper(ring);
4707 			if (r)
4708 				return r;
4709 		}
4710 	}
4711 
4712 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4713 		ring = &adev->gfx.compute_ring[i];
4714 		r = amdgpu_ring_test_helper(ring);
4715 		if (r)
4716 			return r;
4717 	}
4718 
4719 	return 0;
4720 }
4721 
4722 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4723 {
4724 	gfx_v11_0_cp_gfx_enable(adev, enable);
4725 	gfx_v11_0_cp_compute_enable(adev, enable);
4726 }
4727 
4728 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4729 {
4730 	int r;
4731 	bool value;
4732 
4733 	r = adev->gfxhub.funcs->gart_enable(adev);
4734 	if (r)
4735 		return r;
4736 
4737 	amdgpu_device_flush_hdp(adev, NULL);
4738 
4739 	value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
4740 
4741 	adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4742 	/* TODO investigate why this and the hdp flush above is needed,
4743 	 * are we missing a flush somewhere else? */
4744 	adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
4745 
4746 	return 0;
4747 }
4748 
4749 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4750 {
4751 	u32 tmp;
4752 
4753 	/* select RS64 */
4754 	if (adev->gfx.rs64_enable) {
4755 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4756 		tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4757 		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4758 
4759 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4760 		tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4761 		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4762 	}
4763 
4764 	if (amdgpu_emu_mode == 1)
4765 		msleep(100);
4766 }
4767 
4768 static int get_gb_addr_config(struct amdgpu_device * adev)
4769 {
4770 	u32 gb_addr_config;
4771 
4772 	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4773 	if (gb_addr_config == 0)
4774 		return -EINVAL;
4775 
4776 	adev->gfx.config.gb_addr_config_fields.num_pkrs =
4777 		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4778 
4779 	adev->gfx.config.gb_addr_config = gb_addr_config;
4780 
4781 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4782 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4783 				      GB_ADDR_CONFIG, NUM_PIPES);
4784 
4785 	adev->gfx.config.max_tile_pipes =
4786 		adev->gfx.config.gb_addr_config_fields.num_pipes;
4787 
4788 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4789 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4790 				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4791 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4792 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4793 				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
4794 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4795 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4796 				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4797 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4798 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4799 				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4800 
4801 	return 0;
4802 }
4803 
4804 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4805 {
4806 	uint32_t data;
4807 
4808 	data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4809 	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4810 	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4811 
4812 	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4813 	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4814 	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4815 }
4816 
4817 static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
4818 					      bool enable)
4819 {
4820 	unsigned int irq_type;
4821 	int m, p, r;
4822 
4823 	if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
4824 		for (m = 0; m < adev->gfx.me.num_me; m++) {
4825 			for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
4826 				irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
4827 				if (enable)
4828 					r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, irq_type);
4829 				else
4830 					r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type);
4831 				if (r) {
4832 					if (!enable)
4833 						return r;
4834 					goto err_gfx;
4835 				}
4836 			}
4837 		}
4838 	}
4839 
4840 	if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
4841 		for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
4842 			for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
4843 				irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
4844 					+ (m * adev->gfx.mec.num_pipe_per_mec)
4845 					+ p;
4846 				if (enable)
4847 					r = amdgpu_irq_get(adev, &adev->gfx.eop_irq, irq_type);
4848 				else
4849 					r = amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type);
4850 				if (r) {
4851 					if (!enable)
4852 						return r;
4853 					goto err_compute;
4854 				}
4855 			}
4856 		}
4857 	}
4858 
4859 	return 0;
4860 
4861 err_compute:
4862 	for (p--; p >= 0; p--) {
4863 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
4864 			+ (m * adev->gfx.mec.num_pipe_per_mec) + p;
4865 		amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type);
4866 	}
4867 	for (m--; m >= 0; m--) {
4868 		for (p = adev->gfx.mec.num_pipe_per_mec - 1; p >= 0; p--) {
4869 			irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
4870 				+ (m * adev->gfx.mec.num_pipe_per_mec) + p;
4871 			amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type);
4872 		}
4873 	}
4874 	m = adev->gfx.me.num_me;
4875 err_gfx:
4876 	for (p--; p >= 0; p--) {
4877 		irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
4878 		amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type);
4879 	}
4880 	for (m--; m >= 0; m--) {
4881 		for (p = adev->gfx.me.num_pipe_per_me - 1; p >= 0; p--) {
4882 			irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
4883 			amdgpu_irq_put(adev, &adev->gfx.eop_irq, irq_type);
4884 		}
4885 	}
4886 	return r;
4887 }
4888 
4889 static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
4890 {
4891 	int r;
4892 	struct amdgpu_device *adev = ip_block->adev;
4893 
4894 	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
4895 				       adev->gfx.cleaner_shader_ptr);
4896 
4897 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4898 		if (adev->gfx.imu.funcs) {
4899 			/* RLC autoload sequence 1: Program rlc ram */
4900 			if (adev->gfx.imu.funcs->program_rlc_ram)
4901 				adev->gfx.imu.funcs->program_rlc_ram(adev);
4902 			/* rlc autoload firmware */
4903 			r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4904 			if (r)
4905 				return r;
4906 		}
4907 	} else {
4908 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4909 			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4910 				if (adev->gfx.imu.funcs->load_microcode)
4911 					adev->gfx.imu.funcs->load_microcode(adev);
4912 				if (adev->gfx.imu.funcs->setup_imu)
4913 					adev->gfx.imu.funcs->setup_imu(adev);
4914 				if (adev->gfx.imu.funcs->start_imu)
4915 					adev->gfx.imu.funcs->start_imu(adev);
4916 			}
4917 
4918 			/* disable gpa mode in backdoor loading */
4919 			gfx_v11_0_disable_gpa_mode(adev);
4920 		}
4921 	}
4922 
4923 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4924 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4925 		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4926 		if (r) {
4927 			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
4928 			return r;
4929 		}
4930 	}
4931 
4932 	adev->gfx.is_poweron = true;
4933 
4934 	if(get_gb_addr_config(adev))
4935 		drm_warn(adev_to_drm(adev), "Invalid gb_addr_config !\n");
4936 
4937 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4938 	    adev->gfx.rs64_enable)
4939 		gfx_v11_0_config_gfx_rs64(adev);
4940 
4941 	r = gfx_v11_0_gfxhub_enable(adev);
4942 	if (r)
4943 		return r;
4944 
4945 	if (!amdgpu_emu_mode)
4946 		gfx_v11_0_init_golden_registers(adev);
4947 
4948 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4949 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4950 		/**
4951 		 * For gfx 11, rlc firmware loading relies on smu firmware is
4952 		 * loaded firstly, so in direct type, it has to load smc ucode
4953 		 * here before rlc.
4954 		 */
4955 		r = amdgpu_pm_load_smu_firmware(adev, NULL);
4956 		if (r)
4957 			return r;
4958 	}
4959 
4960 	gfx_v11_0_constants_init(adev);
4961 
4962 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4963 		gfx_v11_0_select_cp_fw_arch(adev);
4964 
4965 	if (adev->nbio.funcs->gc_doorbell_init)
4966 		adev->nbio.funcs->gc_doorbell_init(adev);
4967 
4968 	r = gfx_v11_0_rlc_resume(adev);
4969 	if (r)
4970 		return r;
4971 
4972 	/*
4973 	 * init golden registers and rlc resume may override some registers,
4974 	 * reconfig them here
4975 	 */
4976 	gfx_v11_0_tcp_harvest(adev);
4977 
4978 	r = gfx_v11_0_cp_resume(adev);
4979 	if (r)
4980 		return r;
4981 
4982 	/* get IMU version from HW if it's not set */
4983 	if (!adev->gfx.imu_fw_version)
4984 		adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);
4985 
4986 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4987 	if (r)
4988 		return r;
4989 
4990 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4991 	if (r)
4992 		goto err_priv_inst;
4993 
4994 	r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
4995 	if (r)
4996 		goto err_bad_op;
4997 
4998 	r = gfx_v11_0_set_userq_eop_interrupts(adev, true);
4999 	if (r)
5000 		goto err_userq_eop;
5001 
5002 	return 0;
5003 
5004 err_userq_eop:
5005 	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
5006 err_bad_op:
5007 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5008 err_priv_inst:
5009 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5010 	return r;
5011 }
5012 
5013 static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
5014 {
5015 	struct amdgpu_device *adev = ip_block->adev;
5016 
5017 	cancel_delayed_work_sync(&adev->gfx.idle_work);
5018 
5019 	gfx_v11_0_set_userq_eop_interrupts(adev, false);
5020 	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
5021 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5022 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5023 
5024 	if (!adev->no_hw_access) {
5025 		if (amdgpu_async_gfx_ring &&
5026 		    !adev->gfx.disable_kq) {
5027 			if (amdgpu_gfx_disable_kgq(adev, 0))
5028 				DRM_ERROR("KGQ disable failed\n");
5029 		}
5030 
5031 		if (amdgpu_gfx_disable_kcq(adev, 0))
5032 			DRM_ERROR("KCQ disable failed\n");
5033 
5034 		amdgpu_mes_kiq_hw_fini(adev, 0);
5035 	}
5036 
5037 	if (amdgpu_sriov_vf(adev))
5038 		/* Remove the steps disabling CPG and clearing KIQ position,
5039 		 * so that CP could perform IDLE-SAVE during switch. Those
5040 		 * steps are necessary to avoid a DMAR error in gfx9 but it is
5041 		 * not reproduced on gfx11.
5042 		 */
5043 		return 0;
5044 
5045 	gfx_v11_0_cp_enable(adev, false);
5046 	gfx_v11_0_enable_gui_idle_interrupt(adev, false);
5047 
5048 	adev->gfxhub.funcs->gart_disable(adev);
5049 
5050 	adev->gfx.is_poweron = false;
5051 
5052 	return 0;
5053 }
5054 
/* Suspend is implemented as a plain hardware teardown; returns its status. */
static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block)
{
	int ret = gfx_v11_0_hw_fini(ip_block);

	return ret;
}
5059 
/* Resume is implemented as a full hardware init; returns its status. */
static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block)
{
	int ret = gfx_v11_0_hw_init(ip_block);

	return ret;
}
5064 
5065 static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
5066 {
5067 	struct amdgpu_device *adev = ip_block->adev;
5068 
5069 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
5070 				GRBM_STATUS, GUI_ACTIVE))
5071 		return false;
5072 	else
5073 		return true;
5074 }
5075 
5076 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
5077 {
5078 	unsigned i;
5079 	u32 tmp;
5080 	struct amdgpu_device *adev = ip_block->adev;
5081 
5082 	for (i = 0; i < adev->usec_timeout; i++) {
5083 		/* read MC_STATUS */
5084 		tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
5085 			GRBM_STATUS__GUI_ACTIVE_MASK;
5086 
5087 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
5088 			return 0;
5089 		udelay(1);
5090 	}
5091 	return -ETIMEDOUT;
5092 }
5093 
5094 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
5095 				      bool req)
5096 {
5097 	u32 i, tmp, val;
5098 
5099 	for (i = 0; i < adev->usec_timeout; i++) {
5100 		/* Request with MeId=2, PipeId=0 */
5101 		tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
5102 		tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
5103 		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
5104 
5105 		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
5106 		if (req) {
5107 			if (val == tmp)
5108 				break;
5109 		} else {
5110 			tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
5111 					    REQUEST, 1);
5112 
5113 			/* unlocked or locked by firmware */
5114 			if (val != tmp)
5115 				break;
5116 		}
5117 		udelay(1);
5118 	}
5119 
5120 	if (i >= adev->usec_timeout)
5121 		return -EINVAL;
5122 
5123 	return 0;
5124 }
5125 
5126 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
5127 {
5128 	u32 grbm_soft_reset = 0;
5129 	u32 tmp;
5130 	int r, i, j, k;
5131 	struct amdgpu_device *adev = ip_block->adev;
5132 
5133 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5134 
5135 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
5136 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
5137 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
5138 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
5139 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
5140 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
5141 
5142 	mutex_lock(&adev->srbm_mutex);
5143 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
5144 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
5145 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
5146 				soc21_grbm_select(adev, i, k, j, 0);
5147 
5148 				WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
5149 				WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
5150 			}
5151 		}
5152 	}
5153 	for (i = 0; i < adev->gfx.me.num_me; ++i) {
5154 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
5155 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
5156 				soc21_grbm_select(adev, i, k, j, 0);
5157 
5158 				WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
5159 			}
5160 		}
5161 	}
5162 	soc21_grbm_select(adev, 0, 0, 0, 0);
5163 	mutex_unlock(&adev->srbm_mutex);
5164 
5165 	/* Try to acquire the gfx mutex before access to CP_VMID_RESET */
5166 	mutex_lock(&adev->gfx.reset_sem_mutex);
5167 	r = gfx_v11_0_request_gfx_index_mutex(adev, true);
5168 	if (r) {
5169 		mutex_unlock(&adev->gfx.reset_sem_mutex);
5170 		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
5171 		return r;
5172 	}
5173 
5174 	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
5175 
5176 	// Read CP_VMID_RESET register three times.
5177 	// to get sufficient time for GFX_HQD_ACTIVE reach 0
5178 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
5179 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
5180 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
5181 
5182 	/* release the gfx mutex */
5183 	r = gfx_v11_0_request_gfx_index_mutex(adev, false);
5184 	mutex_unlock(&adev->gfx.reset_sem_mutex);
5185 	if (r) {
5186 		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
5187 		return r;
5188 	}
5189 
5190 	for (i = 0; i < adev->usec_timeout; i++) {
5191 		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
5192 		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
5193 			break;
5194 		udelay(1);
5195 	}
5196 	if (i >= adev->usec_timeout) {
5197 		printk("Failed to wait all pipes clean\n");
5198 		return -EINVAL;
5199 	}
5200 
5201 	/**********  trigger soft reset  ***********/
5202 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
5203 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5204 					SOFT_RESET_CP, 1);
5205 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5206 					SOFT_RESET_GFX, 1);
5207 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5208 					SOFT_RESET_CPF, 1);
5209 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5210 					SOFT_RESET_CPC, 1);
5211 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5212 					SOFT_RESET_CPG, 1);
5213 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
5214 	/**********  exit soft reset  ***********/
5215 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
5216 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5217 					SOFT_RESET_CP, 0);
5218 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5219 					SOFT_RESET_GFX, 0);
5220 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5221 					SOFT_RESET_CPF, 0);
5222 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5223 					SOFT_RESET_CPC, 0);
5224 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5225 					SOFT_RESET_CPG, 0);
5226 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
5227 
5228 	tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
5229 	tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
5230 	WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
5231 
5232 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
5233 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
5234 
5235 	for (i = 0; i < adev->usec_timeout; i++) {
5236 		if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
5237 			break;
5238 		udelay(1);
5239 	}
5240 	if (i >= adev->usec_timeout) {
5241 		printk("Failed to wait CP_VMID_RESET to 0\n");
5242 		return -EINVAL;
5243 	}
5244 
5245 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
5246 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
5247 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
5248 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
5249 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
5250 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
5251 
5252 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5253 
5254 	return gfx_v11_0_cp_resume(adev);
5255 }
5256 
5257 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
5258 {
5259 	int i, r;
5260 	struct amdgpu_device *adev = ip_block->adev;
5261 	struct amdgpu_ring *ring;
5262 	long tmo = msecs_to_jiffies(1000);
5263 
5264 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
5265 		ring = &adev->gfx.gfx_ring[i];
5266 		r = amdgpu_ring_test_ib(ring, tmo);
5267 		if (r)
5268 			return true;
5269 	}
5270 
5271 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5272 		ring = &adev->gfx.compute_ring[i];
5273 		r = amdgpu_ring_test_ib(ring, tmo);
5274 		if (r)
5275 			return true;
5276 	}
5277 
5278 	return false;
5279 }
5280 
5281 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
5282 {
5283 	struct amdgpu_device *adev = ip_block->adev;
5284 	/**
5285 	 * GFX soft reset will impact MES, need resume MES when do GFX soft reset
5286 	 */
5287 	return amdgpu_mes_resume(adev, 0);
5288 }
5289 
5290 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5291 {
5292 	uint64_t clock;
5293 	uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
5294 
5295 	if (amdgpu_sriov_vf(adev)) {
5296 		amdgpu_gfx_off_ctrl(adev, false);
5297 		mutex_lock(&adev->gfx.gpu_clock_mutex);
5298 		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
5299 		clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
5300 		clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
5301 		if (clock_counter_hi_pre != clock_counter_hi_after)
5302 			clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
5303 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
5304 		amdgpu_gfx_off_ctrl(adev, true);
5305 	} else {
5306 		preempt_disable();
5307 		if (amdgpu_ip_version(adev, SMUIO_HWIP, 0) < IP_VERSION(15, 0, 0)) {
5308 			clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0,
5309 					regGOLDEN_TSC_COUNT_UPPER);
5310 			clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0,
5311 					regGOLDEN_TSC_COUNT_LOWER);
5312 			clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0,
5313 					regGOLDEN_TSC_COUNT_UPPER);
5314 			if (clock_counter_hi_pre != clock_counter_hi_after)
5315 				clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0,
5316 						regGOLDEN_TSC_COUNT_LOWER);
5317 		} else {
5318 			clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0,
5319 					regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0);
5320 			clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0,
5321 					regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0);
5322 			clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0,
5323 					regGOLDEN_TSC_COUNT_UPPER_smu_15_0_0);
5324 			if (clock_counter_hi_pre != clock_counter_hi_after)
5325 				clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0,
5326 						regGOLDEN_TSC_COUNT_LOWER_smu_15_0_0);
5327 		}
5328 		preempt_enable();
5329 	}
5330 	clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
5331 
5332 	return clock;
5333 }
5334 
5335 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5336 					   uint32_t vmid,
5337 					   uint32_t gds_base, uint32_t gds_size,
5338 					   uint32_t gws_base, uint32_t gws_size,
5339 					   uint32_t oa_base, uint32_t oa_size)
5340 {
5341 	struct amdgpu_device *adev = ring->adev;
5342 
5343 	/* GDS Base */
5344 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5345 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
5346 				    gds_base);
5347 
5348 	/* GDS Size */
5349 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5350 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
5351 				    gds_size);
5352 
5353 	/* GWS */
5354 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5355 				    SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
5356 				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5357 
5358 	/* OA */
5359 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5360 				    SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
5361 				    (1 << (oa_size + oa_base)) - (1 << oa_base));
5362 }
5363 
5364 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
5365 {
5366 	struct amdgpu_device *adev = ip_block->adev;
5367 
5368 	switch (amdgpu_user_queue) {
5369 	case -1:
5370 	case 0:
5371 	default:
5372 		adev->gfx.disable_kq = false;
5373 		adev->gfx.disable_uq = true;
5374 		break;
5375 	case 1:
5376 		adev->gfx.disable_kq = false;
5377 		adev->gfx.disable_uq = false;
5378 		break;
5379 	case 2:
5380 		adev->gfx.disable_kq = true;
5381 		adev->gfx.disable_uq = false;
5382 		break;
5383 	}
5384 
5385 	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
5386 
5387 	if (adev->gfx.disable_kq) {
5388 		/* We need one GFX ring temporarily to set up
5389 		 * the clear state.
5390 		 */
5391 		adev->gfx.num_gfx_rings = 1;
5392 		adev->gfx.num_compute_rings = 0;
5393 	} else {
5394 		adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
5395 		adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5396 						  AMDGPU_MAX_COMPUTE_RINGS);
5397 	}
5398 
5399 	gfx_v11_0_set_kiq_pm4_funcs(adev);
5400 	gfx_v11_0_set_ring_funcs(adev);
5401 	gfx_v11_0_set_irq_funcs(adev);
5402 	gfx_v11_0_set_gds_init(adev);
5403 	gfx_v11_0_set_rlc_funcs(adev);
5404 	gfx_v11_0_set_mqd_funcs(adev);
5405 	gfx_v11_0_set_imu_funcs(adev);
5406 
5407 	gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
5408 
5409 	return gfx_v11_0_init_microcode(adev);
5410 }
5411 
5412 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
5413 {
5414 	uint32_t rlc_cntl;
5415 
5416 	/* if RLC is not enabled, do nothing */
5417 	rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
5418 	return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
5419 }
5420 
5421 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
5422 {
5423 	uint32_t data;
5424 	unsigned i;
5425 
5426 	data = RLC_SAFE_MODE__CMD_MASK;
5427 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5428 
5429 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
5430 
5431 	/* wait for RLC_SAFE_MODE */
5432 	for (i = 0; i < adev->usec_timeout; i++) {
5433 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
5434 				   RLC_SAFE_MODE, CMD))
5435 			break;
5436 		udelay(1);
5437 	}
5438 }
5439 
5440 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
5441 {
5442 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
5443 }
5444 
5445 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
5446 				      bool enable)
5447 {
5448 	uint32_t def, data;
5449 
5450 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
5451 		return;
5452 
5453 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5454 
5455 	if (enable)
5456 		data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
5457 	else
5458 		data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
5459 
5460 	if (def != data)
5461 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5462 }
5463 
5464 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
5465 				       bool enable)
5466 {
5467 	uint32_t def, data;
5468 
5469 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
5470 		return;
5471 
5472 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5473 
5474 	if (enable)
5475 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
5476 	else
5477 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
5478 
5479 	if (def != data)
5480 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5481 }
5482 
5483 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
5484 					   bool enable)
5485 {
5486 	uint32_t def, data;
5487 
5488 	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
5489 		return;
5490 
5491 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5492 
5493 	if (enable)
5494 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
5495 	else
5496 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
5497 
5498 	if (def != data)
5499 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5500 }
5501 
5502 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5503 						       bool enable)
5504 {
5505 	uint32_t data, def;
5506 
5507 	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
5508 		return;
5509 
5510 	/* It is disabled by HW by default */
5511 	if (enable) {
5512 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5513 			/* 1 - RLC_CGTT_MGCG_OVERRIDE */
5514 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5515 
5516 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5517 				  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5518 				  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
5519 
5520 			if (def != data)
5521 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5522 		}
5523 	} else {
5524 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5525 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5526 
5527 			data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5528 				 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5529 				 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
5530 
5531 			if (def != data)
5532 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5533 		}
5534 	}
5535 }
5536 
/*
 * Enable or disable coarse-grain clock gating (CGCG/CGLS and their 3D
 * variants) according to the capabilities advertised in adev->cg_flags.
 *
 * @adev:   device whose GC registers are programmed
 * @enable: true to enable the gating features, false to disable them
 *
 * Returns immediately when none of the CGCG, CGLS, 3D_CGCG or 3D_CGLS flags
 * is set.
 *
 * Enable path: clears the matching override bits in RLC_CGTT_MGCG_OVERRIDE,
 * programs thresholds/delays and enable bits in RLC_CGCG_CGLS_CTRL and
 * RLC_CGCG_CGLS_CTRL_3D, programs CP_RB_WPTR_POLL_CNTL, sets four interrupt
 * enable bits in CP_INT_CNTL, and sets CGCG_INT_ENABLE in the SDMA
 * RLC_CGCG_CTRL register(s).
 *
 * Disable path: clears the enable bits in RLC_CGCG_CGLS_CTRL and
 * RLC_CGCG_CGLS_CTRL_3D and clears CGCG_INT_ENABLE in the SDMA
 * RLC_CGCG_CTRL register(s).  The override bits, CP_RB_WPTR_POLL_CNTL and
 * CP_INT_CNTL are not touched on this path.
 */
static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						       bool enable)
{
	/* def holds the value read back, data the value to write */
	uint32_t def, data;

	if (!(adev->cg_flags &
	      (AMD_CG_SUPPORT_GFX_CGCG |
	      AMD_CG_SUPPORT_GFX_CGLS |
	      AMD_CG_SUPPORT_GFX_3D_CGCG |
	      AMD_CG_SUPPORT_GFX_3D_CGLS)))
		return;

	if (enable) {
		def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);

		/* unset CGCG override */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
		/* a single override bit covers both 3D capabilities */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
		    adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;

		/* update CGCG override bits */
		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);

		/* enable cgcg FSM(0x0000363F) */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);

		/* CGCG: idle threshold 0x36 plus the enable bit */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
			data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
				 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
		}

		/* CGLS: REP_COMPANSAT_DELAY 0xF plus the enable bit */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);

		/* Program RLC_CGCG_CGLS_CTRL_3D */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);

		/* same threshold and delay values as the non-3D register above */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
			data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
				 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
			data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
				 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
		}

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);

		/* set IDLE_POLL_COUNT(0x00900100) */
		def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);

		/* POLL_FREQUENCY = 0x0100, IDLE_POLL_COUNT = 0x0090 */
		data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
		data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);

		if (def != data)
			WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);

		/* CP_INT_CNTL is written unconditionally (no def/data comparison) */
		data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
		WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);

		/* set CGCG_INT_ENABLE for SDMA0 */
		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
		data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);

		/* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
		if (adev->sdma.num_instances > 1) {
			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
			data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
		}
	} else {
		/* Program RLC_CGCG_CGLS_CTRL */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);

		/* only the enable bits are cleared; thresholds/delays are left as is */
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);

		/* Program RLC_CGCG_CGLS_CTRL_3D */
		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;

		if (def != data)
			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);

		/* clear CGCG_INT_ENABLE for SDMA0 */
		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);

		/* Some ASICs only have one SDMA instance, not need to configure SDMA1 */
		if (adev->sdma.num_instances > 1) {
			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
			data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
		}
	}
}
5664 
5665 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5666 					    bool enable)
5667 {
5668 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5669 
5670 	gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
5671 
5672 	gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
5673 
5674 	gfx_v11_0_update_repeater_fgcg(adev, enable);
5675 
5676 	gfx_v11_0_update_sram_fgcg(adev, enable);
5677 
5678 	gfx_v11_0_update_perf_clk(adev, enable);
5679 
5680 	if (adev->cg_flags &
5681 	    (AMD_CG_SUPPORT_GFX_MGCG |
5682 	     AMD_CG_SUPPORT_GFX_CGLS |
5683 	     AMD_CG_SUPPORT_GFX_CGCG |
5684 	     AMD_CG_SUPPORT_GFX_3D_CGCG |
5685 	     AMD_CG_SUPPORT_GFX_3D_CGLS))
5686 	        gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
5687 
5688 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5689 
5690 	return 0;
5691 }
5692 
5693 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id,
5694 		struct amdgpu_ring *ring, unsigned vmid)
5695 {
5696 	u32 reg, pre_data, data;
5697 
5698 	amdgpu_gfx_off_ctrl(adev, false);
5699 	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
5700 	if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
5701 		pre_data = RREG32_NO_KIQ(reg);
5702 	else
5703 		pre_data = RREG32(reg);
5704 
5705 	data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
5706 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5707 
5708 	if (pre_data != data) {
5709 		if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
5710 			WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
5711 		} else
5712 			WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
5713 	}
5714 	amdgpu_gfx_off_ctrl(adev, true);
5715 
5716 	if (ring
5717 		&& amdgpu_sriov_is_pp_one_vf(adev)
5718 		&& (pre_data != data)
5719 		&& ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
5720 			|| (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
5721 		amdgpu_ring_emit_wreg(ring, reg, data);
5722 	}
5723 }
5724 
5725 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5726 	.is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5727 	.set_safe_mode = gfx_v11_0_set_safe_mode,
5728 	.unset_safe_mode = gfx_v11_0_unset_safe_mode,
5729 	.init = gfx_v11_0_rlc_init,
5730 	.get_csb_size = gfx_v11_0_get_csb_size,
5731 	.get_csb_buffer = gfx_v11_0_get_csb_buffer,
5732 	.resume = gfx_v11_0_rlc_resume,
5733 	.stop = gfx_v11_0_rlc_stop,
5734 	.reset = gfx_v11_0_rlc_reset,
5735 	.start = gfx_v11_0_rlc_start,
5736 	.update_spm_vmid = gfx_v11_0_update_spm_vmid,
5737 };
5738 
5739 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5740 {
5741 	u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5742 
5743 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5744 		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5745 	else
5746 		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5747 
5748 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5749 
5750 	// Program RLC_PG_DELAY3 for CGPG hysteresis
5751 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5752 		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5753 		case IP_VERSION(11, 0, 1):
5754 		case IP_VERSION(11, 0, 4):
5755 		case IP_VERSION(11, 5, 0):
5756 		case IP_VERSION(11, 5, 1):
5757 		case IP_VERSION(11, 5, 2):
5758 		case IP_VERSION(11, 5, 3):
5759 	        case IP_VERSION(11, 5, 4):
5760 		case IP_VERSION(11, 5, 6):
5761 			WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5762 			break;
5763 		default:
5764 			break;
5765 		}
5766 	}
5767 }
5768 
5769 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5770 {
5771 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5772 
5773 	gfx_v11_cntl_power_gating(adev, enable);
5774 
5775 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5776 }
5777 
5778 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
5779 					   enum amd_powergating_state state)
5780 {
5781 	struct amdgpu_device *adev = ip_block->adev;
5782 	bool enable = (state == AMD_PG_STATE_GATE);
5783 
5784 	if (amdgpu_sriov_vf(adev))
5785 		return 0;
5786 
5787 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5788 	case IP_VERSION(11, 0, 0):
5789 	case IP_VERSION(11, 0, 2):
5790 	case IP_VERSION(11, 0, 3):
5791 		amdgpu_gfx_off_ctrl(adev, enable);
5792 		break;
5793 	case IP_VERSION(11, 0, 1):
5794 	case IP_VERSION(11, 0, 4):
5795 	case IP_VERSION(11, 5, 0):
5796 	case IP_VERSION(11, 5, 1):
5797 	case IP_VERSION(11, 5, 2):
5798 	case IP_VERSION(11, 5, 3):
5799 	case IP_VERSION(11, 5, 4):
5800 	case IP_VERSION(11, 5, 6):
5801 		if (!enable)
5802 			amdgpu_gfx_off_ctrl(adev, false);
5803 
5804 		gfx_v11_cntl_pg(adev, enable);
5805 
5806 		if (enable)
5807 			amdgpu_gfx_off_ctrl(adev, true);
5808 
5809 		break;
5810 	default:
5811 		break;
5812 	}
5813 
5814 	return 0;
5815 }
5816 
5817 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
5818 					  enum amd_clockgating_state state)
5819 {
5820 	struct amdgpu_device *adev = ip_block->adev;
5821 
5822 	if (amdgpu_sriov_vf(adev))
5823 	        return 0;
5824 
5825 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5826 	case IP_VERSION(11, 0, 0):
5827 	case IP_VERSION(11, 0, 1):
5828 	case IP_VERSION(11, 0, 2):
5829 	case IP_VERSION(11, 0, 3):
5830 	case IP_VERSION(11, 0, 4):
5831 	case IP_VERSION(11, 5, 0):
5832 	case IP_VERSION(11, 5, 1):
5833 	case IP_VERSION(11, 5, 2):
5834 	case IP_VERSION(11, 5, 3):
5835 	case IP_VERSION(11, 5, 4):
5836 	case IP_VERSION(11, 5, 6):
5837 	        gfx_v11_0_update_gfx_clock_gating(adev,
5838 	                        state ==  AMD_CG_STATE_GATE);
5839 	        break;
5840 	default:
5841 	        break;
5842 	}
5843 
5844 	return 0;
5845 }
5846 
5847 static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
5848 {
5849 	struct amdgpu_device *adev = ip_block->adev;
5850 	int data;
5851 
5852 	/* AMD_CG_SUPPORT_GFX_MGCG */
5853 	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5854 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5855 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5856 
5857 	/* AMD_CG_SUPPORT_REPEATER_FGCG */
5858 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5859 		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5860 
5861 	/* AMD_CG_SUPPORT_GFX_FGCG */
5862 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5863 		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
5864 
5865 	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
5866 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5867 		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5868 
5869 	/* AMD_CG_SUPPORT_GFX_CGCG */
5870 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5871 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5872 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5873 
5874 	/* AMD_CG_SUPPORT_GFX_CGLS */
5875 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5876 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5877 
5878 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5879 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5880 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5881 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5882 
5883 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5884 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5885 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5886 }
5887 
5888 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5889 {
5890 	/* gfx11 is 32bit rptr*/
5891 	return *(uint32_t *)ring->rptr_cpu_addr;
5892 }
5893 
5894 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5895 {
5896 	struct amdgpu_device *adev = ring->adev;
5897 	u64 wptr;
5898 
5899 	/* XXX check if swapping is necessary on BE */
5900 	if (ring->use_doorbell) {
5901 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5902 	} else {
5903 		wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5904 		wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5905 	}
5906 
5907 	return wptr;
5908 }
5909 
5910 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5911 {
5912 	struct amdgpu_device *adev = ring->adev;
5913 
5914 	if (ring->use_doorbell) {
5915 		/* XXX check if swapping is necessary on BE */
5916 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5917 			     ring->wptr);
5918 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5919 	} else {
5920 		WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5921 			     lower_32_bits(ring->wptr));
5922 		WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5923 			     upper_32_bits(ring->wptr));
5924 	}
5925 }
5926 
5927 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5928 {
5929 	/* gfx11 hardware is 32bit rptr */
5930 	return *(uint32_t *)ring->rptr_cpu_addr;
5931 }
5932 
5933 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5934 {
5935 	u64 wptr;
5936 
5937 	/* XXX check if swapping is necessary on BE */
5938 	if (ring->use_doorbell)
5939 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5940 	else
5941 		BUG();
5942 	return wptr;
5943 }
5944 
5945 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5946 {
5947 	struct amdgpu_device *adev = ring->adev;
5948 
5949 	/* XXX check if swapping is necessary on BE */
5950 	if (ring->use_doorbell) {
5951 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5952 			     ring->wptr);
5953 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5954 	} else {
5955 		BUG(); /* only DOORBELL method supported on gfx11 now */
5956 	}
5957 }
5958 
5959 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5960 {
5961 	struct amdgpu_device *adev = ring->adev;
5962 	u32 ref_and_mask, reg_mem_engine;
5963 
5964 	if (!adev->gfx.funcs->get_hdp_flush_mask) {
5965 		dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__);
5966 		return;
5967 	}
5968 
5969 	adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, &reg_mem_engine);
5970 	gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5971 			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5972 			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5973 			       ref_and_mask, ref_and_mask, 0x20);
5974 }
5975 
5976 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5977 				       struct amdgpu_job *job,
5978 				       struct amdgpu_ib *ib,
5979 				       uint32_t flags)
5980 {
5981 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5982 	u32 header, control = 0;
5983 
5984 	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5985 
5986 	control |= ib->length_dw | (vmid << 24);
5987 
5988 	if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5989 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5990 
5991 		if (flags & AMDGPU_IB_PREEMPTED)
5992 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5993 
5994 		if (vmid && !ring->adev->gfx.rs64_enable)
5995 			gfx_v11_0_ring_emit_de_meta(ring,
5996 				!amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
5997 	}
5998 
5999 	amdgpu_ring_write(ring, header);
6000 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
6001 	amdgpu_ring_write(ring,
6002 #ifdef __BIG_ENDIAN
6003 		(2 << 0) |
6004 #endif
6005 		lower_32_bits(ib->gpu_addr));
6006 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
6007 	amdgpu_ring_write(ring, control);
6008 }
6009 
6010 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6011 					   struct amdgpu_job *job,
6012 					   struct amdgpu_ib *ib,
6013 					   uint32_t flags)
6014 {
6015 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6016 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6017 
6018 	/* Currently, there is a high possibility to get wave ID mismatch
6019 	 * between ME and GDS, leading to a hw deadlock, because ME generates
6020 	 * different wave IDs than the GDS expects. This situation happens
6021 	 * randomly when at least 5 compute pipes use GDS ordered append.
6022 	 * The wave IDs generated by ME are also wrong after suspend/resume.
6023 	 * Those are probably bugs somewhere else in the kernel driver.
6024 	 *
6025 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6026 	 * GDS to 0 for this ring (me/pipe).
6027 	 */
6028 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6029 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6030 		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
6031 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6032 	}
6033 
6034 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6035 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
6036 	amdgpu_ring_write(ring,
6037 #ifdef __BIG_ENDIAN
6038 				(2 << 0) |
6039 #endif
6040 				lower_32_bits(ib->gpu_addr));
6041 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
6042 	amdgpu_ring_write(ring, control);
6043 }
6044 
6045 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
6046 				     u64 seq, unsigned flags)
6047 {
6048 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6049 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6050 
6051 	/* RELEASE_MEM - flush caches, send int */
6052 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
6053 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
6054 				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
6055 				 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
6056 				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
6057 				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
6058 				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6059 				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
6060 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
6061 				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
6062 
6063 	/*
6064 	 * the address should be Qword aligned if 64bit write, Dword
6065 	 * aligned if only send 32bit data low (discard data high)
6066 	 */
6067 	if (write64bit)
6068 		BUG_ON(addr & 0x7);
6069 	else
6070 		BUG_ON(addr & 0x3);
6071 	amdgpu_ring_write(ring, lower_32_bits(addr));
6072 	amdgpu_ring_write(ring, upper_32_bits(addr));
6073 	amdgpu_ring_write(ring, lower_32_bits(seq));
6074 	amdgpu_ring_write(ring, upper_32_bits(seq));
6075 	amdgpu_ring_write(ring, 0);
6076 }
6077 
6078 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6079 {
6080 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6081 	uint32_t seq = ring->fence_drv.sync_seq;
6082 	uint64_t addr = ring->fence_drv.gpu_addr;
6083 
6084 	gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
6085 			       upper_32_bits(addr), seq, 0xffffffff, 4);
6086 }
6087 
6088 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
6089 				   uint16_t pasid, uint32_t flush_type,
6090 				   bool all_hub, uint8_t dst_sel)
6091 {
6092 	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
6093 	amdgpu_ring_write(ring,
6094 			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
6095 			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
6096 			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
6097 			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
6098 }
6099 
6100 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6101 					 unsigned vmid, uint64_t pd_addr)
6102 {
6103 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6104 
6105 	/* compute doesn't have PFP */
6106 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
6107 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6108 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6109 		amdgpu_ring_write(ring, 0x0);
6110 	}
6111 
6112 	/* Make sure that we can't skip the SET_Q_MODE packets when the VM
6113 	 * changed in any way.
6114 	 */
6115 	ring->set_q_mode_offs = 0;
6116 	ring->set_q_mode_ptr = NULL;
6117 }
6118 
6119 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6120 					  u64 seq, unsigned int flags)
6121 {
6122 	struct amdgpu_device *adev = ring->adev;
6123 
6124 	/* we only allocate 32bit for each seq wb address */
6125 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6126 
6127 	/* write fence seq to the "addr" */
6128 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6129 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6130 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6131 	amdgpu_ring_write(ring, lower_32_bits(addr));
6132 	amdgpu_ring_write(ring, upper_32_bits(addr));
6133 	amdgpu_ring_write(ring, lower_32_bits(seq));
6134 
6135 	if (flags & AMDGPU_FENCE_FLAG_INT) {
6136 		/* set register to trigger INT */
6137 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6138 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6139 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6140 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
6141 		amdgpu_ring_write(ring, 0);
6142 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6143 	}
6144 }
6145 
6146 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
6147 					 uint32_t flags)
6148 {
6149 	uint32_t dw2 = 0;
6150 
6151 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6152 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6153 		/* set load_global_config & load_global_uconfig */
6154 		dw2 |= 0x8001;
6155 		/* set load_cs_sh_regs */
6156 		dw2 |= 0x01000000;
6157 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6158 		dw2 |= 0x10002;
6159 	}
6160 
6161 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6162 	amdgpu_ring_write(ring, dw2);
6163 	amdgpu_ring_write(ring, 0);
6164 }
6165 
6166 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
6167 						   uint64_t addr)
6168 {
6169 	unsigned ret;
6170 
6171 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6172 	amdgpu_ring_write(ring, lower_32_bits(addr));
6173 	amdgpu_ring_write(ring, upper_32_bits(addr));
6174 	/* discard following DWs if *cond_exec_gpu_addr==0 */
6175 	amdgpu_ring_write(ring, 0);
6176 	ret = ring->wptr & ring->buf_mask;
6177 	/* patch dummy value later */
6178 	amdgpu_ring_write(ring, 0);
6179 
6180 	return ret;
6181 }
6182 
/*
 * Emit a SET_Q_PREEMPTION_MODE packet carrying the shadow, GDS and CSA
 * addresses, preceded by a two-dword NOP whose first dword is used as the
 * condition for a later COND_EXEC.
 *
 * @ring: ring to emit on
 * @shadow_va: shadow address; a zero value selects the "postfix" variant
 * @csa_va: CSA address written into the packet
 * @gds_va: GDS address written into the packet
 * @init_shadow: when true, PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM is set
 * @vmid: VMID encoded into the packet when @shadow_va is non-zero
 *
 * Does nothing when adev->gfx.cp_gfx_shadow is not set or the ring has no
 * ring_obj. State is carried across calls in ring->set_q_mode_offs,
 * ring->set_q_mode_ptr and ring->set_q_mode_token.
 */
static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
					   u64 shadow_va, u64 csa_va,
					   u64 gds_va, bool init_shadow,
					   int vmid)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned int offs, end;

	if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
		return;

	/*
	 * The logic here isn't easy to understand because we need to keep state
	 * across multiple executions of the function as well as between the
	 * CPU and GPU. The general idea is that the newly written GPU command
	 * has a condition on the previous one and is only executed if really
	 * necessary.
	 */

	/*
	 * The dw in the NOP controls if the next SET_Q_MODE packet should be
	 * executed or not. Reserve 64bits just to be on the safe side.
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
	/* masked ring offset of the first NOP payload dword */
	offs = ring->wptr & ring->buf_mask;

	/*
	 * We start with skipping the prefix SET_Q_MODE and always executing
	 * the postfix SET_Q_MODE packet. This is changed below with a
	 * WRITE_DATA command when the postfix executed.
	 */
	amdgpu_ring_write(ring, shadow_va ? 1 : 0);
	amdgpu_ring_write(ring, 0);

	/*
	 * If an earlier call recorded a NOP offset, make the packets emitted
	 * below conditional on the dword stored at that ring location.
	 * "end" is only assigned (and only consumed further down) under this
	 * same condition.
	 */
	if (ring->set_q_mode_offs) {
		uint64_t addr;

		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
		addr += ring->set_q_mode_offs << 2;
		end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
	}

	/*
	 * When the postfix SET_Q_MODE packet executes we need to make sure that the
	 * next prefix SET_Q_MODE packet executes as well.
	 */
	if (!shadow_va) {
		uint64_t addr;

		/* GPU-side write of 1 into the NOP dword emitted above */
		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
		addr += offs << 2;
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, 0x1);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
	amdgpu_ring_write(ring, lower_32_bits(shadow_va));
	amdgpu_ring_write(ring, upper_32_bits(shadow_va));
	amdgpu_ring_write(ring, lower_32_bits(gds_va));
	amdgpu_ring_write(ring, upper_32_bits(gds_va));
	amdgpu_ring_write(ring, lower_32_bits(csa_va));
	amdgpu_ring_write(ring, upper_32_bits(csa_va));
	amdgpu_ring_write(ring, shadow_va ?
			  PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
	amdgpu_ring_write(ring, init_shadow ?
			  PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);

	/* close the conditional section opened above, if there was one */
	if (ring->set_q_mode_offs)
		amdgpu_ring_patch_cond_exec(ring, end);

	if (shadow_va) {
		uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;

		/*
		 * If the tokens match try to skip the last postfix SET_Q_MODE
		 * packet to avoid saving/restoring the state all the time.
		 */
		if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
			*ring->set_q_mode_ptr = 0;

		ring->set_q_mode_token = token;
	} else {
		/*
		 * CPU pointer to the NOP dword recorded by the previous call
		 * (set_q_mode_offs is only updated to the new offset below).
		 */
		ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
	}

	ring->set_q_mode_offs = offs;
}
6273 
6274 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
6275 {
6276 	struct amdgpu_device *adev = ring->adev;
6277 	struct v10_de_ib_state de_payload = {0};
6278 	uint64_t offset, gds_addr, de_payload_gpu_addr;
6279 	void *de_payload_cpu_addr;
6280 	int cnt;
6281 
6282 	offset = offsetof(struct v10_gfx_meta_data, de_payload);
6283 	de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
6284 	de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
6285 
6286 	gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
6287 			 AMDGPU_CSA_SIZE - adev->gds.gds_size,
6288 			 PAGE_SIZE);
6289 
6290 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
6291 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
6292 
6293 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
6294 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
6295 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
6296 				 WRITE_DATA_DST_SEL(8) |
6297 				 WR_CONFIRM) |
6298 				 WRITE_DATA_CACHE_POLICY(0));
6299 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
6300 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
6301 
6302 	if (resume)
6303 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
6304 					   sizeof(de_payload) >> 2);
6305 	else
6306 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
6307 					   sizeof(de_payload) >> 2);
6308 }
6309 
6310 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
6311 				    bool secure)
6312 {
6313 	uint32_t v = secure ? FRAME_TMZ : 0;
6314 
6315 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
6316 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
6317 }
6318 
6319 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6320 				     uint32_t reg_val_offs)
6321 {
6322 	struct amdgpu_device *adev = ring->adev;
6323 
6324 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6325 	amdgpu_ring_write(ring, 0 |	/* src: register*/
6326 				(5 << 8) |	/* dst: memory */
6327 				(1 << 20));	/* write confirm */
6328 	amdgpu_ring_write(ring, reg);
6329 	amdgpu_ring_write(ring, 0);
6330 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6331 				reg_val_offs * 4));
6332 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6333 				reg_val_offs * 4));
6334 }
6335 
6336 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6337 				   uint32_t val)
6338 {
6339 	uint32_t cmd = 0;
6340 
6341 	switch (ring->funcs->type) {
6342 	case AMDGPU_RING_TYPE_GFX:
6343 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6344 		break;
6345 	case AMDGPU_RING_TYPE_KIQ:
6346 		cmd = (1 << 16); /* no inc addr */
6347 		break;
6348 	default:
6349 		cmd = WR_CONFIRM;
6350 		break;
6351 	}
6352 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6353 	amdgpu_ring_write(ring, cmd);
6354 	amdgpu_ring_write(ring, reg);
6355 	amdgpu_ring_write(ring, 0);
6356 	amdgpu_ring_write(ring, val);
6357 }
6358 
/*
 * Thin wrapper: forwards @reg, @val and @mask to gfx_v11_0_wait_reg_mem()
 * with the remaining selector arguments fixed to 0 and 0x20 as the last
 * argument.
 * NOTE(review): presumably this waits until (reg & mask) == val - confirm
 * against gfx_v11_0_wait_reg_mem().
 */
static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					uint32_t val, uint32_t mask)
{
	gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
6364 
/*
 * Thin wrapper: forwards @reg0, @reg1, @ref and @mask to
 * gfx_v11_0_wait_reg_mem(). The second argument is 1 only for rings of
 * type AMDGPU_RING_TYPE_GFX.
 * NOTE(review): presumably this is the combined "write reg0, then wait on
 * reg1" operation - confirm against gfx_v11_0_wait_reg_mem().
 */
static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						   uint32_t reg0, uint32_t reg1,
						   uint32_t ref, uint32_t mask)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
			       ref, mask, 0x20);
}
6374 
6375 static void
6376 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6377 				      uint32_t me, uint32_t pipe,
6378 				      enum amdgpu_interrupt_state state)
6379 {
6380 	uint32_t cp_int_cntl, cp_int_cntl_reg;
6381 
6382 	if (!me) {
6383 		switch (pipe) {
6384 		case 0:
6385 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
6386 			break;
6387 		case 1:
6388 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
6389 			break;
6390 		default:
6391 			DRM_DEBUG("invalid pipe %d\n", pipe);
6392 			return;
6393 		}
6394 	} else {
6395 		DRM_DEBUG("invalid me %d\n", me);
6396 		return;
6397 	}
6398 
6399 	switch (state) {
6400 	case AMDGPU_IRQ_STATE_DISABLE:
6401 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6402 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6403 					    TIME_STAMP_INT_ENABLE, 0);
6404 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6405 					    GENERIC0_INT_ENABLE, 0);
6406 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6407 		break;
6408 	case AMDGPU_IRQ_STATE_ENABLE:
6409 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6410 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6411 					    TIME_STAMP_INT_ENABLE, 1);
6412 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6413 					    GENERIC0_INT_ENABLE, 1);
6414 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6415 		break;
6416 	default:
6417 		break;
6418 	}
6419 }
6420 
6421 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6422 						     int me, int pipe,
6423 						     enum amdgpu_interrupt_state state)
6424 {
6425 	u32 mec_int_cntl, mec_int_cntl_reg;
6426 
6427 	/*
6428 	 * amdgpu controls only the first MEC. That's why this function only
6429 	 * handles the setting of interrupts for this specific MEC. All other
6430 	 * pipes' interrupts are set by amdkfd.
6431 	 */
6432 
6433 	if (me == 1) {
6434 		switch (pipe) {
6435 		case 0:
6436 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6437 			break;
6438 		case 1:
6439 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
6440 			break;
6441 		case 2:
6442 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
6443 			break;
6444 		case 3:
6445 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
6446 			break;
6447 		default:
6448 			DRM_DEBUG("invalid pipe %d\n", pipe);
6449 			return;
6450 		}
6451 	} else {
6452 		DRM_DEBUG("invalid me %d\n", me);
6453 		return;
6454 	}
6455 
6456 	switch (state) {
6457 	case AMDGPU_IRQ_STATE_DISABLE:
6458 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6459 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6460 					     TIME_STAMP_INT_ENABLE, 0);
6461 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6462 					     GENERIC0_INT_ENABLE, 0);
6463 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6464 		break;
6465 	case AMDGPU_IRQ_STATE_ENABLE:
6466 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6467 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6468 					     TIME_STAMP_INT_ENABLE, 1);
6469 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6470 					     GENERIC0_INT_ENABLE, 1);
6471 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6472 		break;
6473 	default:
6474 		break;
6475 	}
6476 }
6477 
6478 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6479 					    struct amdgpu_irq_src *src,
6480 					    unsigned type,
6481 					    enum amdgpu_interrupt_state state)
6482 {
6483 	switch (type) {
6484 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6485 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
6486 		break;
6487 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
6488 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
6489 		break;
6490 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6491 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6492 		break;
6493 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6494 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6495 		break;
6496 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6497 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6498 		break;
6499 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6500 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6501 		break;
6502 	default:
6503 		break;
6504 	}
6505 	return 0;
6506 }
6507 
6508 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
6509 			     struct amdgpu_irq_src *source,
6510 			     struct amdgpu_iv_entry *entry)
6511 {
6512 	u32 doorbell_offset = entry->src_data[0];
6513 	u8 me_id, pipe_id, queue_id;
6514 	struct amdgpu_ring *ring;
6515 	int i;
6516 
6517 	DRM_DEBUG("IH: CP EOP\n");
6518 
6519 	if (adev->enable_mes && doorbell_offset) {
6520 		amdgpu_userq_process_fence_irq(adev, doorbell_offset);
6521 	} else {
6522 		me_id = (entry->ring_id & 0x0c) >> 2;
6523 		pipe_id = (entry->ring_id & 0x03) >> 0;
6524 		queue_id = (entry->ring_id & 0x70) >> 4;
6525 
6526 		switch (me_id) {
6527 		case 0:
6528 			if (pipe_id == 0)
6529 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6530 			else
6531 				amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
6532 			break;
6533 		case 1:
6534 		case 2:
6535 			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6536 				ring = &adev->gfx.compute_ring[i];
6537 				/* Per-queue interrupt is supported for MEC starting from VI.
6538 				 * The interrupt can only be enabled/disabled per pipe instead
6539 				 * of per queue.
6540 				 */
6541 				if ((ring->me == me_id) &&
6542 				    (ring->pipe == pipe_id) &&
6543 				    (ring->queue == queue_id))
6544 					amdgpu_fence_process(ring);
6545 			}
6546 			break;
6547 		}
6548 	}
6549 
6550 	return 0;
6551 }
6552 
6553 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6554 					      struct amdgpu_irq_src *source,
6555 					      unsigned int type,
6556 					      enum amdgpu_interrupt_state state)
6557 {
6558 	u32 cp_int_cntl_reg, cp_int_cntl;
6559 	int i, j;
6560 
6561 	switch (state) {
6562 	case AMDGPU_IRQ_STATE_DISABLE:
6563 	case AMDGPU_IRQ_STATE_ENABLE:
6564 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6565 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6566 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6567 
6568 				if (cp_int_cntl_reg) {
6569 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6570 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6571 								    PRIV_REG_INT_ENABLE,
6572 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6573 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6574 				}
6575 			}
6576 		}
6577 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6578 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6579 				/* MECs start at 1 */
6580 				cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
6581 
6582 				if (cp_int_cntl_reg) {
6583 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6584 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6585 								    PRIV_REG_INT_ENABLE,
6586 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6587 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6588 				}
6589 			}
6590 		}
6591 		break;
6592 	default:
6593 		break;
6594 	}
6595 
6596 	return 0;
6597 }
6598 
6599 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev,
6600 					    struct amdgpu_irq_src *source,
6601 					    unsigned type,
6602 					    enum amdgpu_interrupt_state state)
6603 {
6604 	u32 cp_int_cntl_reg, cp_int_cntl;
6605 	int i, j;
6606 
6607 	switch (state) {
6608 	case AMDGPU_IRQ_STATE_DISABLE:
6609 	case AMDGPU_IRQ_STATE_ENABLE:
6610 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6611 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6612 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6613 
6614 				if (cp_int_cntl_reg) {
6615 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6616 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6617 								    OPCODE_ERROR_INT_ENABLE,
6618 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6619 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6620 				}
6621 			}
6622 		}
6623 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6624 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6625 				/* MECs start at 1 */
6626 				cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
6627 
6628 				if (cp_int_cntl_reg) {
6629 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6630 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6631 								    OPCODE_ERROR_INT_ENABLE,
6632 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6633 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6634 				}
6635 			}
6636 		}
6637 		break;
6638 	default:
6639 		break;
6640 	}
6641 	return 0;
6642 }
6643 
6644 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6645 					       struct amdgpu_irq_src *source,
6646 					       unsigned int type,
6647 					       enum amdgpu_interrupt_state state)
6648 {
6649 	u32 cp_int_cntl_reg, cp_int_cntl;
6650 	int i, j;
6651 
6652 	switch (state) {
6653 	case AMDGPU_IRQ_STATE_DISABLE:
6654 	case AMDGPU_IRQ_STATE_ENABLE:
6655 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6656 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6657 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6658 
6659 				if (cp_int_cntl_reg) {
6660 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6661 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6662 								    PRIV_INSTR_INT_ENABLE,
6663 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6664 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6665 				}
6666 			}
6667 		}
6668 		break;
6669 	default:
6670 		break;
6671 	}
6672 
6673 	return 0;
6674 }
6675 
6676 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
6677 					struct amdgpu_iv_entry *entry)
6678 {
6679 	u8 me_id, pipe_id, queue_id;
6680 	struct amdgpu_ring *ring;
6681 	int i;
6682 
6683 	me_id = (entry->ring_id & 0x0c) >> 2;
6684 	pipe_id = (entry->ring_id & 0x03) >> 0;
6685 	queue_id = (entry->ring_id & 0x70) >> 4;
6686 
6687 	if (!adev->gfx.disable_kq) {
6688 		switch (me_id) {
6689 		case 0:
6690 			for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
6691 				ring = &adev->gfx.gfx_ring[i];
6692 				if (ring->me == me_id && ring->pipe == pipe_id &&
6693 				    ring->queue == queue_id)
6694 					drm_sched_fault(&ring->sched);
6695 			}
6696 			break;
6697 		case 1:
6698 		case 2:
6699 			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6700 				ring = &adev->gfx.compute_ring[i];
6701 				if (ring->me == me_id && ring->pipe == pipe_id &&
6702 				    ring->queue == queue_id)
6703 					drm_sched_fault(&ring->sched);
6704 			}
6705 			break;
6706 		default:
6707 			BUG();
6708 			break;
6709 		}
6710 	}
6711 }
6712 
/*
 * Interrupt handler for illegal register accesses: logs an error and hands
 * the IV entry to gfx_v11_0_handle_priv_fault(). Always returns 0.
 */
static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v11_0_handle_priv_fault(adev, entry);
	return 0;
}
6721 
/*
 * Interrupt handler for illegal opcodes: logs an error and hands the IV
 * entry to gfx_v11_0_handle_priv_fault(). Always returns 0.
 */
static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev,
				struct amdgpu_irq_src *source,
				struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal opcode in command stream\n");
	gfx_v11_0_handle_priv_fault(adev, entry);
	return 0;
}
6730 
/*
 * Interrupt handler for illegal instructions: logs an error and hands the
 * IV entry to gfx_v11_0_handle_priv_fault(). Always returns 0.
 */
static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *source,
				   struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v11_0_handle_priv_fault(adev, entry);
	return 0;
}
6739 
6740 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
6741 				  struct amdgpu_irq_src *source,
6742 				  struct amdgpu_iv_entry *entry)
6743 {
6744 	if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq)
6745 		return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);
6746 
6747 	return 0;
6748 }
6749 
#if 0
/*
 * Compiled out by the surrounding "#if 0".
 *
 * Sets GENERIC2_INT_ENABLE in CPC_INT_CNTL and in the per-pipe register
 * at CP_ME1_PIPE0_INT_CNTL + ring->pipe for the KIQ ring of instance 0.
 * Only AMDGPU_CP_KIQ_IRQ_DRIVER0 is handled; any other type hits BUG().
 */
static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *src,
					     unsigned int type,
					     enum amdgpu_interrupt_state state)
{
	uint32_t tmp, target;
	struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);

	/* per-pipe control register, offset from the pipe 0 register */
	target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
	target += ring->pipe;

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		if (state == AMDGPU_IRQ_STATE_DISABLE) {
			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);

			tmp = RREG32_SOC15_IP(GC, target);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 0);
			WREG32_SOC15_IP(GC, target, tmp);
		} else {
			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);

			tmp = RREG32_SOC15_IP(GC, target);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
					    GENERIC2_INT_ENABLE, 1);
			WREG32_SOC15_IP(GC, target, tmp);
		}
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
#endif
6793 
6794 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6795 {
6796 	const unsigned int gcr_cntl =
6797 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6798 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6799 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6800 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6801 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6802 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6803 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6804 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6805 
6806 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6807 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6808 	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6809 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6810 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6811 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6812 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6813 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6814 	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6815 }
6816 
/*
 * Report whether pipe reset may be used. Currently this always returns
 * false and prints a one-time warning, which makes the pipe reset helpers
 * that check it bail out with -EOPNOTSUPP.
 */
static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
{
	/* Disable the pipe reset until the CPFW fully supports it. */
	dev_warn_once(adev->dev, "The CPFW hasn't support pipe reset yet.\n");
	return false;
}
6823 
6824 
/*
 * Reset the GFX pipe that @ring belongs to by pulsing the PFP/ME reset
 * bits of that pipe in CP_ME_CNTL.
 *
 * Returns -EOPNOTSUPP when gfx_v11_pipe_reset_support() reports no
 * support. Otherwise returns 0 unconditionally: the program counter check
 * below is only logged and does not influence the return value.
 */
static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reset_pipe = 0, clean_pipe = 0;
	int r;

	if (!gfx_v11_pipe_reset_support(adev))
		return -EOPNOTSUPP;

	gfx_v11_0_set_safe_mode(adev, 0);
	mutex_lock(&adev->srbm_mutex);
	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	/*
	 * Both values are built from 0 rather than from the current register
	 * content; for a pipe other than 0 or 1 they stay 0.
	 */
	switch (ring->pipe) {
	case 0:
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   PFP_PIPE0_RESET, 1);
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   ME_PIPE0_RESET, 1);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   PFP_PIPE0_RESET, 0);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   ME_PIPE0_RESET, 0);
		break;
	case 1:
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   PFP_PIPE1_RESET, 1);
		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
					   ME_PIPE1_RESET, 1);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   PFP_PIPE1_RESET, 0);
		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
					   ME_PIPE1_RESET, 0);
		break;
	default:
		break;
	}

	/* assert the reset bits, then write the value with them cleared */
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
	WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);

	/* r == 0 means the instruction pointer equals the firmware start address */
	r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
						RS64_FW_UC_START_ADDR_LO;
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	gfx_v11_0_unset_safe_mode(adev, 0);

	dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
			r == 0 ? "successfully" : "failed");
	/* FIXME: Sometimes driver can't cache the ME firmware start PC correctly,
	 * so the pipe reset status relies on the later gfx ring test result.
	 */
	return 0;
}
6879 
6880 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
6881 			       unsigned int vmid,
6882 			       struct amdgpu_fence *timedout_fence)
6883 {
6884 	struct amdgpu_device *adev = ring->adev;
6885 	bool use_mmio = false;
6886 	int r;
6887 
6888 	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
6889 
6890 	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, use_mmio, 0);
6891 	if (r) {
6892 
6893 		dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
6894 		r = gfx_v11_reset_gfx_pipe(ring);
6895 		if (r)
6896 			return r;
6897 	}
6898 
6899 	if (use_mmio) {
6900 		r = gfx_v11_0_kgq_init_queue(ring, true);
6901 		if (r) {
6902 			dev_err(adev->dev, "failed to init kgq\n");
6903 			return r;
6904 		}
6905 
6906 		r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
6907 		if (r) {
6908 			dev_err(adev->dev, "failed to remap kgq\n");
6909 			return r;
6910 		}
6911 	}
6912 
6913 	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
6914 }
6915 
/*
 * Reset the compute pipe that @ring belongs to by pulsing that pipe's
 * reset bit, either in CP_MEC_RS64_CNTL (when adev->gfx.rs64_enable is
 * set) or in CP_MEC_CNTL.
 *
 * Returns -EOPNOTSUPP when gfx_v11_pipe_reset_support() reports no
 * support. Otherwise returns 0 unconditionally: the program counter check
 * is only logged and does not influence the return value.
 */
static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
{

	struct amdgpu_device *adev = ring->adev;
	uint32_t reset_pipe = 0, clean_pipe = 0;
	int r;

	if (!gfx_v11_pipe_reset_support(adev))
		return -EOPNOTSUPP;

	gfx_v11_0_set_safe_mode(adev, 0);
	mutex_lock(&adev->srbm_mutex);
	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	/*
	 * Start from the current CP_MEC_RS64_CNTL value.
	 * NOTE(review): the non-RS64 branch below modifies this value with
	 * CP_MEC_CNTL field definitions and writes it to CP_MEC_CNTL, i.e. a
	 * different register than the one read here - confirm this is intended.
	 */
	reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
	clean_pipe = reset_pipe;

	if (adev->gfx.rs64_enable) {

		switch (ring->pipe) {
		case 0:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE0_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE0_RESET, 0);
			break;
		case 1:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE1_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE1_RESET, 0);
			break;
		case 2:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE2_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE2_RESET, 0);
			break;
		case 3:
			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE3_RESET, 1);
			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
						   MEC_PIPE3_RESET, 0);
			break;
		default:
			break;
		}
		/* assert the reset bit, then write the value with it cleared */
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
		/* r == 0 means the instruction pointer equals the firmware start address */
		r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
					RS64_FW_UC_START_ADDR_LO;
	} else {
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE0_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE0_RESET, 0);
				break;
			case 1:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE1_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE1_RESET, 0);
				break;
			case 2:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE2_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE2_RESET, 0);
				break;
			case 3:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE3_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME1_PIPE3_RESET, 0);
				break;
			default:
				break;
			}
			/* mec1 fw pc: CP_MEC1_INSTR_PNTR */
		} else {
			switch (ring->pipe) {
			case 0:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE0_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE0_RESET, 0);
				break;
			case 1:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE1_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE1_RESET, 0);
				break;
			case 2:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE2_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE2_RESET, 0);
				break;
			case 3:
				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE3_RESET, 1);
				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
							   MEC_ME2_PIPE3_RESET, 0);
				break;
			default:
				break;
			}
			/* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
		}
		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
		/*
		 * NOTE(review): CP_MEC1_INSTR_PNTR is read for both me values,
		 * although the comment in the ME2 branch names
		 * CP_MEC2_INSTR_PNTR - confirm.
		 */
		r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
	}

	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	gfx_v11_0_unset_safe_mode(adev, 0);

	dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name,
			r == 0 ? "successfully" : "failed");
	/* FIXME: Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe
	 * reset status relies on the compute ring test result.
	 */
	return 0;
}
7045 
7046 static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
7047 			       unsigned int vmid,
7048 			       struct amdgpu_fence *timedout_fence)
7049 {
7050 	struct amdgpu_device *adev = ring->adev;
7051 	int r = 0;
7052 
7053 	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
7054 
7055 	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
7056 	if (r) {
7057 		dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
7058 		r = gfx_v11_0_reset_compute_pipe(ring);
7059 		if (r)
7060 			return r;
7061 	}
7062 
7063 	r = gfx_v11_0_kcq_init_queue(ring, true);
7064 	if (r) {
7065 		dev_err(adev->dev, "fail to init kcq\n");
7066 		return r;
7067 	}
7068 	r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
7069 	if (r) {
7070 		dev_err(adev->dev, "failed to remap kcq\n");
7071 		return r;
7072 	}
7073 
7074 	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
7075 }
7076 
7077 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
7078 {
7079 	struct amdgpu_device *adev = ip_block->adev;
7080 	uint32_t i, j, k, reg, index = 0;
7081 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
7082 
7083 	if (!adev->gfx.ip_dump_core)
7084 		return;
7085 
7086 	for (i = 0; i < reg_count; i++)
7087 		drm_printf(p, "%-50s \t 0x%08x\n",
7088 			   gc_reg_list_11_0[i].reg_name,
7089 			   adev->gfx.ip_dump_core[i]);
7090 
7091 	/* print compute queue registers for all instances */
7092 	if (!adev->gfx.ip_dump_compute_queues)
7093 		return;
7094 
7095 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
7096 	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7097 		   adev->gfx.mec.num_mec,
7098 		   adev->gfx.mec.num_pipe_per_mec,
7099 		   adev->gfx.mec.num_queue_per_pipe);
7100 
7101 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7102 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7103 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7104 				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7105 				for (reg = 0; reg < reg_count; reg++) {
7106 					if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP)
7107 						drm_printf(p, "%-50s \t 0x%08x\n",
7108 							   "regCP_MEC_ME2_HEADER_DUMP",
7109 							   adev->gfx.ip_dump_compute_queues[index + reg]);
7110 					else
7111 						drm_printf(p, "%-50s \t 0x%08x\n",
7112 							   gc_cp_reg_list_11[reg].reg_name,
7113 							   adev->gfx.ip_dump_compute_queues[index + reg]);
7114 				}
7115 				index += reg_count;
7116 			}
7117 		}
7118 	}
7119 
7120 	/* print gfx queue registers for all instances */
7121 	if (!adev->gfx.ip_dump_gfx_queues)
7122 		return;
7123 
7124 	index = 0;
7125 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
7126 	drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
7127 		   adev->gfx.me.num_me,
7128 		   adev->gfx.me.num_pipe_per_me,
7129 		   adev->gfx.me.num_queue_per_pipe);
7130 
7131 	for (i = 0; i < adev->gfx.me.num_me; i++) {
7132 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
7133 			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
7134 				drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
7135 				for (reg = 0; reg < reg_count; reg++) {
7136 					drm_printf(p, "%-50s \t 0x%08x\n",
7137 						   gc_gfx_queue_reg_list_11[reg].reg_name,
7138 						   adev->gfx.ip_dump_gfx_queues[index + reg]);
7139 				}
7140 				index += reg_count;
7141 			}
7142 		}
7143 	}
7144 }
7145 
7146 static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
7147 {
7148 	struct amdgpu_device *adev = ip_block->adev;
7149 	uint32_t i, j, k, reg, index = 0;
7150 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
7151 
7152 	if (!adev->gfx.ip_dump_core)
7153 		return;
7154 
7155 	amdgpu_gfx_off_ctrl(adev, false);
7156 	for (i = 0; i < reg_count; i++)
7157 		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
7158 	amdgpu_gfx_off_ctrl(adev, true);
7159 
7160 	/* dump compute queue registers for all instances */
7161 	if (!adev->gfx.ip_dump_compute_queues)
7162 		return;
7163 
7164 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
7165 	amdgpu_gfx_off_ctrl(adev, false);
7166 	mutex_lock(&adev->srbm_mutex);
7167 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7168 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7169 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7170 				/* ME0 is for GFX so start from 1 for CP */
7171 				soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
7172 				for (reg = 0; reg < reg_count; reg++) {
7173 					if (i &&
7174 					    gc_cp_reg_list_11[reg].reg_offset ==
7175 						    regCP_MEC_ME1_HEADER_DUMP)
7176 						adev->gfx.ip_dump_compute_queues[index + reg] =
7177 							RREG32(SOC15_REG_OFFSET(GC, 0,
7178 							       regCP_MEC_ME2_HEADER_DUMP));
7179 					else
7180 						adev->gfx.ip_dump_compute_queues[index + reg] =
7181 							RREG32(SOC15_REG_ENTRY_OFFSET(
7182 								       gc_cp_reg_list_11[reg]));
7183 				}
7184 				index += reg_count;
7185 			}
7186 		}
7187 	}
7188 	soc21_grbm_select(adev, 0, 0, 0, 0);
7189 	mutex_unlock(&adev->srbm_mutex);
7190 	amdgpu_gfx_off_ctrl(adev, true);
7191 
7192 	/* dump gfx queue registers for all instances */
7193 	if (!adev->gfx.ip_dump_gfx_queues)
7194 		return;
7195 
7196 	index = 0;
7197 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
7198 	amdgpu_gfx_off_ctrl(adev, false);
7199 	mutex_lock(&adev->srbm_mutex);
7200 	for (i = 0; i < adev->gfx.me.num_me; i++) {
7201 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
7202 			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
7203 				soc21_grbm_select(adev, i, j, k, 0);
7204 
7205 				for (reg = 0; reg < reg_count; reg++) {
7206 					adev->gfx.ip_dump_gfx_queues[index + reg] =
7207 						RREG32(SOC15_REG_ENTRY_OFFSET(
7208 							gc_gfx_queue_reg_list_11[reg]));
7209 				}
7210 				index += reg_count;
7211 			}
7212 		}
7213 	}
7214 	soc21_grbm_select(adev, 0, 0, 0, 0);
7215 	mutex_unlock(&adev->srbm_mutex);
7216 	amdgpu_gfx_off_ctrl(adev, true);
7217 }
7218 
7219 static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
7220 {
7221 	/* Emit the cleaner shader */
7222 	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
7223 	amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
7224 }
7225 
/*
 * Ring .begin_use callback: forwards to the common gfx profile hook and then
 * to the enforce-isolation hook, in that order.
 */
static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
{
	amdgpu_gfx_profile_ring_begin_use(ring);
	amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
}
7232 
/*
 * Ring .end_use callback: forwards to the common gfx profile hook and then
 * to the enforce-isolation hook, in that order.
 */
static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
{
	amdgpu_gfx_profile_ring_end_use(ring);
	amdgpu_gfx_enforce_isolation_ring_end_use(ring);
}
7239 
7240 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
7241 	.name = "gfx_v11_0",
7242 	.early_init = gfx_v11_0_early_init,
7243 	.sw_init = gfx_v11_0_sw_init,
7244 	.sw_fini = gfx_v11_0_sw_fini,
7245 	.hw_init = gfx_v11_0_hw_init,
7246 	.hw_fini = gfx_v11_0_hw_fini,
7247 	.suspend = gfx_v11_0_suspend,
7248 	.resume = gfx_v11_0_resume,
7249 	.is_idle = gfx_v11_0_is_idle,
7250 	.wait_for_idle = gfx_v11_0_wait_for_idle,
7251 	.soft_reset = gfx_v11_0_soft_reset,
7252 	.check_soft_reset = gfx_v11_0_check_soft_reset,
7253 	.post_soft_reset = gfx_v11_0_post_soft_reset,
7254 	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
7255 	.set_powergating_state = gfx_v11_0_set_powergating_state,
7256 	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
7257 	.dump_ip_state = gfx_v11_ip_dump,
7258 	.print_ip_state = gfx_v11_ip_print,
7259 };
7260 
7261 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
7262 	.type = AMDGPU_RING_TYPE_GFX,
7263 	.align_mask = 0xff,
7264 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7265 	.support_64bit_ptrs = true,
7266 	.secure_submission_supported = true,
7267 	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
7268 	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
7269 	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
7270 	.emit_frame_size = /* totally 247 maximum if 16 IBs */
7271 		5 + /* update_spm_vmid */
7272 		5 + /* COND_EXEC */
7273 		22 + /* SET_Q_PREEMPTION_MODE */
7274 		7 + /* PIPELINE_SYNC */
7275 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7276 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7277 		4 + /* VM_FLUSH */
7278 		8 + /* FENCE for VM_FLUSH */
7279 		20 + /* GDS switch */
7280 		5 + /* COND_EXEC */
7281 		7 + /* HDP_flush */
7282 		4 + /* VGT_flush */
7283 		31 + /*	DE_META */
7284 		3 + /* CNTX_CTRL */
7285 		5 + /* HDP_INVL */
7286 		22 + /* SET_Q_PREEMPTION_MODE */
7287 		8 + 8 + /* FENCE x2 */
7288 		8 + /* gfx_v11_0_emit_mem_sync */
7289 		2, /* gfx_v11_0_ring_emit_cleaner_shader */
7290 	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
7291 	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
7292 	.emit_fence = gfx_v11_0_ring_emit_fence,
7293 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
7294 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
7295 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
7296 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
7297 	.test_ring = gfx_v11_0_ring_test_ring,
7298 	.test_ib = gfx_v11_0_ring_test_ib,
7299 	.insert_nop = gfx_v11_ring_insert_nop,
7300 	.pad_ib = amdgpu_ring_generic_pad_ib,
7301 	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
7302 	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
7303 	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
7304 	.preempt_ib = amdgpu_gfx_ring_preempt_ib,
7305 	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
7306 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
7307 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
7308 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
7309 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
7310 	.reset = gfx_v11_0_reset_kgq,
7311 	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
7312 	.begin_use = gfx_v11_0_ring_begin_use,
7313 	.end_use = gfx_v11_0_ring_end_use,
7314 };
7315 
7316 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
7317 	.type = AMDGPU_RING_TYPE_COMPUTE,
7318 	.align_mask = 0xff,
7319 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7320 	.support_64bit_ptrs = true,
7321 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
7322 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
7323 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
7324 	.emit_frame_size =
7325 		5 + /* update_spm_vmid */
7326 		20 + /* gfx_v11_0_ring_emit_gds_switch */
7327 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
7328 		5 + /* hdp invalidate */
7329 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
7330 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7331 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7332 		2 + /* gfx_v11_0_ring_emit_vm_flush */
7333 		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
7334 		8 + /* gfx_v11_0_emit_mem_sync */
7335 		2, /* gfx_v11_0_ring_emit_cleaner_shader */
7336 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
7337 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
7338 	.emit_fence = gfx_v11_0_ring_emit_fence,
7339 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
7340 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
7341 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
7342 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
7343 	.test_ring = gfx_v11_0_ring_test_ring,
7344 	.test_ib = gfx_v11_0_ring_test_ib,
7345 	.insert_nop = gfx_v11_ring_insert_nop,
7346 	.pad_ib = amdgpu_ring_generic_pad_ib,
7347 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
7348 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
7349 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
7350 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
7351 	.reset = gfx_v11_0_reset_kcq,
7352 	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
7353 	.begin_use = gfx_v11_0_ring_begin_use,
7354 	.end_use = gfx_v11_0_ring_end_use,
7355 };
7356 
7357 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
7358 	.type = AMDGPU_RING_TYPE_KIQ,
7359 	.align_mask = 0xff,
7360 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7361 	.support_64bit_ptrs = true,
7362 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
7363 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
7364 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
7365 	.emit_frame_size =
7366 		20 + /* gfx_v11_0_ring_emit_gds_switch */
7367 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
7368 		5 + /*hdp invalidate */
7369 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
7370 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7371 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7372 		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7373 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
7374 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
7375 	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
7376 	.test_ring = gfx_v11_0_ring_test_ring,
7377 	.test_ib = gfx_v11_0_ring_test_ib,
7378 	.insert_nop = amdgpu_ring_insert_nop,
7379 	.pad_ib = amdgpu_ring_generic_pad_ib,
7380 	.emit_rreg = gfx_v11_0_ring_emit_rreg,
7381 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
7382 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
7383 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
7384 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
7385 };
7386 
7387 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
7388 {
7389 	int i;
7390 
7391 	adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;
7392 
7393 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7394 		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
7395 
7396 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7397 		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
7398 }
7399 
7400 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
7401 	.set = gfx_v11_0_set_eop_interrupt_state,
7402 	.process = gfx_v11_0_eop_irq,
7403 };
7404 
7405 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
7406 	.set = gfx_v11_0_set_priv_reg_fault_state,
7407 	.process = gfx_v11_0_priv_reg_irq,
7408 };
7409 
7410 static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
7411 	.set = gfx_v11_0_set_bad_op_fault_state,
7412 	.process = gfx_v11_0_bad_op_irq,
7413 };
7414 
7415 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
7416 	.set = gfx_v11_0_set_priv_inst_fault_state,
7417 	.process = gfx_v11_0_priv_inst_irq,
7418 };
7419 
7420 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
7421 	.process = gfx_v11_0_rlc_gc_fed_irq,
7422 };
7423 
7424 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
7425 {
7426 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7427 	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
7428 
7429 	adev->gfx.priv_reg_irq.num_types = 1;
7430 	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
7431 
7432 	adev->gfx.bad_op_irq.num_types = 1;
7433 	adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;
7434 
7435 	adev->gfx.priv_inst_irq.num_types = 1;
7436 	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
7437 
7438 	adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
7439 	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
7440 
7441 }
7442 
7443 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
7444 {
7445 	if (adev->flags & AMD_IS_APU)
7446 		adev->gfx.imu.mode = MISSION_MODE;
7447 	else
7448 		adev->gfx.imu.mode = DEBUG_MODE;
7449 
7450 	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
7451 }
7452 
7453 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
7454 {
7455 	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
7456 }
7457 
7458 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
7459 {
7460 	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
7461 			    adev->gfx.config.max_sh_per_se *
7462 			    adev->gfx.config.max_shader_engines;
7463 
7464 	adev->gds.gds_size = 0x1000;
7465 	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
7466 	adev->gds.gws_size = 64;
7467 	adev->gds.oa_size = 16;
7468 }
7469 
7470 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
7471 {
7472 	/* set gfx eng mqd */
7473 	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
7474 		sizeof(struct v11_gfx_mqd);
7475 	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
7476 		gfx_v11_0_gfx_mqd_init;
7477 	/* set compute eng mqd */
7478 	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
7479 		sizeof(struct v11_compute_mqd);
7480 	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
7481 		gfx_v11_0_compute_mqd_init;
7482 }
7483 
7484 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
7485 							  u32 bitmap)
7486 {
7487 	u32 data;
7488 
7489 	if (!bitmap)
7490 		return;
7491 
7492 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
7493 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
7494 
7495 	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
7496 }
7497 
7498 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
7499 {
7500 	u32 data, wgp_bitmask;
7501 	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
7502 	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
7503 
7504 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
7505 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
7506 
7507 	wgp_bitmask =
7508 		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
7509 
7510 	return (~data) & wgp_bitmask;
7511 }
7512 
7513 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
7514 {
7515 	u32 wgp_idx, wgp_active_bitmap;
7516 	u32 cu_bitmap_per_wgp, cu_active_bitmap;
7517 
7518 	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
7519 	cu_active_bitmap = 0;
7520 
7521 	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
7522 		/* if there is one WGP enabled, it means 2 CUs will be enabled */
7523 		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
7524 		if (wgp_active_bitmap & (1 << wgp_idx))
7525 			cu_active_bitmap |= cu_bitmap_per_wgp;
7526 	}
7527 
7528 	return cu_active_bitmap;
7529 }
7530 
7531 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
7532 				 struct amdgpu_cu_info *cu_info)
7533 {
7534 	int i, j, k, counter, active_cu_number = 0;
7535 	u32 mask, bitmap;
7536 	unsigned disable_masks[8 * 2];
7537 
7538 	if (!adev || !cu_info)
7539 		return -EINVAL;
7540 
7541 	amdgpu_gfx_parse_disable_cu(adev, disable_masks, 8, 2);
7542 
7543 	mutex_lock(&adev->grbm_idx_mutex);
7544 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7545 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7546 			bitmap = i * adev->gfx.config.max_sh_per_se + j;
7547 			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
7548 				continue;
7549 			mask = 1;
7550 			counter = 0;
7551 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
7552 			if (i < 8 && j < 2)
7553 				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
7554 					adev, disable_masks[i * 2 + j]);
7555 			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
7556 
7557 			/**
7558 			 * GFX11 could support more than 4 SEs, while the bitmap
7559 			 * in cu_info struct is 4x4 and ioctl interface struct
7560 			 * drm_amdgpu_info_device should keep stable.
7561 			 * So we use last two columns of bitmap to store cu mask for
7562 			 * SEs 4 to 7, the layout of the bitmap is as below:
7563 			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
7564 			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
7565 			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
7566 			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
7567 			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
7568 			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
7569 			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
7570 			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
7571 			 */
7572 			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
7573 
7574 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7575 				if (bitmap & mask)
7576 					counter++;
7577 
7578 				mask <<= 1;
7579 			}
7580 			active_cu_number += counter;
7581 		}
7582 	}
7583 	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7584 	mutex_unlock(&adev->grbm_idx_mutex);
7585 
7586 	cu_info->number = active_cu_number;
7587 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7588 
7589 	return 0;
7590 }
7591 
7592 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
7593 {
7594 	.type = AMD_IP_BLOCK_TYPE_GFX,
7595 	.major = 11,
7596 	.minor = 0,
7597 	.rev = 0,
7598 	.funcs = &gfx_v11_0_ip_funcs,
7599 };
7600