/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"

#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "smuio/smuio_13_0_6_offset.h"
#include "smuio/smuio_13_0_6_sh_mask.h"
#include "navi10_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"

#include "soc15.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
#include "gfx_v11_0_cleaner_shader.h"
#include "gfx_v11_0_3.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"
#include "mes_userqueue.h"
#include "amdgpu_userq_fence.h"

#define GFX11_NUM_GFX_RINGS		1
#define GFX11_MEC_HPD_SIZE	2048

#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388

#define regCGTT_WD_CLK_CTRL		0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX	1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1	0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
#define regPC_CONFIG_CNTL_1		0x194d
#define regPC_CONFIG_CNTL_1_BASE_IDX	1

#define regCP_GFX_MQD_CONTROL_DEFAULT                                             0x00000100
#define regCP_GFX_HQD_VMID_DEFAULT                                                0x00000000
#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT                                      0x00000000
#define regCP_GFX_HQD_QUANTUM_DEFAULT                                             0x00000a01
#define regCP_GFX_HQD_CNTL_DEFAULT                                                0x00a00000
#define regCP_RB_DOORBELL_CONTROL_DEFAULT                                         0x00000000
#define regCP_GFX_HQD_RPTR_DEFAULT                                                0x00000000

#define regCP_HQD_EOP_CONTROL_DEFAULT                                             0x00000006
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT                                     0x00000000
#define regCP_MQD_CONTROL_DEFAULT                                                 0x00000100
#define regCP_HQD_PQ_CONTROL_DEFAULT                                              0x00308509
#define regCP_HQD_PQ_RPTR_DEFAULT                                                 0x00000000
#define regCP_HQD_PERSISTENT_STATE_DEFAULT                                        0x0be05501
#define regCP_HQD_IB_CONTROL_DEFAULT                                              0x00300000

MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_4_rlc.bin");

static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
	SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
	SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	/* SE status registers */
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
};

static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
	/* compute registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};

static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
	/* gfx queue registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
};

static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
};

static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};

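/*
 * Default SH_MEM_CONFIG: 64-bit address mode, unaligned memory access
 * allowed, and an initial instruction prefetch level of 3.
 */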
#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))

static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val);
static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
				      bool enable);

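/*
 * Emit PACKET3_SET_RESOURCES on the KIQ ring to hand the CP firmware the
 * set of queue slots it may schedule (queue_mask) along with the MC
 * address of the cleaner shader that scrubs shader state between users.
 */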
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	u64 shader_mc_addr;

	/* Cleaner shader MC address */
	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask: 0, queue_type: 0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base: 0, gds heap size: 0 */
}

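/*
 * Map a gfx, compute or MES ring onto a hardware queue slot via the KIQ.
 * The ME/engine select encoding differs per ring type: compute queues run
 * on ME1, gfx queues use engine select 4 and MES queues engine select 5.
 */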
static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t me = 0, eng_sel = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_COMPUTE:
		me = 1;
		eng_sel = 0;
		break;
	case AMDGPU_RING_TYPE_GFX:
		me = 0;
		eng_sel = 4;
		break;
	case AMDGPU_RING_TYPE_MES:
		me = 2;
		eng_sel = 5;
		break;
	default:
		WARN_ON(1);
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((me)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

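/*
 * Unmap (or preempt) a queue through the KIQ. When MES is enabled and the
 * KIQ ring is not schedulable, the request is routed to the MES firmware
 * instead. For PREEMPT_QUEUES_NO_UNMAP the trailing dwords carry the
 * fence address/sequence the firmware signals once preemption completes.
 */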
static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
		amdgpu_mes_unmap_legacy_queue(adev, ring, action,
					      gpu_addr, seq, 0);
		return;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
		  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				uint16_t pasid, uint32_t flush_type,
				bool all_hub)
{
	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}

static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx11_kiq_set_resources,
	.kiq_map_queues = gfx11_kiq_map_queues,
	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
	.kiq_query_status = gfx11_kiq_query_status,
	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
}

static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev))
		return;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		soc15_program_register_sequence(adev,
						golden_settings_gc_11_0_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
		break;
	default:
		break;
	}
	soc15_program_register_sequence(adev,
					golden_settings_gc_11_0,
					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
}

static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

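/*
 * Pad the ring with num_nop NOP dwords. A single NOP is written directly;
 * larger paddings emit one PACKET3_NOP header covering the remaining
 * dwords (count field capped at 0x3ffe), e.g. num_nop = 8 yields one
 * header plus seven filler dwords.
 */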
static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
	/* Header itself is a NOP packet */
	if (num_nop == 1) {
		amdgpu_ring_write(ring, ring->funcs->nop);
		return;
	}

	/* The NOP packet's count field caps at 0x3ffe; any excess is padded one NOP at a time */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));

	/* Header is at index 0, followed by num_nop - 1 NOP packets */
	amdgpu_ring_insert_nop(ring, num_nop - 1);
}

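/*
 * Basic ring liveness test: seed SCRATCH_REG0 with 0xCAFEDEAD, ask the
 * ring to rewrite it to 0xDEADBEEF (a direct register write emit on the
 * KIQ, a SET_UCONFIG_REG packet otherwise) and poll until the value
 * lands or the timeout expires.
 */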
static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		drm_err(adev_to_drm(adev), "cp failed to lock ring %d (%d).\n",
			ring->idx, r);
		return r;
	}

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
	} else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		amdgpu_ring_write(ring, scratch -
				  PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, 0xDEADBEEF);
	}
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

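/*
 * Indirect buffer smoke test: build a tiny IB that WRITE_DATAs 0xDEADBEEF
 * into a writeback slot, schedule it behind a fence, then check the slot
 * after the fence signals.
 */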
static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	uint32_t *cpu_ptr;
	long r;

	/* MES KIQ fw doesn't support indirect buffers yet */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	cpu_ptr = &adev->wb.wb[index];

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		drm_err(adev_to_drm(adev), "failed to get ib (%ld).\n", r);
		goto err1;
	}

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;
err2:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
{
	const struct psp_firmware_header_v1_0 *toc_hdr;
	int err = 0;

	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_toc.bin", ucode_prefix);
	if (err)
		goto out;

	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
	return 0;
out:
	amdgpu_ucode_release(&adev->psp.toc_fw);
	return err;
}

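/*
 * CP gfx shadow (per-context save/restore of CP state) is only advertised
 * when the ME/PFP/MEC firmware is new enough, and even then only under
 * SR-IOV on gfx 11.0.0/11.0.2/11.0.3.
 */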
static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
		if ((adev->gfx.me_fw_version >= 1505) &&
		    (adev->gfx.pfp_fw_version >= 1600) &&
		    (adev->gfx.mec_fw_version >= 512))
			adev->gfx.cp_gfx_shadow = amdgpu_sriov_vf(adev);
		break;
	default:
		adev->gfx.cp_gfx_shadow = false;
		break;
	}
}

static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
	char ucode_prefix[25];
	int err;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	uint16_t version_major;
	uint16_t version_minor;

	DRM_DEBUG("\n");

	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_pfp.bin", ucode_prefix);
	if (err)
		goto out;
	/* check the pfp fw hdr version to decide whether to enable rs64 for gfx11 */
	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
				(union amdgpu_firmware_header *)
				adev->gfx.pfp_fw->data, 2, 0);
	if (adev->gfx.rs64_enable) {
		dev_info(adev->dev, "CP RS64 enable\n");
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
	}

	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_me.bin", ucode_prefix);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
	}

	if (!amdgpu_sriov_vf(adev)) {
		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
		    adev->pdev->revision == 0xCE)
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/gc_11_0_0_rlc_1.bin");
		else if (amdgpu_is_kicker_fw(adev))
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_rlc_kicker.bin", ucode_prefix);
		else
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_rlc.bin", ucode_prefix);
		if (err)
			goto out;
		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
		if (err)
			goto out;
	}

	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_mec.bin", ucode_prefix);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);

	/* gfx11 parts have a single MEC, so there is no MEC2 firmware */
	adev->gfx.mec2_fw = NULL;

	gfx_v11_0_check_fw_cp_gfx_shadow(adev);

	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
		err = adev->gfx.imu.funcs->init_microcode(adev);
		if (err)
			DRM_ERROR("Failed to init imu firmware!\n");
		return err;
	}

out:
	if (err) {
		amdgpu_ucode_release(&adev->gfx.pfp_fw);
		amdgpu_ucode_release(&adev->gfx.me_fw);
		amdgpu_ucode_release(&adev->gfx.rlc_fw);
		amdgpu_ucode_release(&adev->gfx.mec_fw);
	}

	return err;
}

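/*
 * Size the clear-state buffer in dwords: preamble (begin + context
 * control), one SET_CONTEXT_REG burst per extent, the
 * PA_SC_TILE_STEERING_OVERRIDE write, and the end/clear-state trailer.
 */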
static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* set PA_SC_TILE_STEERING_OVERRIDE */
	count += 3;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
	u32 count = 0;
	int ctx_reg_offset;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	count = amdgpu_gfx_csb_preamble_start(buffer);
	count = amdgpu_gfx_csb_data_parser(adev, buffer, count);

	ctx_reg_offset = SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(ctx_reg_offset);
	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

	amdgpu_gfx_csb_preamble_end(buffer, count);
}

static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			&adev->gfx.rlc.clear_state_gpu_addr,
			(void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			&adev->gfx.rlc.cp_table_gpu_addr,
			(void **)&adev->gfx.rlc.cp_table_ptr);
}

static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;

	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
	adev->gfx.rlc.rlcg_reg_access_supported = true;
}

static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx11_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, 0, NULL, 0xf);

	return 0;
}

static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}

static void gfx_v11_0_me_init(struct amdgpu_device *adev)
{
	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);
}

static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;

	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			gfx_v11_0_mec_fini(adev);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

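/*
 * Wave state is read through the SQ indirect register pair: program
 * SQ_IND_INDEX with the wave (and optionally work item) plus the register
 * offset, then read the value(s) back from SQ_IND_DATA. AUTO_INCR lets a
 * single index program stream out consecutive GPRs.
 */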
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here */
	WARN_ON(simd != 0);

	/* type 3 wave data */
	dst[(*no_fields)++] = 3;
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}

static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	WARN_ON(simd != 0);

	wave_read_regs(
		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
		dst);
}

static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t thread,
				      uint32_t start, uint32_t size,
				      uint32_t *dst)
{
	wave_read_regs(
		adev, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
					u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
	soc21_grbm_select(adev, me, pipe, q, vm);
}

/* all sizes are in bytes */
#define MQD_SHADOW_BASE_SIZE      73728
#define MQD_SHADOW_BASE_ALIGNMENT 256
#define MQD_FWWORKAREA_SIZE       484
#define MQD_FWWORKAREA_ALIGNMENT  256

static void gfx_v11_0_get_gfx_shadow_info_nocheck(struct amdgpu_device *adev,
					 struct amdgpu_gfx_shadow_info *shadow_info)
{
	/* for gfx */
	shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
	shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
	shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
	shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
	/* for compute */
	shadow_info->eop_size = GFX11_MEC_HPD_SIZE;
	shadow_info->eop_alignment = 256;
}

static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
					 struct amdgpu_gfx_shadow_info *shadow_info,
					 bool skip_check)
{
	if (adev->gfx.cp_gfx_shadow || skip_check) {
		gfx_v11_0_get_gfx_shadow_info_nocheck(adev, shadow_info);
		return 0;
	} else {
		memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
		return -ENOTSUPP;
	}
}

static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v11_0_select_se_sh,
	.read_wave_data = &gfx_v11_0_read_wave_data,
	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
	.get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
	.get_hdp_flush_mask = &amdgpu_gfx_get_hdp_flush_mask,
};

static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 3):
		adev->gfx.ras = &gfx_v11_0_3_ras;
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
	case IP_VERSION(11, 5, 0):
	case IP_VERSION(11, 5, 1):
	case IP_VERSION(11, 5, 2):
	case IP_VERSION(11, 5, 3):
	case IP_VERSION(11, 5, 4):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
		break;
	default:
		BUG();
		break;
	}

	return 0;
}

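/*
 * Note the doorbell index is shifted left by one below: doorbell slots
 * are allocated in 64-bit units while the ring code addresses doorbells
 * as 32-bit dwords.
 */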
static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
				   int me, int pipe, int queue)
{
	struct amdgpu_ring *ring;
	unsigned int irq_type;
	unsigned int hw_prio;

	ring = &adev->gfx.gfx_ring[ring_id];

	ring->me = me;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	if (adev->gfx.disable_kq) {
		ring->no_scheduler = true;
		ring->no_user_submission = true;
	}

	if (!ring_id)
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
	else
		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
		AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
				hw_prio, NULL);
}

static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				       int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring;
	unsigned int hw_prio;

	ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX11_MEC_HPD_SIZE);
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
			     hw_prio, NULL);
	if (r)
		return r;

	return 0;
}

static struct {
	SOC21_FIRMWARE_ID	id;
	unsigned int		offset;
	unsigned int		size;
} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];

static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
{
	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;

	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
			(ucode->id < SOC21_FIRMWARE_ID_MAX)) {
		rlc_autoload_info[ucode->id].id = ucode->id;
		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
		rlc_autoload_info[ucode->id].size = ucode->size * 4;

		ucode++;
	}
}

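/*
 * Total autoload buffer size is the sum of all TOC entry sizes, bumped up
 * to cover the last entry's (possibly alignment-padded) offset + size.
 */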
static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
{
	uint32_t total_size = 0;
	SOC21_FIRMWARE_ID id;

	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);

	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
		total_size += rlc_autoload_info[id].size;

	/* Offsets in the rlc toc may include alignment padding, so make sure the buffer covers the last entry */
1237 	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
1238 		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
1239 			rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
1240 
1241 	return total_size;
1242 }
1243 
1244 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
1245 {
1246 	int r;
1247 	uint32_t total_size;
1248 
1249 	total_size = gfx_v11_0_calc_toc_total_size(adev);
1250 
1251 	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
1252 				      AMDGPU_GEM_DOMAIN_VRAM |
1253 				      AMDGPU_GEM_DOMAIN_GTT,
1254 				      &adev->gfx.rlc.rlc_autoload_bo,
1255 				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
1256 				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1257 
1258 	if (r) {
1259 		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
1260 		return r;
1261 	}
1262 
1263 	return 0;
1264 }
1265 
1266 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1267 					      SOC21_FIRMWARE_ID id,
1268 			    		      const void *fw_data,
1269 					      uint32_t fw_size,
1270 					      uint32_t *fw_autoload_mask)
1271 {
1272 	uint32_t toc_offset;
1273 	uint32_t toc_fw_size;
1274 	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1275 
1276 	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
1277 		return;
1278 
1279 	toc_offset = rlc_autoload_info[id].offset;
1280 	toc_fw_size = rlc_autoload_info[id].size;
1281 
1282 	if (fw_size == 0)
1283 		fw_size = toc_fw_size;
1284 
1285 	if (fw_size > toc_fw_size)
1286 		fw_size = toc_fw_size;
1287 
1288 	memcpy(ptr + toc_offset, fw_data, fw_size);
1289 
1290 	if (fw_size < toc_fw_size)
1291 		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1292 
1293 	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
1294 		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
1295 }
1296 
1297 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
1298 							uint32_t *fw_autoload_mask)
1299 {
1300 	void *data;
1301 	uint32_t size;
1302 	uint64_t *toc_ptr;
1303 
1304 	*(uint64_t *)fw_autoload_mask |= 0x1;
1305 
1306 	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
1307 
1308 	data = adev->psp.toc.start_addr;
1309 	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
1310 
1311 	toc_ptr = (uint64_t *)data + size / 8 - 1;
1312 	*toc_ptr = *(uint64_t *)fw_autoload_mask;
1313 
1314 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
1315 					data, size, fw_autoload_mask);
1316 }
1317 
1318 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1319 							uint32_t *fw_autoload_mask)
1320 {
1321 	const __le32 *fw_data;
1322 	uint32_t fw_size;
1323 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1324 	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1325 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1326 	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1327 	uint16_t version_major, version_minor;
1328 
1329 	if (adev->gfx.rs64_enable) {
1330 		/* pfp ucode */
1331 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1332 			adev->gfx.pfp_fw->data;
1333 		/* instruction */
1334 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1335 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1336 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1337 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1338 						fw_data, fw_size, fw_autoload_mask);
1339 		/* data */
1340 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1341 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1342 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1343 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1344 						fw_data, fw_size, fw_autoload_mask);
1345 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1346 						fw_data, fw_size, fw_autoload_mask);
1347 		/* me ucode */
1348 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1349 			adev->gfx.me_fw->data;
1350 		/* instruction */
1351 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1352 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1353 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1354 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1355 						fw_data, fw_size, fw_autoload_mask);
1356 		/* data */
1357 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1358 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1359 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1360 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1361 						fw_data, fw_size, fw_autoload_mask);
1362 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1363 						fw_data, fw_size, fw_autoload_mask);
1364 		/* mec ucode */
1365 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1366 			adev->gfx.mec_fw->data;
1367 		/* instruction */
1368 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1369 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1370 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1371 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1372 						fw_data, fw_size, fw_autoload_mask);
1373 		/* data */
1374 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1375 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1376 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1377 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1378 						fw_data, fw_size, fw_autoload_mask);
1379 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1380 						fw_data, fw_size, fw_autoload_mask);
1381 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1382 						fw_data, fw_size, fw_autoload_mask);
1383 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1384 						fw_data, fw_size, fw_autoload_mask);
1385 	} else {
1386 		/* pfp ucode */
1387 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1388 			adev->gfx.pfp_fw->data;
1389 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1390 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1391 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1392 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1393 						fw_data, fw_size, fw_autoload_mask);
1394 
1395 		/* me ucode */
1396 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1397 			adev->gfx.me_fw->data;
1398 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1399 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1400 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1401 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1402 						fw_data, fw_size, fw_autoload_mask);
1403 
1404 		/* mec ucode */
1405 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1406 			adev->gfx.mec_fw->data;
1407 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1408 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1409 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1410 			cp_hdr->jt_size * 4;
1411 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1412 						fw_data, fw_size, fw_autoload_mask);
1413 	}
1414 
1415 	/* rlc ucode */
1416 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1417 		adev->gfx.rlc_fw->data;
1418 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1419 			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1420 	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1421 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1422 					fw_data, fw_size, fw_autoload_mask);
1423 
1424 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1425 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1426 	if (version_major == 2) {
1427 		if (version_minor >= 2) {
1428 			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1429 
1430 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1431 					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1432 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1433 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1434 					fw_data, fw_size, fw_autoload_mask);
1435 
1436 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1437 					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1438 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1439 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1440 					fw_data, fw_size, fw_autoload_mask);
1441 		}
1442 	}
1443 }
1444 
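/*
 * SDMA v2.0 images carry two RS64 threads: the context ucode (thread 0)
 * and the control ucode (thread 1); both are staged into the autoload
 * buffer under their respective SOC21 firmware IDs.
 */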
1445 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1446 							uint32_t *fw_autoload_mask)
1447 {
1448 	const __le32 *fw_data;
1449 	uint32_t fw_size;
1450 	const struct sdma_firmware_header_v2_0 *sdma_hdr;
1451 
1452 	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1453 		adev->sdma.instance[0].fw->data;
1454 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1455 			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1456 	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1457 
1458 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1459 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1460 
1461 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1462 			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1463 	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1464 
1465 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1466 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1467 }
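/*
 * MES carries one firmware image per pipe; for each of the two pipes
 * the instruction ucode and its data/stack image are staged separately.
 */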
1468 
1469 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1470 							uint32_t *fw_autoload_mask)
1471 {
1472 	const __le32 *fw_data;
1473 	unsigned fw_size;
1474 	const struct mes_firmware_header_v1_0 *mes_hdr;
1475 	int pipe, ucode_id, data_id;
1476 
1477 	for (pipe = 0; pipe < 2; pipe++) {
1478 		if (pipe == 0) {
1479 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1480 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1481 		} else {
1482 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1483 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1484 		}
1485 
1486 		mes_hdr = (const struct mes_firmware_header_v1_0 *)
1487 			adev->mes.fw[pipe]->data;
1488 
1489 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1490 				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1491 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1492 
1493 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1494 				ucode_id, fw_data, fw_size, fw_autoload_mask);
1495 
1496 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1497 				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1498 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1499 
1500 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1501 				data_id, fw_data, fw_size, fw_autoload_mask);
1502 	}
1503 }
1504 
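/*
 * RLC backdoor autoload bring-up. Sequence 1 is presumably the autoload
 * buffer allocation done from sw_init via
 * gfx_v11_0_rlc_autoload_buffer_init(); the remaining steps run here:
 * copy all ucode images into the buffer, point the IMU bootloader at the
 * RLC_G image, then load/setup/start the IMU and disable GPA mode.
 */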
1505 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1506 {
1507 	uint32_t rlc_g_offset, rlc_g_size;
1508 	uint64_t gpu_addr;
1509 	uint32_t autoload_fw_id[2];
1510 
1511 	memset(autoload_fw_id, 0, sizeof(autoload_fw_id));
1512 
1513 	/* RLC autoload sequence 2: copy ucode */
1514 	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1515 	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1516 	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1517 	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1518 
1519 	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1520 	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1521 	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1522 
1523 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1524 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1525 
1526 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1527 
1528 	/* RLC autoload sequence 3: load IMU fw */
1529 	if (adev->gfx.imu.funcs->load_microcode)
1530 		adev->gfx.imu.funcs->load_microcode(adev);
1531 	/* RLC autoload sequence 4: init IMU fw */
1532 	if (adev->gfx.imu.funcs->setup_imu)
1533 		adev->gfx.imu.funcs->setup_imu(adev);
1534 	if (adev->gfx.imu.funcs->start_imu)
1535 		adev->gfx.imu.funcs->start_imu(adev);
1536 
1537 	/* RLC autoload sequence 5: disable gpa mode */
1538 	gfx_v11_0_disable_gpa_mode(adev);
1539 
1540 	return 0;
1541 }
1542 
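/*
 * Allocation failures below are non-fatal: the corresponding ip_dump
 * pointer is simply left NULL and that register set is skipped when the
 * GFX IP state is dumped.
 */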
1543 static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
1544 {
1545 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
1546 	uint32_t *ptr;
1547 	uint32_t inst;
1548 
1549 	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
1550 	if (!ptr) {
1551 		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
1552 		adev->gfx.ip_dump_core = NULL;
1553 	} else {
1554 		adev->gfx.ip_dump_core = ptr;
1555 	}
1556 
1557 	/* Allocate memory for compute queue registers for all the instances */
1558 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
1559 	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
1560 		adev->gfx.mec.num_queue_per_pipe;
1561 
1562 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1563 	if (!ptr) {
1564 		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
1565 		adev->gfx.ip_dump_compute_queues = NULL;
1566 	} else {
1567 		adev->gfx.ip_dump_compute_queues = ptr;
1568 	}
1569 
1570 	/* Allocate memory for gfx queue registers for all the instances */
1571 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
1572 	inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
1573 		adev->gfx.me.num_queue_per_pipe;
1574 
1575 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1576 	if (!ptr) {
1577 		DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
1578 		adev->gfx.ip_dump_gfx_queues = NULL;
1579 	} else {
1580 		adev->gfx.ip_dump_gfx_queues = ptr;
1581 	}
1582 }
1583 
1584 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
1585 {
1586 	int i, j, k, r, ring_id;
1587 	int xcc_id = 0;
1588 	struct amdgpu_device *adev = ip_block->adev;
1589 	int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
1590 
1591 	INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
1592 
1593 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1594 	case IP_VERSION(11, 0, 0):
1595 	case IP_VERSION(11, 0, 1):
1596 	case IP_VERSION(11, 0, 2):
1597 	case IP_VERSION(11, 0, 3):
1598 	case IP_VERSION(11, 0, 4):
1599 	case IP_VERSION(11, 5, 0):
1600 	case IP_VERSION(11, 5, 1):
1601 	case IP_VERSION(11, 5, 2):
1602 	case IP_VERSION(11, 5, 3):
1603 	case IP_VERSION(11, 5, 4):
1604 		adev->gfx.me.num_me = 1;
1605 		adev->gfx.me.num_pipe_per_me = 1;
1606 		adev->gfx.me.num_queue_per_pipe = 2;
1607 		adev->gfx.mec.num_mec = 1;
1608 		adev->gfx.mec.num_pipe_per_mec = 4;
1609 		adev->gfx.mec.num_queue_per_pipe = 4;
1610 		break;
1611 	default:
1612 		adev->gfx.me.num_me = 1;
1613 		adev->gfx.me.num_pipe_per_me = 1;
1614 		adev->gfx.me.num_queue_per_pipe = 1;
1615 		adev->gfx.mec.num_mec = 1;
1616 		adev->gfx.mec.num_pipe_per_mec = 4;
1617 		adev->gfx.mec.num_queue_per_pipe = 8;
1618 		break;
1619 	}
1620 
1621 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1622 	case IP_VERSION(11, 0, 0):
1623 	case IP_VERSION(11, 0, 2):
1624 	case IP_VERSION(11, 0, 3):
1625 		if (!adev->gfx.disable_uq &&
1626 		    adev->gfx.me_fw_version  >= 2420 &&
1627 		    adev->gfx.pfp_fw_version >= 2580 &&
1628 		    adev->gfx.mec_fw_version >= 2650 &&
1629 		    adev->mes.fw_version[0] >= 120) {
1630 			adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
1631 			adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
1632 		}
1633 		break;
1634 	case IP_VERSION(11, 0, 1):
1635 	case IP_VERSION(11, 0, 4):
1636 	case IP_VERSION(11, 5, 0):
1637 	case IP_VERSION(11, 5, 1):
1638 	case IP_VERSION(11, 5, 2):
1639 	case IP_VERSION(11, 5, 3):
1640 		/* add firmware version checks here */
1641 		if (0 && !adev->gfx.disable_uq) {
1642 			adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
1643 			adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
1644 		}
1645 		break;
1646 	default:
1647 		break;
1648 	}
1649 
1650 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1651 	case IP_VERSION(11, 0, 0):
1652 	case IP_VERSION(11, 0, 2):
1653 	case IP_VERSION(11, 0, 3):
1654 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1655 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1656 		if (adev->gfx.me_fw_version  >= 2280 &&
1657 		    adev->gfx.pfp_fw_version >= 2370 &&
1658 		    adev->gfx.mec_fw_version >= 2450  &&
1659 		    adev->mes.fw_version[0] >= 99) {
1660 			adev->gfx.enable_cleaner_shader = true;
1661 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1662 			if (r) {
1663 				adev->gfx.enable_cleaner_shader = false;
1664 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1665 			}
1666 		}
1667 		break;
1668 	case IP_VERSION(11, 0, 1):
1669 	case IP_VERSION(11, 0, 4):
1670 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1671 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1672 		if (adev->gfx.pfp_fw_version >= 102 &&
1673 		    adev->gfx.mec_fw_version >= 66 &&
1674 		    adev->mes.fw_version[0] >= 128) {
1675 			adev->gfx.enable_cleaner_shader = true;
1676 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1677 			if (r) {
1678 				adev->gfx.enable_cleaner_shader = false;
1679 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1680 			}
1681 		}
1682 		break;
1683 	case IP_VERSION(11, 5, 0):
1684 	case IP_VERSION(11, 5, 1):
1685 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1686 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1687 		if (adev->gfx.mec_fw_version >= 26 &&
1688 		    adev->mes.fw_version[0] >= 114) {
1689 			adev->gfx.enable_cleaner_shader = true;
1690 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1691 			if (r) {
1692 				adev->gfx.enable_cleaner_shader = false;
1693 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1694 			}
1695 		}
1696 		break;
1697 	case IP_VERSION(11, 5, 2):
1698 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1699 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1700 		if (adev->gfx.me_fw_version  >= 12 &&
1701 		    adev->gfx.pfp_fw_version >= 15 &&
1702 		    adev->gfx.mec_fw_version >= 15) {
1703 			adev->gfx.enable_cleaner_shader = true;
1704 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1705 			if (r) {
1706 				adev->gfx.enable_cleaner_shader = false;
1707 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1708 			}
1709 		}
1710 		break;
1711 	case IP_VERSION(11, 5, 3):
1712 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1713 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1714 		if (adev->gfx.me_fw_version  >= 7 &&
1715 		    adev->gfx.pfp_fw_version >= 8 &&
1716 		    adev->gfx.mec_fw_version >= 8) {
1717 			adev->gfx.enable_cleaner_shader = true;
1718 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1719 			if (r) {
1720 				adev->gfx.enable_cleaner_shader = false;
1721 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1722 			}
1723 		}
1724 		break;
1725 	case IP_VERSION(11, 5, 4):
1726 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1727 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1728 		if (adev->gfx.me_fw_version  >= 4 &&
1729 		    adev->gfx.pfp_fw_version >= 7 &&
1730 		    adev->gfx.mec_fw_version >= 5) {
1731 			adev->gfx.enable_cleaner_shader = true;
1732 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1733 			if (r) {
1734 				adev->gfx.enable_cleaner_shader = false;
1735 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1736 			}
1737 		}
1738 		break;
1739 	default:
1740 		adev->gfx.enable_cleaner_shader = false;
1741 		break;
1742 	}
1743 
1744 	/* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */
1745 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) &&
1746 	    amdgpu_sriov_is_pp_one_vf(adev))
1747 		adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
1748 
1749 	/* EOP Event */
1750 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1751 			      GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1752 			      &adev->gfx.eop_irq);
1753 	if (r)
1754 		return r;
1755 
1756 	/* Bad opcode Event */
1757 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1758 			      GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR,
1759 			      &adev->gfx.bad_op_irq);
1760 	if (r)
1761 		return r;
1762 
1763 	/* Privileged reg */
1764 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1765 			      GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1766 			      &adev->gfx.priv_reg_irq);
1767 	if (r)
1768 		return r;
1769 
1770 	/* Privileged inst */
1771 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1772 			      GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1773 			      &adev->gfx.priv_inst_irq);
1774 	if (r)
1775 		return r;
1776 
1777 	/* FED error */
1778 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
1779 				  GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
1780 				  &adev->gfx.rlc_gc_fed_irq);
1781 	if (r)
1782 		return r;
1783 
1784 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1785 
1786 	gfx_v11_0_me_init(adev);
1787 
1788 	r = gfx_v11_0_rlc_init(adev);
1789 	if (r) {
1790 		DRM_ERROR("Failed to init rlc BOs!\n");
1791 		return r;
1792 	}
1793 
1794 	r = gfx_v11_0_mec_init(adev);
1795 	if (r) {
1796 		DRM_ERROR("Failed to init MEC BOs!\n");
1797 		return r;
1798 	}
1799 
1800 	if (adev->gfx.num_gfx_rings) {
1801 		ring_id = 0;
1802 		/* set up the gfx ring */
1803 		for (i = 0; i < adev->gfx.me.num_me; i++) {
1804 			for (j = 0; j < num_queue_per_pipe; j++) {
1805 				for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1806 					if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1807 						continue;
1808 
1809 					r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1810 								    i, k, j);
1811 					if (r)
1812 						return r;
1813 					ring_id++;
1814 				}
1815 			}
1816 		}
1817 	}
1818 
1819 	if (adev->gfx.num_compute_rings) {
1820 		ring_id = 0;
1821 		/* set up the compute queues - allocate horizontally across pipes */
1822 		for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1823 			for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1824 				for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1825 					if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
1826 									     k, j))
1827 						continue;
1828 
1829 					r = gfx_v11_0_compute_ring_init(adev, ring_id,
1830 									i, k, j);
1831 					if (r)
1832 						return r;
1833 
1834 					ring_id++;
1835 				}
1836 			}
1837 		}
1838 	}
1839 
1840 	adev->gfx.gfx_supported_reset =
1841 		amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
1842 	adev->gfx.compute_supported_reset =
1843 		amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
1844 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1845 	case IP_VERSION(11, 0, 0):
1846 	case IP_VERSION(11, 0, 2):
1847 	case IP_VERSION(11, 0, 3):
1848 		if ((adev->gfx.me_fw_version >= 2280) &&
1849 		    (adev->gfx.mec_fw_version >= 2410) &&
1850 		    !amdgpu_sriov_vf(adev) &&
1851 		    !adev->debug_disable_gpu_ring_reset) {
1852 			adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1853 			adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1854 		}
1855 		break;
1856 	default:
1857 		if (!amdgpu_sriov_vf(adev) &&
1858 		    !adev->debug_disable_gpu_ring_reset) {
1859 			adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1860 			adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1861 		}
1862 		break;
1863 	}
1864 
1865 	if (!adev->enable_mes_kiq) {
1866 		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
1867 		if (r) {
1868 			DRM_ERROR("Failed to init KIQ BOs!\n");
1869 			return r;
1870 		}
1871 
1872 		r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
1873 		if (r)
1874 			return r;
1875 	}
1876 
1877 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
1878 	if (r)
1879 		return r;
1880 
1881 	/* allocate visible FB for rlc auto-loading fw */
1882 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1883 		r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1884 		if (r)
1885 			return r;
1886 	}
1887 
1888 	r = gfx_v11_0_gpu_early_init(adev);
1889 	if (r)
1890 		return r;
1891 
1892 	if (amdgpu_gfx_ras_sw_init(adev)) {
1893 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
1894 		return -EINVAL;
1895 	}
1896 
1897 	gfx_v11_0_alloc_ip_dump(adev);
1898 
1899 	r = amdgpu_gfx_sysfs_init(adev);
1900 	if (r)
1901 		return r;
1902 
1903 	return 0;
1904 }
1905 
1906 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1907 {
1908 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1909 			      &adev->gfx.pfp.pfp_fw_gpu_addr,
1910 			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
1911 
1912 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1913 			      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1914 			      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1915 }
1916 
1917 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1918 {
1919 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1920 			      &adev->gfx.me.me_fw_gpu_addr,
1921 			      (void **)&adev->gfx.me.me_fw_ptr);
1922 
1923 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1924 			      &adev->gfx.me.me_fw_data_gpu_addr,
1925 			      (void **)&adev->gfx.me.me_fw_data_ptr);
1926 }
1927 
1928 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1929 {
1930 	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1931 			&adev->gfx.rlc.rlc_autoload_gpu_addr,
1932 			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
1933 }
1934 
1935 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
1936 {
1937 	int i;
1938 	struct amdgpu_device *adev = ip_block->adev;
1939 
1940 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1941 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1942 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1943 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1944 
1945 	amdgpu_gfx_mqd_sw_fini(adev, 0);
1946 
1947 	if (!adev->enable_mes_kiq) {
1948 		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
1949 		amdgpu_gfx_kiq_fini(adev, 0);
1950 	}
1951 
1952 	amdgpu_gfx_cleaner_shader_sw_fini(adev);
1953 
1954 	gfx_v11_0_pfp_fini(adev);
1955 	gfx_v11_0_me_fini(adev);
1956 	gfx_v11_0_rlc_fini(adev);
1957 	gfx_v11_0_mec_fini(adev);
1958 
1959 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1960 		gfx_v11_0_rlc_autoload_buffer_fini(adev);
1961 
1962 	gfx_v11_0_free_microcode(adev);
1963 
1964 	amdgpu_gfx_sysfs_fini(adev);
1965 
1966 	kfree(adev->gfx.ip_dump_core);
1967 	kfree(adev->gfx.ip_dump_compute_queues);
1968 	kfree(adev->gfx.ip_dump_gfx_queues);
1969 
1970 	return 0;
1971 }
1972 
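/*
 * A se_num/sh_num/instance value of 0xffffffff selects broadcast writes
 * to all SEs/SAs/instances; any other value indexes a single unit
 * through GRBM_GFX_INDEX.
 */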
1973 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1974 				   u32 sh_num, u32 instance, int xcc_id)
1975 {
1976 	u32 data;
1977 
1978 	if (instance == 0xffffffff)
1979 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1980 				     INSTANCE_BROADCAST_WRITES, 1);
1981 	else
1982 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1983 				     instance);
1984 
1985 	if (se_num == 0xffffffff)
1986 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1987 				     1);
1988 	else
1989 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1990 
1991 	if (sh_num == 0xffffffff)
1992 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1993 				     1);
1994 	else
1995 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1996 
1997 	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1998 }
1999 
2000 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
2001 {
2002 	u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
2003 
2004 	gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
2005 	gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
2006 					   CC_GC_SA_UNIT_DISABLE,
2007 					   SA_DISABLE);
2008 	gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE);
2009 	gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
2010 						 GC_USER_SA_UNIT_DISABLE,
2011 						 SA_DISABLE);
2012 	sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
2013 					    adev->gfx.config.max_shader_engines);
2014 
2015 	return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
2016 }
2017 
2018 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2019 {
2020 	u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
2021 	u32 rb_mask;
2022 
2023 	gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
2024 	gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
2025 					    CC_RB_BACKEND_DISABLE,
2026 					    BACKEND_DISABLE);
2027 	gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
2028 	gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
2029 						 GC_USER_RB_BACKEND_DISABLE,
2030 						 BACKEND_DISABLE);
2031 	rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
2032 					    adev->gfx.config.max_shader_engines);
2033 
2034 	return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
2035 }
2036 
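/*
 * Derive the active-RB mask from the active-SA mask. As a worked
 * example, with two RBs per SA (rb_bitmap_per_sa = 0x3) an active SA i
 * contributes bits [2*i+1:2*i]; the result is then clipped against the
 * global RB bitmap read back from the harvest registers.
 */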
2037 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
2038 {
2039 	u32 rb_bitmap_per_sa;
2040 	u32 rb_bitmap_width_per_sa;
2041 	u32 max_sa;
2042 	u32 active_sa_bitmap;
2043 	u32 global_active_rb_bitmap;
2044 	u32 active_rb_bitmap = 0;
2045 	u32 i;
2046 
2047 	/* query sa bitmap from SA_UNIT_DISABLE registers */
2048 	active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
2049 	/* query rb bitmap from RB_BACKEND_DISABLE registers */
2050 	global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
2051 
2052 	/* generate active rb bitmap according to active sa bitmap */
2053 	max_sa = adev->gfx.config.max_shader_engines *
2054 		 adev->gfx.config.max_sh_per_se;
2055 	rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
2056 				 adev->gfx.config.max_sh_per_se;
2057 	rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
2058 
2059 	for (i = 0; i < max_sa; i++) {
2060 		if (active_sa_bitmap & (1 << i))
2061 			active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
2062 	}
2063 
2064 	active_rb_bitmap &= global_active_rb_bitmap;
2065 	adev->gfx.config.backend_enable_mask = active_rb_bitmap;
2066 	adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
2067 }
2068 
2069 #define DEFAULT_SH_MEM_BASES	(0x6000)
2070 #define LDS_APP_BASE           0x1
2071 #define SCRATCH_APP_BASE       0x2
2072 
2073 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
2074 {
2075 	int i;
2076 	uint32_t sh_mem_bases;
2077 	uint32_t data;
2078 
2079 	/*
2080 	 * Configure apertures:
2081 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2082 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2083 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2084 	 */
2085 	sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
2086 			SCRATCH_APP_BASE;
2087 
2088 	mutex_lock(&adev->srbm_mutex);
2089 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2090 		soc21_grbm_select(adev, 0, 0, 0, i);
2091 		/* CP and shaders */
2092 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
2093 		WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
2094 
2095 		/* Enable trap for each kfd vmid. */
2096 		data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
2097 		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
2098 		WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
2099 	}
2100 	soc21_grbm_select(adev, 0, 0, 0, 0);
2101 	mutex_unlock(&adev->srbm_mutex);
2102 
2103 	/*
2104 	 * Initialize all compute VMIDs to have no GDS, GWS, or OA
2105 	 * access. These should be enabled by FW for target VMIDs.
2106 	 */
2107 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2108 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
2109 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
2110 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
2111 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
2112 	}
2113 }
2114 
2115 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
2116 {
2117 	int vmid;
2118 
2119 	/*
2120 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2121 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2122 	 * the driver can enable them for graphics. VMID0 should maintain
2123 	 * access so that HWS firmware can save/restore entries.
2124 	 */
2125 	for (vmid = 1; vmid < 16; vmid++) {
2126 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
2127 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
2128 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
2129 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
2130 	}
2131 }
2132 
2133 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
2134 {
2135 	/* TODO: harvest feature to be added later. */
2136 }
2137 
2138 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
2139 {
2140 	/* TCCs are global (not instanced). */
2141 	uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
2142 			       RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
2143 
2144 	adev->gfx.config.tcc_disabled_mask =
2145 		REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
2146 		(REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
2147 }
2148 
2149 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
2150 {
2151 	u32 tmp;
2152 	int i;
2153 
2154 	if (!amdgpu_sriov_vf(adev))
2155 		WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2156 
2157 	gfx_v11_0_setup_rb(adev);
2158 	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
2159 	gfx_v11_0_get_tcc_info(adev);
2160 	adev->gfx.config.pa_sc_tile_steering_override = 0;
2161 
2162 	/* Set whether texture coordinate truncation is conformant. */
2163 	tmp = RREG32_SOC15(GC, 0, regTA_CNTL2);
2164 	adev->gfx.config.ta_cntl2_truncate_coord_mode =
2165 		REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE);
2166 
2167 	/* XXX SH_MEM regs */
2168 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2169 	mutex_lock(&adev->srbm_mutex);
2170 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2171 		soc21_grbm_select(adev, 0, 0, 0, i);
2172 		/* CP and shaders */
2173 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
2174 		if (i != 0) {
2175 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2176 				(adev->gmc.private_aperture_start >> 48));
2177 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2178 				(adev->gmc.shared_aperture_start >> 48));
2179 			WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
2180 		}
2181 	}
2182 	soc21_grbm_select(adev, 0, 0, 0, 0);
2183 
2184 	mutex_unlock(&adev->srbm_mutex);
2185 
2186 	gfx_v11_0_init_compute_vmid(adev);
2187 	gfx_v11_0_init_gds_vmid(adev);
2188 }
2189 
2190 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev,
2191 				      int me, int pipe)
2192 {
2193 	if (me != 0)
2194 		return 0;
2195 
2196 	switch (pipe) {
2197 	case 0:
2198 		return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
2199 	case 1:
2200 		return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
2201 	default:
2202 		return 0;
2203 	}
2204 }
2205 
2206 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev,
2207 				      int me, int pipe)
2208 {
2209 	/*
2210 	 * amdgpu controls only the first MEC. That's why this function only
2211 	 * handles the setting of interrupts for this specific MEC. All other
2212 	 * pipes' interrupts are set by amdkfd.
2213 	 */
2214 	if (me != 1)
2215 		return 0;
2216 
2217 	switch (pipe) {
2218 	case 0:
2219 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
2220 	case 1:
2221 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
2222 	case 2:
2223 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
2224 	case 3:
2225 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
2226 	default:
2227 		return 0;
2228 	}
2229 }
2230 
2231 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2232 					       bool enable)
2233 {
2234 	u32 tmp, cp_int_cntl_reg;
2235 	int i, j;
2236 
2237 	if (amdgpu_sriov_vf(adev))
2238 		return;
2239 
2240 	for (i = 0; i < adev->gfx.me.num_me; i++) {
2241 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
2242 			cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
2243 
2244 			if (cp_int_cntl_reg) {
2245 				tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
2246 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
2247 						    enable ? 1 : 0);
2248 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
2249 						    enable ? 1 : 0);
2250 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
2251 						    enable ? 1 : 0);
2252 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
2253 						    enable ? 1 : 0);
2254 				WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
2255 			}
2256 		}
2257 	}
2258 }
2259 
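/*
 * Regenerate the clear-state indirect buffer (CSIB) contents and hand
 * its GPU address and size to the RLC; the low two address bits are
 * masked off since the buffer is dword aligned.
 */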
2260 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
2261 {
2262 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2263 
2264 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
2265 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2266 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
2267 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2268 	WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
2269 
2270 	return 0;
2271 }
2272 
2273 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
2274 {
2275 	u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
2276 
2277 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2278 	WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
2279 }
2280 
2281 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
2282 {
2283 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2284 	udelay(50);
2285 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2286 	udelay(50);
2287 }
2288 
2289 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
2290 					     bool enable)
2291 {
2292 	uint32_t rlc_pg_cntl;
2293 
2294 	rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
2295 
2296 	if (!enable) {
2297 		/* RLC_PG_CNTL[23] = 0 (default)
2298 		 * RLC will wait for handshake acks with SMU
2299 		 * GFXOFF will be enabled
2300 		 * RLC_PG_CNTL[23] = 1
2301 		 * RLC will not issue any message to SMU
2302 		 * hence no handshake between SMU & RLC
2303 		 * GFXOFF will be disabled
2304 		 */
2305 		rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
2306 	} else {
2307 		rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
	}
2308 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
2309 }
2310 
2311 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
2312 {
2313 	/* TODO: keep the rlc & smu handshake disabled until smu
2314 	 * and the gfxoff feature work as expected */
2315 	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
2316 		gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
2317 
2318 	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2319 	udelay(50);
2320 }
2321 
2322 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
2323 {
2324 	uint32_t tmp;
2325 
2326 	/* enable Save Restore Machine */
2327 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
2328 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2329 	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
2330 	WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
2331 }
2332 
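/*
 * Legacy (direct) RLC-G load: the ucode is written one dword at a time
 * through the UCODE_ADDR/UCODE_DATA pair (the DATA write is assumed to
 * auto-increment the address); the final ADDR write records the
 * firmware version.
 */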
2333 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
2334 {
2335 	const struct rlc_firmware_header_v2_0 *hdr;
2336 	const __le32 *fw_data;
2337 	unsigned i, fw_size;
2338 
2339 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2340 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2341 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2342 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2343 
2344 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
2345 		     RLCG_UCODE_LOADING_START_ADDRESS);
2346 
2347 	for (i = 0; i < fw_size; i++)
2348 		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
2349 			     le32_to_cpup(fw_data++));
2350 
2351 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2352 }
2353 
2354 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
2355 {
2356 	const struct rlc_firmware_header_v2_2 *hdr;
2357 	const __le32 *fw_data;
2358 	unsigned i, fw_size;
2359 	u32 tmp;
2360 
2361 	hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
2362 
2363 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2364 			le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
2365 	fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
2366 
2367 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
2368 
2369 	for (i = 0; i < fw_size; i++) {
2370 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2371 			msleep(1);
2372 		WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
2373 				le32_to_cpup(fw_data++));
2374 	}
2375 
2376 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
2377 
2378 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2379 			le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
2380 	fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
2381 
2382 	WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
2383 	for (i = 0; i < fw_size; i++) {
2384 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2385 			msleep(1);
2386 		WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
2387 				le32_to_cpup(fw_data++));
2388 	}
2389 
2390 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
2391 
2392 	tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
2393 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
2394 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
2395 	WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
2396 }
2397 
2398 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
2399 {
2400 	const struct rlc_firmware_header_v2_3 *hdr;
2401 	const __le32 *fw_data;
2402 	unsigned i, fw_size;
2403 	u32 tmp;
2404 
2405 	hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
2406 
2407 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2408 			le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
2409 	fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
2410 
2411 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
2412 
2413 	for (i = 0; i < fw_size; i++) {
2414 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2415 			msleep(1);
2416 		WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
2417 				le32_to_cpup(fw_data++));
2418 	}
2419 
2420 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
2421 
2422 	tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
2423 	tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
2424 	WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
2425 
2426 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2427 			le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
2428 	fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
2429 
2430 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
2431 
2432 	for (i = 0; i < fw_size; i++) {
2433 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2434 			msleep(1);
2435 		WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
2436 				le32_to_cpup(fw_data++));
2437 	}
2438 
2439 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
2440 
2441 	tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
2442 	tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
2443 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
2444 }
2445 
2446 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
2447 {
2448 	const struct rlc_firmware_header_v2_0 *hdr;
2449 	uint16_t version_major;
2450 	uint16_t version_minor;
2451 
2452 	if (!adev->gfx.rlc_fw)
2453 		return -EINVAL;
2454 
2455 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2456 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2457 
2458 	version_major = le16_to_cpu(hdr->header.header_version_major);
2459 	version_minor = le16_to_cpu(hdr->header.header_version_minor);
2460 
2461 	if (version_major == 2) {
2462 		gfx_v11_0_load_rlcg_microcode(adev);
2463 		if (amdgpu_dpm == 1) {
2464 			if (version_minor >= 2)
2465 				gfx_v11_0_load_rlc_iram_dram_microcode(adev);
2466 			if (version_minor == 3)
2467 				gfx_v11_0_load_rlcp_rlcv_microcode(adev);
2468 		}
2469 
2470 		return 0;
2471 	}
2472 
2473 	return -EINVAL;
2474 }
2475 
2476 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
2477 {
2478 	int r;
2479 
2480 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
2481 		gfx_v11_0_init_csb(adev);
2482 
2483 		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
2484 			gfx_v11_0_rlc_enable_srm(adev);
2485 	} else {
2486 		if (amdgpu_sriov_vf(adev)) {
2487 			gfx_v11_0_init_csb(adev);
2488 			return 0;
2489 		}
2490 
2491 		adev->gfx.rlc.funcs->stop(adev);
2492 
2493 		/* disable CG */
2494 		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
2495 
2496 		/* disable PG */
2497 		WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
2498 
2499 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2500 			/* legacy rlc firmware loading */
2501 			r = gfx_v11_0_rlc_load_microcode(adev);
2502 			if (r)
2503 				return r;
2504 		}
2505 
2506 		gfx_v11_0_init_csb(adev);
2507 
2508 		adev->gfx.rlc.funcs->start(adev);
2509 	}
2510 	return 0;
2511 }
2512 
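/*
 * The non-RS64 cache setup below follows an invalidate-then-program
 * pattern: flush the L1 I$, configure the base control, then point the
 * cache at the ucode. The 0xFFFFF000 mask implies the ucode object is
 * expected to be 4KB aligned.
 */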
2513 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
2514 {
2515 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2516 	uint32_t tmp;
2517 	int i;
2518 
2519 	/* Trigger an invalidation of the L1 instruction caches */
2520 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2521 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2522 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2523 
2524 	/* Wait for invalidation complete */
2525 	for (i = 0; i < usec_timeout; i++) {
2526 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2527 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2528 					INVALIDATE_CACHE_COMPLETE))
2529 			break;
2530 		udelay(1);
2531 	}
2532 
2533 	if (i >= usec_timeout) {
2534 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2535 		return -EINVAL;
2536 	}
2537 
2538 	if (amdgpu_emu_mode == 1)
2539 		amdgpu_device_flush_hdp(adev, NULL);
2540 
2541 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2542 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2543 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2544 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2545 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2546 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2547 
2548 	/* Program me ucode address into instruction cache address register */
2549 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2550 			lower_32_bits(addr) & 0xFFFFF000);
2551 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2552 			upper_32_bits(addr));
2553 
2554 	return 0;
2555 }
2556 
2557 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
2558 {
2559 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2560 	uint32_t tmp;
2561 	int i;
2562 
2563 	/* Trigger an invalidation of the L1 instruction caches */
2564 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2565 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2566 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2567 
2568 	/* Wait for invalidation complete */
2569 	for (i = 0; i < usec_timeout; i++) {
2570 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2571 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2572 					INVALIDATE_CACHE_COMPLETE))
2573 			break;
2574 		udelay(1);
2575 	}
2576 
2577 	if (i >= usec_timeout) {
2578 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2579 		return -EINVAL;
2580 	}
2581 
2582 	if (amdgpu_emu_mode == 1)
2583 		amdgpu_device_flush_hdp(adev, NULL);
2584 
2585 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2586 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2587 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2588 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2589 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2590 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2591 
2592 	/* Program pfp ucode address into instruction cache address register */
2593 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2594 			lower_32_bits(addr) & 0xFFFFF000);
2595 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2596 			upper_32_bits(addr));
2597 
2598 	return 0;
2599 }
2600 
2601 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2602 {
2603 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2604 	uint32_t tmp;
2605 	int i;
2606 
2607 	/* Trigger an invalidation of the L1 instruction caches */
2608 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2609 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2610 
2611 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2612 
2613 	/* Wait for invalidation complete */
2614 	for (i = 0; i < usec_timeout; i++) {
2615 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2616 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2617 					INVALIDATE_CACHE_COMPLETE))
2618 			break;
2619 		udelay(1);
2620 	}
2621 
2622 	if (i >= usec_timeout) {
2623 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2624 		return -EINVAL;
2625 	}
2626 
2627 	if (amdgpu_emu_mode == 1)
2628 		amdgpu_device_flush_hdp(adev, NULL);
2629 
2630 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2631 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2632 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2633 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2634 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2635 
2636 	/* Program mec1 ucode address into instruction cache address register */
2637 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2638 			lower_32_bits(addr) & 0xFFFFF000);
2639 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2640 			upper_32_bits(addr));
2641 
2642 	return 0;
2643 }
2644 
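/*
 * RS64 variant: 'addr' is the instruction ucode object and 'addr2' the
 * per-pipe data/stack object. After priming the I$, each pipe's program
 * counter start is loaded from the firmware header (the entry point is
 * split across the two CNTR_START registers as lo >> 2 and packed hi
 * bits) and the pipe is pulsed through reset so the new start address
 * takes effect.
 */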
2645 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2646 {
2647 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2648 	uint32_t tmp;
2649 	unsigned i, pipe_id;
2650 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2651 
2652 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2653 		adev->gfx.pfp_fw->data;
2654 
2655 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2656 		lower_32_bits(addr));
2657 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2658 		upper_32_bits(addr));
2659 
2660 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2661 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2662 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2663 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2664 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2665 
2666 	/*
2667 	 * Programming any of the CP_PFP_IC_BASE registers
2668 	 * forces invalidation of the PFP L1 I$. Wait for the
2669 	 * invalidation complete
2670 	 */
2671 	for (i = 0; i < usec_timeout; i++) {
2672 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2673 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2674 			INVALIDATE_CACHE_COMPLETE))
2675 			break;
2676 		udelay(1);
2677 	}
2678 
2679 	if (i >= usec_timeout) {
2680 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2681 		return -EINVAL;
2682 	}
2683 
2684 	/* Prime the L1 instruction caches */
2685 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2686 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2687 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2688 	/* Wait for the cache to be primed */
2689 	for (i = 0; i < usec_timeout; i++) {
2690 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2691 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2692 			ICACHE_PRIMED))
2693 			break;
2694 		udelay(1);
2695 	}
2696 
2697 	if (i >= usec_timeout) {
2698 		dev_err(adev->dev, "failed to prime instruction cache\n");
2699 		return -EINVAL;
2700 	}
2701 
2702 	mutex_lock(&adev->srbm_mutex);
2703 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2704 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2705 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2706 			(pfp_hdr->ucode_start_addr_hi << 30) |
2707 			(pfp_hdr->ucode_start_addr_lo >> 2));
2708 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2709 			pfp_hdr->ucode_start_addr_hi >> 2);
2710 
2711 		/*
2712 		 * Program CP_ME_CNTL to reset given PIPE to take
2713 		 * effect of CP_PFP_PRGRM_CNTR_START.
2714 		 */
2715 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2716 		if (pipe_id == 0)
2717 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2718 					PFP_PIPE0_RESET, 1);
2719 		else
2720 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2721 					PFP_PIPE1_RESET, 1);
2722 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2723 
2724 		/* Clear the pfp pipe reset bit. */
2725 		if (pipe_id == 0)
2726 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2727 					PFP_PIPE0_RESET, 0);
2728 		else
2729 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2730 					PFP_PIPE1_RESET, 0);
2731 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2732 
2733 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2734 			lower_32_bits(addr2));
2735 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2736 			upper_32_bits(addr2));
2737 	}
2738 	soc21_grbm_select(adev, 0, 0, 0, 0);
2739 	mutex_unlock(&adev->srbm_mutex);
2740 
2741 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2742 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2743 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2744 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2745 
2746 	/* Invalidate the data caches */
2747 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2748 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2749 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2750 
2751 	for (i = 0; i < usec_timeout; i++) {
2752 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2753 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2754 			INVALIDATE_DCACHE_COMPLETE))
2755 			break;
2756 		udelay(1);
2757 	}
2758 
2759 	if (i >= usec_timeout) {
2760 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2761 		return -EINVAL;
2762 	}
2763 
2764 	return 0;
2765 }
2766 
2767 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2768 {
2769 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2770 	uint32_t tmp;
2771 	unsigned i, pipe_id;
2772 	const struct gfx_firmware_header_v2_0 *me_hdr;
2773 
2774 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2775 		adev->gfx.me_fw->data;
2776 
2777 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2778 		lower_32_bits(addr));
2779 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2780 		upper_32_bits(addr));
2781 
2782 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2783 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2784 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2785 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2786 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2787 
2788 	/*
2789 	 * Programming any of the CP_ME_IC_BASE registers
2790 	 * forces invalidation of the ME L1 I$. Wait for the
2791 	 * invalidation complete
2792 	 */
2793 	for (i = 0; i < usec_timeout; i++) {
2794 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2795 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2796 			INVALIDATE_CACHE_COMPLETE))
2797 			break;
2798 		udelay(1);
2799 	}
2800 
2801 	if (i >= usec_timeout) {
2802 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2803 		return -EINVAL;
2804 	}
2805 
2806 	/* Prime the instruction caches */
2807 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2808 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2809 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2810 
2811 	/* Wait for the instruction cache to be primed */
2812 	for (i = 0; i < usec_timeout; i++) {
2813 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2814 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2815 			ICACHE_PRIMED))
2816 			break;
2817 		udelay(1);
2818 	}
2819 
2820 	if (i >= usec_timeout) {
2821 		dev_err(adev->dev, "failed to prime instruction cache\n");
2822 		return -EINVAL;
2823 	}
2824 
2825 	mutex_lock(&adev->srbm_mutex);
2826 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2827 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2828 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2829 			(me_hdr->ucode_start_addr_hi << 30) |
2830 			(me_hdr->ucode_start_addr_lo >> 2));
2831 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2832 			me_hdr->ucode_start_addr_hi >> 2);
2833 
2834 		/*
2835 		 * Program CP_ME_CNTL to reset given PIPE to take
2836 		 * effect of CP_ME_PRGRM_CNTR_START.
2837 		 */
2838 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2839 		if (pipe_id == 0)
2840 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2841 					ME_PIPE0_RESET, 1);
2842 		else
2843 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2844 					ME_PIPE1_RESET, 1);
2845 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2846 
2847 		/* Clear the me pipe reset bit. */
2848 		if (pipe_id == 0)
2849 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2850 					ME_PIPE0_RESET, 0);
2851 		else
2852 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2853 					ME_PIPE1_RESET, 0);
2854 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2855 
2856 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2857 			lower_32_bits(addr2));
2858 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2859 			upper_32_bits(addr2));
2860 	}
2861 	soc21_grbm_select(adev, 0, 0, 0, 0);
2862 	mutex_unlock(&adev->srbm_mutex);
2863 
2864 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2865 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2866 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2867 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2868 
2869 	/* Invalidate the data caches */
2870 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2871 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2872 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2873 
2874 	for (i = 0; i < usec_timeout; i++) {
2875 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2876 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2877 			INVALIDATE_DCACHE_COMPLETE))
2878 			break;
2879 		udelay(1);
2880 	}
2881 
2882 	if (i >= usec_timeout) {
2883 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2884 		return -EINVAL;
2885 	}
2886 
2887 	return 0;
2888 }
2889 
2890 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2891 {
2892 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2893 	uint32_t tmp;
2894 	unsigned i;
2895 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2896 
2897 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2898 		adev->gfx.mec_fw->data;
2899 
2900 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2901 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2902 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2903 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2904 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2905 
2906 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2907 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2908 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2909 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2910 
2911 	mutex_lock(&adev->srbm_mutex);
2912 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2913 		soc21_grbm_select(adev, 1, i, 0, 0);
2914 
2915 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, lower_32_bits(addr2));
2916 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2917 		     upper_32_bits(addr2));
2918 
2919 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2920 					mec_hdr->ucode_start_addr_lo >> 2 |
2921 					mec_hdr->ucode_start_addr_hi << 30);
2922 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2923 					mec_hdr->ucode_start_addr_hi >> 2);
2924 
2925 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, lower_32_bits(addr));
2926 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2927 		     upper_32_bits(addr));
2928 	}
2929 	soc21_grbm_select(adev, 0, 0, 0, 0);
2930 	mutex_unlock(&adev->srbm_mutex);
2931 
2932 	/* Trigger an invalidation of the MEC data caches */
2933 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2934 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2935 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2936 
2937 	/* Wait for invalidation complete */
2938 	for (i = 0; i < usec_timeout; i++) {
2939 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2940 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2941 				       INVALIDATE_DCACHE_COMPLETE))
2942 			break;
2943 		udelay(1);
2944 	}
2945 
2946 	if (i >= usec_timeout) {
2947 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2948 		return -EINVAL;
2949 	}
2950 
2951 	/* Trigger an invalidation of the L1 instruction caches */
2952 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2953 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2954 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2955 
2956 	/* Wait for invalidation complete */
2957 	for (i = 0; i < usec_timeout; i++) {
2958 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2959 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2960 				       INVALIDATE_CACHE_COMPLETE))
2961 			break;
2962 		udelay(1);
2963 	}
2964 
2965 	if (i >= usec_timeout) {
2966 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2967 		return -EINVAL;
2968 	}
2969 
2970 	return 0;
2971 }
2972 
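/*
 * Unlike the _cache_rs64 helpers above, only the program counter start
 * addresses are programmed here for PFP, ME and MEC, with each pipe
 * group pulsed through reset; the ucode itself is presumably loaded by
 * another path (e.g. PSP front-door loading).
 */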
2973 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2974 {
2975 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2976 	const struct gfx_firmware_header_v2_0 *me_hdr;
2977 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2978 	uint32_t pipe_id, tmp;
2979 
2980 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2981 		adev->gfx.mec_fw->data;
2982 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2983 		adev->gfx.me_fw->data;
2984 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2985 		adev->gfx.pfp_fw->data;
2986 
2987 	/* config pfp program start addr */
2988 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2989 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2990 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2991 			(pfp_hdr->ucode_start_addr_hi << 30) |
2992 			(pfp_hdr->ucode_start_addr_lo >> 2));
2993 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2994 			pfp_hdr->ucode_start_addr_hi >> 2);
2995 	}
2996 	soc21_grbm_select(adev, 0, 0, 0, 0);
2997 
2998 	/* reset pfp pipe */
2999 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3000 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
3001 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
3002 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3003 
3004 	/* clear pfp pipe reset */
3005 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
3006 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
3007 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3008 
3009 	/* config me program start addr */
3010 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
3011 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3012 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
3013 			(me_hdr->ucode_start_addr_hi << 30) |
3014 			(me_hdr->ucode_start_addr_lo >> 2));
3015 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
3016 			me_hdr->ucode_start_addr_hi >> 2);
3017 	}
3018 	soc21_grbm_select(adev, 0, 0, 0, 0);
3019 
3020 	/* reset me pipe */
3021 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3022 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
3023 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
3024 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3025 
3026 	/* clear me pipe reset */
3027 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
3028 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
3029 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3030 
3031 	/* config mec program start addr */
3032 	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
3033 		soc21_grbm_select(adev, 1, pipe_id, 0, 0);
3034 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3035 					mec_hdr->ucode_start_addr_lo >> 2 |
3036 					mec_hdr->ucode_start_addr_hi << 30);
3037 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3038 					mec_hdr->ucode_start_addr_hi >> 2);
3039 	}
3040 	soc21_grbm_select(adev, 0, 0, 0, 0);
3041 
3042 	/* reset mec pipe */
3043 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3044 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
3045 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
3046 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
3047 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
3048 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
3049 
3050 	/* clear mec pipe reset */
3051 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
3052 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
3053 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
3054 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
3055 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
3056 }
3057 
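/*
 * Poll CP_STAT and the RLC bootload status until the RLC has finished
 * autoloading the GC firmware, then, for backdoor loading, point the
 * PFP/ME/MEC caches at the images inside the RLC autoload buffer.
 */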
3058 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
3059 {
3060 	uint32_t cp_status;
3061 	uint32_t bootload_status;
3062 	int i, r;
3063 	uint64_t addr, addr2;
3064 
3065 	for (i = 0; i < adev->usec_timeout; i++) {
3066 		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
3067 
3068 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
3069 			    IP_VERSION(11, 0, 1) ||
3070 		    amdgpu_ip_version(adev, GC_HWIP, 0) ==
3071 			    IP_VERSION(11, 0, 4) ||
3072 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
3073 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) ||
3074 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) ||
3075 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3) ||
3076 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 4))
3077 			bootload_status = RREG32_SOC15(GC, 0,
3078 					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
3079 		else
3080 			bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
3081 
3082 		if ((cp_status == 0) &&
3083 		    (REG_GET_FIELD(bootload_status,
3084 			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
3085 			break;
3086 		}
3087 		udelay(1);
3088 	}
3089 
3090 	if (i >= adev->usec_timeout) {
3091 		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
3092 		return -ETIMEDOUT;
3093 	}
3094 
3095 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
3096 		if (adev->gfx.rs64_enable) {
3097 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3098 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
3099 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
3100 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
3101 			r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
3102 			if (r)
3103 				return r;
3104 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3105 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
3106 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
3107 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
3108 			r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
3109 			if (r)
3110 				return r;
3111 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3112 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
3113 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
3114 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
3115 			r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
3116 			if (r)
3117 				return r;
3118 		} else {
3119 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3120 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
3121 			r = gfx_v11_0_config_me_cache(adev, addr);
3122 			if (r)
3123 				return r;
3124 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3125 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
3126 			r = gfx_v11_0_config_pfp_cache(adev, addr);
3127 			if (r)
3128 				return r;
3129 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3130 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
3131 			r = gfx_v11_0_config_mec_cache(adev, addr);
3132 			if (r)
3133 				return r;
3134 		}
3135 	}
3136 
3137 	return 0;
3138 }
3139 
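/* Halt or un-halt the gfx CP (PFP/ME) and wait for CP_STAT to go idle. */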
3140 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3141 {
3142 	int i;
3143 	u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3144 
3145 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3146 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3147 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3148 
3149 	for (i = 0; i < adev->usec_timeout; i++) {
3150 		if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
3151 			break;
3152 		udelay(1);
3153 	}
3154 
3155 	if (i >= adev->usec_timeout)
3156 		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
3157 
3158 	return 0;
3159 }
3160 
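/*
 * Legacy (non-RS64) PFP load: copy the ucode into a GTT buffer, point the
 * PFP cache at it and write the jump table through the CP_HYP_PFP regs.
 */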
3161 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
3162 {
3163 	int r;
3164 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3165 	const __le32 *fw_data;
3166 	unsigned i, fw_size;
3167 
3168 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3169 		adev->gfx.pfp_fw->data;
3170 
3171 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3172 
3173 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
3174 		le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3175 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
3176 
3177 	r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
3178 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3179 				      &adev->gfx.pfp.pfp_fw_obj,
3180 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
3181 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
3182 	if (r) {
3183 		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
3184 		gfx_v11_0_pfp_fini(adev);
3185 		return r;
3186 	}
3187 
3188 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
3189 
3190 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
3191 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
3192 
3193 	gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
3194 
3195 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
3196 
3197 	for (i = 0; i < pfp_hdr->jt_size; i++)
3198 		WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
3199 			     le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
3200 
3201 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3202 
3203 	return 0;
3204 }
3205 
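/*
 * RS64 PFP load: copy the instruction and data images into 64KB-aligned
 * buffers, invalidate and prime the PFP I$, program the start address and
 * data cache base per pipe, and pulse the pipe resets.
 */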
3206 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
3207 {
3208 	int r;
3209 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
3210 	const __le32 *fw_ucode, *fw_data;
3211 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
3212 	uint32_t tmp;
3213 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
3214 
3215 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
3216 		adev->gfx.pfp_fw->data;
3217 
3218 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3219 
3220 	/* instruction */
3221 	fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
3222 		le32_to_cpu(pfp_hdr->ucode_offset_bytes));
3223 	fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
3224 	/* data */
3225 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
3226 		le32_to_cpu(pfp_hdr->data_offset_bytes));
3227 	fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
3228 
3229 	/* 64KB aligned */
3230 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3231 				      64 * 1024,
3232 				      AMDGPU_GEM_DOMAIN_VRAM |
3233 				      AMDGPU_GEM_DOMAIN_GTT,
3234 				      &adev->gfx.pfp.pfp_fw_obj,
3235 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
3236 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
3237 	if (r) {
3238 		dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
3239 		gfx_v11_0_pfp_fini(adev);
3240 		return r;
3241 	}
3242 
3243 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3244 				      64 * 1024,
3245 				      AMDGPU_GEM_DOMAIN_VRAM |
3246 				      AMDGPU_GEM_DOMAIN_GTT,
3247 				      &adev->gfx.pfp.pfp_fw_data_obj,
3248 				      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
3249 				      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
3250 	if (r) {
3251 		dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
3252 		gfx_v11_0_pfp_fini(adev);
3253 		return r;
3254 	}
3255 
3256 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
3257 	memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
3258 
3259 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
3260 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
3261 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
3262 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
3263 
3264 	if (amdgpu_emu_mode == 1)
3265 		amdgpu_device_flush_hdp(adev, NULL);
3266 
3267 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
3268 		lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
3269 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
3270 		upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
3271 
3272 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
3273 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
3274 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
3275 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
3276 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
3277 
3278 	/*
3279 	 * Programming any of the CP_PFP_IC_BASE registers
3280 	 * forces an invalidation of the PFP L1 I$. Wait for the
3281 	 * invalidation to complete.
3282 	 */
3283 	for (i = 0; i < usec_timeout; i++) {
3284 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3285 		if (REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
3286 			INVALIDATE_CACHE_COMPLETE) == 1)
3287 			break;
3288 		udelay(1);
3289 	}
3290 
3291 	if (i >= usec_timeout) {
3292 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3293 		return -EINVAL;
3294 	}
3295 
3296 	/* Prime the L1 instruction caches */
3297 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3298 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
3299 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
3300 	/* Wait for the cache to be primed */
3301 	for (i = 0; i < usec_timeout; i++) {
3302 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3303 		if (REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
3304 			ICACHE_PRIMED) == 1)
3305 			break;
3306 		udelay(1);
3307 	}
3308 
3309 	if (i >= usec_timeout) {
3310 		dev_err(adev->dev, "failed to prime instruction cache\n");
3311 		return -EINVAL;
3312 	}
3313 
3314 	mutex_lock(&adev->srbm_mutex);
3315 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3316 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3317 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
3318 			(pfp_hdr->ucode_start_addr_hi << 30) |
3319 			(pfp_hdr->ucode_start_addr_lo >> 2));
3320 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
3321 			pfp_hdr->ucode_start_addr_hi >> 2);
3322 
3323 		/*
3324 		 * Pulse the given pipe's reset in CP_ME_CNTL so the new
3325 		 * CP_PFP_PRGRM_CNTR_START value takes effect.
3326 		 */
3327 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3328 		if (pipe_id == 0)
3329 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3330 					PFP_PIPE0_RESET, 1);
3331 		else
3332 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3333 					PFP_PIPE1_RESET, 1);
3334 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3335 
3336 		/* Clear the pfp pipe reset bit. */
3337 		if (pipe_id == 0)
3338 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3339 					PFP_PIPE0_RESET, 0);
3340 		else
3341 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3342 					PFP_PIPE1_RESET, 0);
3343 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3344 
3345 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
3346 			lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
3347 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
3348 			upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
3349 	}
3350 	soc21_grbm_select(adev, 0, 0, 0, 0);
3351 	mutex_unlock(&adev->srbm_mutex);
3352 
3353 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3354 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3355 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3356 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3357 
3358 	/* Invalidate the data caches */
3359 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3360 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3361 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3362 
3363 	for (i = 0; i < usec_timeout; i++) {
3364 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3365 		if (REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3366 			INVALIDATE_DCACHE_COMPLETE) == 1)
3367 			break;
3368 		udelay(1);
3369 	}
3370 
3371 	if (i >= usec_timeout) {
3372 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3373 		return -EINVAL;
3374 	}
3375 
3376 	return 0;
3377 }
3378 
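/*
 * Legacy (non-RS64) ME load: copy the ucode into a GTT buffer, point the
 * ME cache at it and write the jump table through the CP_HYP_ME regs.
 */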
3379 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
3380 {
3381 	int r;
3382 	const struct gfx_firmware_header_v1_0 *me_hdr;
3383 	const __le32 *fw_data;
3384 	unsigned i, fw_size;
3385 
3386 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3387 		adev->gfx.me_fw->data;
3388 
3389 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3390 
3391 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
3392 		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3393 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
3394 
3395 	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
3396 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3397 				      &adev->gfx.me.me_fw_obj,
3398 				      &adev->gfx.me.me_fw_gpu_addr,
3399 				      (void **)&adev->gfx.me.me_fw_ptr);
3400 	if (r) {
3401 		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
3402 		gfx_v11_0_me_fini(adev);
3403 		return r;
3404 	}
3405 
3406 	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
3407 
3408 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3409 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3410 
3411 	gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
3412 
3413 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
3414 
3415 	for (i = 0; i < me_hdr->jt_size; i++)
3416 		WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
3417 			     le32_to_cpup(fw_data + me_hdr->jt_offset + i));
3418 
3419 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
3420 
3421 	return 0;
3422 }
3423 
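/*
 * RS64 ME load: same sequence as the RS64 PFP load, using the CP_ME_IC
 * registers and the DC_BASE1 data cache window.
 */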
3424 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
3425 {
3426 	int r;
3427 	const struct gfx_firmware_header_v2_0 *me_hdr;
3428 	const __le32 *fw_ucode, *fw_data;
3429 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
3430 	uint32_t tmp;
3431 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
3432 
3433 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
3434 		adev->gfx.me_fw->data;
3435 
3436 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3437 
3438 	/* instruction */
3439 	fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
3440 		le32_to_cpu(me_hdr->ucode_offset_bytes));
3441 	fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
3442 	/* data */
3443 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
3444 		le32_to_cpu(me_hdr->data_offset_bytes));
3445 	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
3446 
3447 	/* 64KB aligned */
3448 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3449 				      64 * 1024,
3450 				      AMDGPU_GEM_DOMAIN_VRAM |
3451 				      AMDGPU_GEM_DOMAIN_GTT,
3452 				      &adev->gfx.me.me_fw_obj,
3453 				      &adev->gfx.me.me_fw_gpu_addr,
3454 				      (void **)&adev->gfx.me.me_fw_ptr);
3455 	if (r) {
3456 		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
3457 		gfx_v11_0_me_fini(adev);
3458 		return r;
3459 	}
3460 
3461 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3462 				      64 * 1024,
3463 				      AMDGPU_GEM_DOMAIN_VRAM |
3464 				      AMDGPU_GEM_DOMAIN_GTT,
3465 				      &adev->gfx.me.me_fw_data_obj,
3466 				      &adev->gfx.me.me_fw_data_gpu_addr,
3467 				      (void **)&adev->gfx.me.me_fw_data_ptr);
3468 	if (r) {
3469 		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
3470 		gfx_v11_0_me_fini(adev);
3471 		return r;
3472 	}
3473 
3474 	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
3475 	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
3476 
3477 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3478 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
3479 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3480 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
3481 
3482 	if (amdgpu_emu_mode == 1)
3483 		amdgpu_device_flush_hdp(adev, NULL);
3484 
3485 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
3486 		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
3487 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
3488 		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
3489 
3490 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
3491 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
3492 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
3493 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
3494 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
3495 
3496 	/*
3497 	 * Programming any of the CP_ME_IC_BASE registers
3498 	 * forces an invalidation of the ME L1 I$. Wait for the
3499 	 * invalidation to complete.
3500 	 */
3501 	for (i = 0; i < usec_timeout; i++) {
3502 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3503 		if (REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3504 			INVALIDATE_CACHE_COMPLETE) == 1)
3505 			break;
3506 		udelay(1);
3507 	}
3508 
3509 	if (i >= usec_timeout) {
3510 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3511 		return -EINVAL;
3512 	}
3513 
3514 	/* Prime the instruction caches */
3515 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3516 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
3517 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
3518 
3519 	/* Wait for the instruction cache to be primed */
3520 	for (i = 0; i < usec_timeout; i++) {
3521 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3522 		if (REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3523 			ICACHE_PRIMED) == 1)
3524 			break;
3525 		udelay(1);
3526 	}
3527 
3528 	if (i >= usec_timeout) {
3529 		dev_err(adev->dev, "failed to prime instruction cache\n");
3530 		return -EINVAL;
3531 	}
3532 
3533 	mutex_lock(&adev->srbm_mutex);
3534 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3535 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3536 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
3537 			(me_hdr->ucode_start_addr_hi << 30) |
3538 			(me_hdr->ucode_start_addr_lo >> 2));
3539 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
3540 			me_hdr->ucode_start_addr_hi >> 2);
3541 
3542 		/*
3543 		 * Pulse the given pipe's reset in CP_ME_CNTL so the new
3544 		 * CP_ME_PRGRM_CNTR_START value takes effect.
3545 		 */
3546 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3547 		if (pipe_id == 0)
3548 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3549 					ME_PIPE0_RESET, 1);
3550 		else
3551 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3552 					ME_PIPE1_RESET, 1);
3553 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3554 
3555 		/* Clear the me pipe reset bit. */
3556 		if (pipe_id == 0)
3557 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3558 					ME_PIPE0_RESET, 0);
3559 		else
3560 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3561 					ME_PIPE1_RESET, 0);
3562 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3563 
3564 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
3565 			lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3566 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
3567 			upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3568 	}
3569 	soc21_grbm_select(adev, 0, 0, 0, 0);
3570 	mutex_unlock(&adev->srbm_mutex);
3571 
3572 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3573 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3574 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3575 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3576 
3577 	/* Invalidate the data caches */
3578 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3579 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3580 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3581 
3582 	for (i = 0; i < usec_timeout; i++) {
3583 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3584 		if (REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3585 			INVALIDATE_DCACHE_COMPLETE) == 1)
3586 			break;
3587 		udelay(1);
3588 	}
3589 
3590 	if (i >= usec_timeout) {
3591 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3592 		return -EINVAL;
3593 	}
3594 
3595 	return 0;
3596 }
3597 
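/* Halt the gfx CP and load the PFP and ME images (legacy or RS64). */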
3598 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3599 {
3600 	int r;
3601 
3602 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3603 		return -EINVAL;
3604 
3605 	gfx_v11_0_cp_gfx_enable(adev, false);
3606 
3607 	if (adev->gfx.rs64_enable)
3608 		r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3609 	else
3610 		r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3611 	if (r) {
3612 		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3613 		return r;
3614 	}
3615 
3616 	if (adev->gfx.rs64_enable)
3617 		r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3618 	else
3619 		r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3620 	if (r) {
3621 		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3622 		return r;
3623 	}
3624 
3625 	return 0;
3626 }
3627 
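/*
 * Initialize the CP and emit the clear-state buffer on gfx ring 0
 * (plus a CLEAR_STATE packet on ring 1 when two gfx rings are enabled).
 */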
3628 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3629 {
3630 	struct amdgpu_ring *ring;
3631 	const struct cs_section_def *sect = NULL;
3632 	const struct cs_extent_def *ext = NULL;
3633 	int r, i;
3634 	int ctx_reg_offset;
3635 
3636 	/* init the CP */
3637 	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3638 		     adev->gfx.config.max_hw_contexts - 1);
3639 	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3640 
3641 	if (!amdgpu_async_gfx_ring)
3642 		gfx_v11_0_cp_gfx_enable(adev, true);
3643 
3644 	ring = &adev->gfx.gfx_ring[0];
3645 	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3646 	if (r) {
3647 		drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
3648 		return r;
3649 	}
3650 
3651 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3652 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3653 
3654 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3655 	amdgpu_ring_write(ring, 0x80000000);
3656 	amdgpu_ring_write(ring, 0x80000000);
3657 
3658 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3659 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3660 			if (sect->id == SECT_CONTEXT) {
3661 				amdgpu_ring_write(ring,
3662 						  PACKET3(PACKET3_SET_CONTEXT_REG,
3663 							  ext->reg_count));
3664 				amdgpu_ring_write(ring, ext->reg_index -
3665 						  PACKET3_SET_CONTEXT_REG_START);
3666 				for (i = 0; i < ext->reg_count; i++)
3667 					amdgpu_ring_write(ring, ext->extent[i]);
3668 			}
3669 		}
3670 	}
3671 
3672 	ctx_reg_offset =
3673 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3674 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3675 	amdgpu_ring_write(ring, ctx_reg_offset);
3676 	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3677 
3678 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3679 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3680 
3681 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3682 	amdgpu_ring_write(ring, 0);
3683 
3684 	amdgpu_ring_commit(ring);
3685 
3686 	/* submit cs packet to copy state 0 to next available state */
3687 	if (adev->gfx.num_gfx_rings > 1) {
3688 		/* maximum supported gfx ring is 2 */
3689 		ring = &adev->gfx.gfx_ring[1];
3690 		r = amdgpu_ring_alloc(ring, 2);
3691 		if (r) {
3692 			drm_err(adev_to_drm(adev), "cp failed to lock ring (%d).\n", r);
3693 			return r;
3694 		}
3695 
3696 		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3697 		amdgpu_ring_write(ring, 0);
3698 
3699 		amdgpu_ring_commit(ring);
3700 	}
3701 	return 0;
3702 }
3703 
3704 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3705 					 CP_PIPE_ID pipe)
3706 {
3707 	u32 tmp;
3708 
3709 	tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3710 	tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3711 
3712 	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3713 }
3714 
3715 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3716 					  struct amdgpu_ring *ring)
3717 {
3718 	u32 tmp;
3719 
3720 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3721 	if (ring->use_doorbell) {
3722 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3723 				    DOORBELL_OFFSET, ring->doorbell_index);
3724 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3725 				    DOORBELL_EN, 1);
3726 	} else {
3727 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3728 				    DOORBELL_EN, 0);
3729 	}
3730 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3731 
3732 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3733 			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
3734 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3735 
3736 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3737 		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3738 }
3739 
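/*
 * Program the ring buffer registers (base, size, rptr/wptr addresses,
 * doorbell) for gfx ring 0 on pipe 0, and ring 1 on pipe 1 when present,
 * then start the CP.
 */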
3740 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3741 {
3742 	struct amdgpu_ring *ring;
3743 	u32 tmp;
3744 	u32 rb_bufsz;
3745 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3746 
3747 	/* Set the write pointer delay */
3748 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3749 
3750 	/* set the RB to use vmid 0 */
3751 	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3752 
3753 	/* Init gfx ring 0 for pipe 0 */
3754 	mutex_lock(&adev->srbm_mutex);
3755 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3756 
3757 	/* Set ring buffer size */
3758 	ring = &adev->gfx.gfx_ring[0];
3759 	rb_bufsz = order_base_2(ring->ring_size / 8);
3760 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3761 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3762 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3763 
3764 	/* Initialize the ring buffer's write pointers */
3765 	ring->wptr = 0;
3766 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3767 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3768 
3769 	/* set the wb address whether it's enabled or not */
3770 	rptr_addr = ring->rptr_gpu_addr;
3771 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3772 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3773 		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3774 
3775 	wptr_gpu_addr = ring->wptr_gpu_addr;
3776 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3777 		     lower_32_bits(wptr_gpu_addr));
3778 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3779 		     upper_32_bits(wptr_gpu_addr));
3780 
3781 	mdelay(1);
3782 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3783 
3784 	rb_addr = ring->gpu_addr >> 8;
3785 	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3786 	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3787 
3788 	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3789 
3790 	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3791 	mutex_unlock(&adev->srbm_mutex);
3792 
3793 	/* Init gfx ring 1 for pipe 1 */
3794 	if (adev->gfx.num_gfx_rings > 1) {
3795 		mutex_lock(&adev->srbm_mutex);
3796 		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3797 		/* maximum supported gfx ring is 2 */
3798 		ring = &adev->gfx.gfx_ring[1];
3799 		rb_bufsz = order_base_2(ring->ring_size / 8);
3800 		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3801 		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3802 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3803 		/* Initialize the ring buffer's write pointers */
3804 		ring->wptr = 0;
3805 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3806 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3807 		/* Set the wb address whether it's enabled or not */
3808 		rptr_addr = ring->rptr_gpu_addr;
3809 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3810 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3811 			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3812 		wptr_gpu_addr = ring->wptr_gpu_addr;
3813 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3814 			     lower_32_bits(wptr_gpu_addr));
3815 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3816 			     upper_32_bits(wptr_gpu_addr));
3817 
3818 		mdelay(1);
3819 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3820 
3821 		rb_addr = ring->gpu_addr >> 8;
3822 		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3823 		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3824 		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3825 
3826 		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3827 		mutex_unlock(&adev->srbm_mutex);
3828 	}
3829 	/* Switch to pipe 0 */
3830 	mutex_lock(&adev->srbm_mutex);
3831 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3832 	mutex_unlock(&adev->srbm_mutex);
3833 
3834 	/* start the ring */
3835 	gfx_v11_0_cp_gfx_start(adev);
3836 
3837 	return 0;
3838 }
3839 
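/*
 * Halt or activate the MEC pipes: via CP_MEC_RS64_CNTL on RS64 parts,
 * via CP_MEC_CNTL otherwise.
 */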
3840 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3841 {
3842 	u32 data;
3843 
3844 	if (adev->gfx.rs64_enable) {
3845 		data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3846 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3847 							 enable ? 0 : 1);
3848 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3849 							 enable ? 0 : 1);
3850 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3851 							 enable ? 0 : 1);
3852 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3853 							 enable ? 0 : 1);
3854 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3855 							 enable ? 0 : 1);
3856 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3857 							 enable ? 1 : 0);
3858 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3859 							 enable ? 1 : 0);
3860 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3861 							 enable ? 1 : 0);
3862 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3863 							 enable ? 1 : 0);
3864 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3865 							 enable ? 0 : 1);
3866 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3867 	} else {
3868 		data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3869 
3870 		if (enable) {
3871 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3872 			if (!adev->enable_mes_kiq)
3873 				data = REG_SET_FIELD(data, CP_MEC_CNTL,
3874 						     MEC_ME2_HALT, 0);
3875 		} else {
3876 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3877 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3878 		}
3879 		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3880 	}
3881 
3882 	udelay(50);
3883 }
3884 
3885 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3886 {
3887 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3888 	const __le32 *fw_data;
3889 	unsigned i, fw_size;
3890 	u32 *fw = NULL;
3891 	int r;
3892 
3893 	if (!adev->gfx.mec_fw)
3894 		return -EINVAL;
3895 
3896 	gfx_v11_0_cp_compute_enable(adev, false);
3897 
3898 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3899 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3900 
3901 	fw_data = (const __le32 *)
3902 		(adev->gfx.mec_fw->data +
3903 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3904 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3905 
3906 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3907 					  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3908 					  &adev->gfx.mec.mec_fw_obj,
3909 					  &adev->gfx.mec.mec_fw_gpu_addr,
3910 					  (void **)&fw);
3911 	if (r) {
3912 		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3913 		gfx_v11_0_mec_fini(adev);
3914 		return r;
3915 	}
3916 
3917 	memcpy(fw, fw_data, fw_size);
3918 
3919 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3920 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3921 
3922 	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3923 
3924 	/* MEC1 */
3925 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3926 
3927 	for (i = 0; i < mec_hdr->jt_size; i++)
3928 		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3929 			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3930 
3931 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3932 
3933 	return 0;
3934 }
3935 
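/*
 * RS64 MEC load: copy the instruction and data images into 64KB-aligned
 * buffers, program the per-pipe base and start-address registers, then
 * invalidate the MEC data and instruction caches.
 */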
3936 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3937 {
3938 	const struct gfx_firmware_header_v2_0 *mec_hdr;
3939 	const __le32 *fw_ucode, *fw_data;
3940 	u32 tmp, fw_ucode_size, fw_data_size;
3941 	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3942 	u32 *fw_ucode_ptr, *fw_data_ptr;
3943 	int r;
3944 
3945 	if (!adev->gfx.mec_fw)
3946 		return -EINVAL;
3947 
3948 	gfx_v11_0_cp_compute_enable(adev, false);
3949 
3950 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3951 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3952 
3953 	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3954 				le32_to_cpu(mec_hdr->ucode_offset_bytes));
3955 	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3956 
3957 	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3958 				le32_to_cpu(mec_hdr->data_offset_bytes));
3959 	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3960 
3961 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3962 				      64 * 1024,
3963 				      AMDGPU_GEM_DOMAIN_VRAM |
3964 				      AMDGPU_GEM_DOMAIN_GTT,
3965 				      &adev->gfx.mec.mec_fw_obj,
3966 				      &adev->gfx.mec.mec_fw_gpu_addr,
3967 				      (void **)&fw_ucode_ptr);
3968 	if (r) {
3969 		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3970 		gfx_v11_0_mec_fini(adev);
3971 		return r;
3972 	}
3973 
3974 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3975 				      64 * 1024,
3976 				      AMDGPU_GEM_DOMAIN_VRAM |
3977 				      AMDGPU_GEM_DOMAIN_GTT,
3978 				      &adev->gfx.mec.mec_fw_data_obj,
3979 				      &adev->gfx.mec.mec_fw_data_gpu_addr,
3980 				      (void **)&fw_data_ptr);
3981 	if (r) {
3982 		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3983 		gfx_v11_0_mec_fini(adev);
3984 		return r;
3985 	}
3986 
3987 	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3988 	memcpy(fw_data_ptr, fw_data, fw_data_size);
3989 
3990 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3991 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3992 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3993 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3994 
3995 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3996 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3997 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3998 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3999 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
4000 
4001 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
4002 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
4003 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
4004 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
4005 
4006 	mutex_lock(&adev->srbm_mutex);
4007 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
4008 		soc21_grbm_select(adev, 1, i, 0, 0);
4009 
4010 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
4011 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
4012 		     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
4013 
4014 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
4015 					mec_hdr->ucode_start_addr_lo >> 2 |
4016 					mec_hdr->ucode_start_addr_hi << 30);
4017 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
4018 					mec_hdr->ucode_start_addr_hi >> 2);
4019 
4020 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
4021 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
4022 		     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
4023 	}
4024 	soc21_grbm_select(adev, 0, 0, 0, 0);
4025 	mutex_unlock(&adev->srbm_mutex);
4026 
4027 	/* Trigger an invalidation of the L1 data cache */
4028 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
4029 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
4030 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
4031 
4032 	/* Wait for invalidation complete */
4033 	for (i = 0; i < usec_timeout; i++) {
4034 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
4035 		if (REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
4036 				  INVALIDATE_DCACHE_COMPLETE) == 1)
4037 			break;
4038 		udelay(1);
4039 	}
4040 
4041 	if (i >= usec_timeout) {
4042 		dev_err(adev->dev, "failed to invalidate data cache\n");
4043 		return -EINVAL;
4044 	}
4045 
4046 	/* Trigger an invalidation of the L1 instruction caches */
4047 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
4048 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
4049 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
4050 
4051 	/* Wait for invalidation complete */
4052 	for (i = 0; i < usec_timeout; i++) {
4053 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
4054 		if (REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
4055 				  INVALIDATE_CACHE_COMPLETE) == 1)
4056 			break;
4057 		udelay(1);
4058 	}
4059 
4060 	if (i >= usec_timeout) {
4061 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
4062 		return -EINVAL;
4063 	}
4064 
4065 	return 0;
4066 }
4067 
4068 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
4069 {
4070 	uint32_t tmp;
4071 	struct amdgpu_device *adev = ring->adev;
4072 
4073 	/* tell RLC which is KIQ queue */
4074 	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
4075 	tmp &= 0xffffff00;
4076 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4077 	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
4078 }
4079 
4080 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
4081 {
4082 	/* set graphics engine doorbell range */
4083 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
4084 		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
4085 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
4086 		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
4087 
4088 	/* set compute engine doorbell range */
4089 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
4090 		     (adev->doorbell_index.kiq * 2) << 2);
4091 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
4092 		     (adev->doorbell_index.userqueue_end * 2) << 2);
4093 }
4094 
4095 static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
4096 					   struct v11_gfx_mqd *mqd,
4097 					   struct amdgpu_mqd_prop *prop)
4098 {
4099 	bool priority = false;
4100 	u32 tmp;
4101 
4102 	/* set up default queue priority level
4103 	 * 0x0 = low priority, 0x1 = high priority
4104 	 */
4105 	if (prop->hqd_queue_priority == AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM)
4106 		priority = true;
4107 
4108 	tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
4109 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
4110 	mqd->cp_gfx_hqd_queue_priority = tmp;
4111 }
4112 
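/*
 * Fill a v11 gfx MQD from the queue properties: MQD/ring base addresses,
 * rptr/wptr report addresses, ring size, doorbell, priority and the
 * gfx userqueue (shadow/CSA/fence) addresses.
 */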
4113 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
4114 				  struct amdgpu_mqd_prop *prop)
4115 {
4116 	struct v11_gfx_mqd *mqd = m;
4117 	uint64_t hqd_gpu_addr, wb_gpu_addr;
4118 	uint32_t tmp;
4119 	uint32_t rb_bufsz;
4120 
4121 	/* set up gfx hqd wptr */
4122 	mqd->cp_gfx_hqd_wptr = 0;
4123 	mqd->cp_gfx_hqd_wptr_hi = 0;
4124 
4125 	/* set the pointer to the MQD */
4126 	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
4127 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
4128 
4129 	/* set up mqd control */
4130 	tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
4131 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
4132 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
4133 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
4134 	mqd->cp_gfx_mqd_control = tmp;
4135 
4136 	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
4137 	tmp = regCP_GFX_HQD_VMID_DEFAULT;
4138 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
4139 	mqd->cp_gfx_hqd_vmid = tmp;
4140 
4141 	/* set up gfx queue priority */
4142 	gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);
4143 
4144 	/* set up time quantum */
4145 	tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
4146 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
4147 	mqd->cp_gfx_hqd_quantum = tmp;
4148 
4149 	/* set up gfx hqd base. this is similar to CP_RB_BASE */
4150 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
4151 	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
4152 	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
4153 
4154 	/* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
4155 	wb_gpu_addr = prop->rptr_gpu_addr;
4156 	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
4157 	mqd->cp_gfx_hqd_rptr_addr_hi =
4158 		upper_32_bits(wb_gpu_addr) & 0xffff;
4159 
4160 	/* set up rb_wptr_poll addr */
4161 	wb_gpu_addr = prop->wptr_gpu_addr;
4162 	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4163 	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4164 
4165 	/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
4166 	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
4167 	tmp = regCP_GFX_HQD_CNTL_DEFAULT;
4168 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
4169 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
4170 #ifdef __BIG_ENDIAN
4171 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
4172 #endif
4173 	if (prop->tmz_queue)
4174 		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
4175 	if (!prop->kernel_queue)
4176 		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
4177 	mqd->cp_gfx_hqd_cntl = tmp;
4178 
4179 	/* set up cp_doorbell_control */
4180 	tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
4181 	if (prop->use_doorbell) {
4182 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4183 				    DOORBELL_OFFSET, prop->doorbell_index);
4184 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4185 				    DOORBELL_EN, 1);
4186 	} else {
4187 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4188 	}
4189 	mqd->cp_rb_doorbell_control = tmp;
4190 
4191 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4192 	mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
4193 
4194 	/* activate the queue */
4195 	mqd->cp_gfx_hqd_active = 1;
4196 
4197 	/* set gfx UQ items */
4198 	mqd->shadow_base_lo = lower_32_bits(prop->shadow_addr);
4199 	mqd->shadow_base_hi = upper_32_bits(prop->shadow_addr);
4200 	mqd->gds_bkup_base_lo = lower_32_bits(prop->gds_bkup_addr);
4201 	mqd->gds_bkup_base_hi = upper_32_bits(prop->gds_bkup_addr);
4202 	mqd->fw_work_area_base_lo = lower_32_bits(prop->csa_addr);
4203 	mqd->fw_work_area_base_hi = upper_32_bits(prop->csa_addr);
4204 	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
4205 	mqd->fence_address_hi = upper_32_bits(prop->fence_address);
4206 
4207 	return 0;
4208 }
4209 
4210 static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
4211 {
4212 	struct amdgpu_device *adev = ring->adev;
4213 	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
4214 	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
4215 
4216 	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
4217 		memset((void *)mqd, 0, sizeof(*mqd));
4218 		mutex_lock(&adev->srbm_mutex);
4219 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4220 		amdgpu_ring_init_mqd(ring);
4221 		soc21_grbm_select(adev, 0, 0, 0, 0);
4222 		mutex_unlock(&adev->srbm_mutex);
4223 		if (adev->gfx.me.mqd_backup[mqd_idx])
4224 			memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4225 	} else {
4226 		/* restore mqd with the backup copy */
4227 		if (adev->gfx.me.mqd_backup[mqd_idx])
4228 			memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
4229 		/* reset the ring */
4230 		ring->wptr = 0;
4231 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4232 		amdgpu_ring_clear_ring(ring);
4233 	}
4234 
4235 	return 0;
4236 }
4237 
4238 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
4239 {
4240 	int r, i;
4241 
4242 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4243 		r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
4244 		if (r)
4245 			return r;
4246 	}
4247 
4248 	r = amdgpu_gfx_enable_kgq(adev, 0);
4249 	if (r)
4250 		return r;
4251 
4252 	return gfx_v11_0_cp_gfx_start(adev);
4253 }
4254 
4255 static void gfx_v11_0_compute_mqd_set_cu_mask(struct amdgpu_device *adev,
4256 					      struct v11_compute_mqd *mqd,
4257 					      struct amdgpu_mqd_prop *prop)
4258 {
4259 	uint32_t se_mask[8] = {0};
4260 	uint32_t wa_mask;
4261 	bool has_wa_flag = prop->cu_flags & (AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE |
4262 					  AMDGPU_UPDATE_FLAG_DBG_WA_DISABLE);
4263 
4264 	if (!has_wa_flag && (!prop->cu_mask || !prop->cu_mask_count))
4265 		return;
4266 
4267 	if (has_wa_flag) {
4268 		wa_mask = (prop->cu_flags & AMDGPU_UPDATE_FLAG_DBG_WA_ENABLE) ?
4269 			  0xffff : 0xffffffff;
4270 		mqd->compute_static_thread_mgmt_se0 = wa_mask;
4271 		mqd->compute_static_thread_mgmt_se1 = wa_mask;
4272 		mqd->compute_static_thread_mgmt_se2 = wa_mask;
4273 		mqd->compute_static_thread_mgmt_se3 = wa_mask;
4274 		return;
4275 	}
4276 
4277 	amdgpu_gfx_mqd_symmetrically_map_cu_mask(adev, prop->cu_mask,
4278 						prop->cu_mask_count, se_mask);
4279 
4280 	mqd->compute_static_thread_mgmt_se0 = se_mask[0];
4281 	mqd->compute_static_thread_mgmt_se1 = se_mask[1];
4282 	mqd->compute_static_thread_mgmt_se2 = se_mask[2];
4283 	mqd->compute_static_thread_mgmt_se3 = se_mask[3];
4284 }
4285 
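/*
 * Fill a v11 compute MQD from the queue properties: EOP buffer, MQD and
 * PQ base addresses, PQ control, doorbell, priorities and the CU mask.
 */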
4286 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
4287 				      struct amdgpu_mqd_prop *prop)
4288 {
4289 	struct v11_compute_mqd *mqd = m;
4290 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4291 	uint32_t tmp;
4292 
4293 	mqd->header = 0xC0310800;
4294 	mqd->compute_pipelinestat_enable = 0x00000001;
4295 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4296 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4297 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4298 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4299 	mqd->compute_misc_reserved = 0x00000007;
4300 
4301 	eop_base_addr = prop->eop_gpu_addr >> 8;
4302 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4303 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4304 
4305 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4306 	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
4307 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4308 			(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
4309 
4310 	mqd->cp_hqd_eop_control = tmp;
4311 
4312 	/* enable doorbell? */
4313 	tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
4314 
4315 	if (prop->use_doorbell) {
4316 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4317 				    DOORBELL_OFFSET, prop->doorbell_index);
4318 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4319 				    DOORBELL_EN, 1);
4320 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4321 				    DOORBELL_SOURCE, 0);
4322 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4323 				    DOORBELL_HIT, 0);
4324 	} else {
4325 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4326 				    DOORBELL_EN, 0);
4327 	}
4328 
4329 	mqd->cp_hqd_pq_doorbell_control = tmp;
4330 
4331 	/* disable the queue if it's active */
4332 	mqd->cp_hqd_dequeue_request = 0;
4333 	mqd->cp_hqd_pq_rptr = 0;
4334 	mqd->cp_hqd_pq_wptr_lo = 0;
4335 	mqd->cp_hqd_pq_wptr_hi = 0;
4336 
4337 	/* set the pointer to the MQD */
4338 	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
4339 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
4340 
4341 	/* set MQD vmid to 0 */
4342 	tmp = regCP_MQD_CONTROL_DEFAULT;
4343 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4344 	mqd->cp_mqd_control = tmp;
4345 
4346 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4347 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
4348 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4349 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4350 
4351 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4352 	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
4353 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4354 			    (order_base_2(prop->queue_size / 4) - 1));
4355 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4356 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4357 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
4358 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
4359 			    prop->allow_tunneling);
4360 	if (prop->kernel_queue) {
4361 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4362 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4363 	}
4364 	if (prop->tmz_queue)
4365 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
4366 	mqd->cp_hqd_pq_control = tmp;
4367 
4368 	/* set the wb address whether it's enabled or not */
4369 	wb_gpu_addr = prop->rptr_gpu_addr;
4370 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4371 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4372 		upper_32_bits(wb_gpu_addr) & 0xffff;
4373 
4374 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4375 	wb_gpu_addr = prop->wptr_gpu_addr;
4376 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4377 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4378 
4379 	tmp = 0;
4380 	/* enable the doorbell if requested */
4381 	if (prop->use_doorbell) {
4382 		tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
4383 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4384 				DOORBELL_OFFSET, prop->doorbell_index);
4385 
4386 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4387 				    DOORBELL_EN, 1);
4388 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4389 				    DOORBELL_SOURCE, 0);
4390 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4391 				    DOORBELL_HIT, 0);
4392 	}
4393 
4394 	mqd->cp_hqd_pq_doorbell_control = tmp;
4395 
4396 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4397 	mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
4398 
4399 	/* set the vmid for the queue */
4400 	mqd->cp_hqd_vmid = 0;
4401 
4402 	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
4403 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
4404 	mqd->cp_hqd_persistent_state = tmp;
4405 
4406 	/* set MIN_IB_AVAIL_SIZE */
4407 	tmp = regCP_HQD_IB_CONTROL_DEFAULT;
4408 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4409 	mqd->cp_hqd_ib_control = tmp;
4410 
4411 	/* set static priority for a compute queue/ring */
4412 	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
4413 	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
4414 
4415 	mqd->cp_hqd_active = prop->hqd_active;
4416 
4417 	/* set UQ fence address */
4418 	mqd->fence_address_lo = lower_32_bits(prop->fence_address);
4419 	mqd->fence_address_hi = upper_32_bits(prop->fence_address);
4420 	/* set CU mask */
4421 	gfx_v11_0_compute_mqd_set_cu_mask(adev, mqd, prop);
4422 
4423 	return 0;
4424 }
4425 
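/*
 * Write the prepared compute MQD into the HQD registers of the currently
 * selected queue and activate it; used for the KIQ ring.
 */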
4426 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
4427 {
4428 	struct amdgpu_device *adev = ring->adev;
4429 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4430 	int j;
4431 
4432 	/* deactivate the queue */
4433 	if (amdgpu_sriov_vf(adev))
4434 		WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
4435 
4436 	/* disable wptr polling */
4437 	WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4438 
4439 	/* write the EOP addr */
4440 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
4441 	       mqd->cp_hqd_eop_base_addr_lo);
4442 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
4443 	       mqd->cp_hqd_eop_base_addr_hi);
4444 
4445 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4446 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
4447 	       mqd->cp_hqd_eop_control);
4448 
4449 	/* enable doorbell? */
4450 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4451 	       mqd->cp_hqd_pq_doorbell_control);
4452 
4453 	/* disable the queue if it's active */
4454 	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
4455 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
4456 		for (j = 0; j < adev->usec_timeout; j++) {
4457 			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
4458 				break;
4459 			udelay(1);
4460 		}
4461 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
4462 		       mqd->cp_hqd_dequeue_request);
4463 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
4464 		       mqd->cp_hqd_pq_rptr);
4465 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4466 		       mqd->cp_hqd_pq_wptr_lo);
4467 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4468 		       mqd->cp_hqd_pq_wptr_hi);
4469 	}
4470 
4471 	/* set the pointer to the MQD */
4472 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
4473 	       mqd->cp_mqd_base_addr_lo);
4474 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
4475 	       mqd->cp_mqd_base_addr_hi);
4476 
4477 	/* set MQD vmid to 0 */
4478 	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
4479 	       mqd->cp_mqd_control);
4480 
4481 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4482 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
4483 	       mqd->cp_hqd_pq_base_lo);
4484 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
4485 	       mqd->cp_hqd_pq_base_hi);
4486 
4487 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4488 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
4489 	       mqd->cp_hqd_pq_control);
4490 
4491 	/* set the wb address whether it's enabled or not */
4492 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
4493 		mqd->cp_hqd_pq_rptr_report_addr_lo);
4494 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4495 		mqd->cp_hqd_pq_rptr_report_addr_hi);
4496 
4497 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4498 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
4499 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
4500 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4501 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4502 
4503 	/* enable the doorbell if requested */
4504 	if (ring->use_doorbell) {
4505 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
4506 			(adev->doorbell_index.kiq * 2) << 2);
4507 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
4508 			(adev->doorbell_index.userqueue_end * 2) << 2);
4509 	}
4510 
4511 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4512 	       mqd->cp_hqd_pq_doorbell_control);
4513 
4514 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4515 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4516 	       mqd->cp_hqd_pq_wptr_lo);
4517 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4518 	       mqd->cp_hqd_pq_wptr_hi);
4519 
4520 	/* set the vmid for the queue */
4521 	WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
4522 
4523 	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
4524 	       mqd->cp_hqd_persistent_state);
4525 
4526 	/* activate the queue */
4527 	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
4528 	       mqd->cp_hqd_active);
4529 
4530 	if (ring->use_doorbell)
4531 		WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4532 
4533 	return 0;
4534 }
4535 
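/*
 * Initialize the KIQ MQD and HQD registers; on GPU reset the MQD is
 * restored from the backup copy instead of being regenerated.
 */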
4536 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
4537 {
4538 	struct amdgpu_device *adev = ring->adev;
4539 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4540 
4541 	gfx_v11_0_kiq_setting(ring);
4542 
4543 	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4544 		/* reset MQD to a clean status */
4545 		if (adev->gfx.kiq[0].mqd_backup)
4546 			memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
4547 
4548 		/* reset ring buffer */
4549 		ring->wptr = 0;
4550 		amdgpu_ring_clear_ring(ring);
4551 
4552 		mutex_lock(&adev->srbm_mutex);
4553 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4554 		gfx_v11_0_kiq_init_register(ring);
4555 		soc21_grbm_select(adev, 0, 0, 0, 0);
4556 		mutex_unlock(&adev->srbm_mutex);
4557 	} else {
4558 		memset((void *)mqd, 0, sizeof(*mqd));
4559 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4560 			amdgpu_ring_clear_ring(ring);
4561 		mutex_lock(&adev->srbm_mutex);
4562 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4563 		amdgpu_ring_init_mqd(ring);
4564 		gfx_v11_0_kiq_init_register(ring);
4565 		soc21_grbm_select(adev, 0, 0, 0, 0);
4566 		mutex_unlock(&adev->srbm_mutex);
4567 
4568 		if (adev->gfx.kiq[0].mqd_backup)
4569 			memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
4570 	}
4571 
4572 	return 0;
4573 }
4574 
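/*
 * Initialize a kernel compute queue (KCQ) MQD. On first init a fresh MQD
 * is generated and backed up; on reset or resume the backup is restored
 * and the ring buffer and write pointer are cleared. The HQD itself is
 * programmed later via amdgpu_gfx_enable_kcq().
 */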
4575 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
4576 {
4577 	struct amdgpu_device *adev = ring->adev;
4578 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4579 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4580 
4581 	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
4582 		memset((void *)mqd, 0, sizeof(*mqd));
4583 		mutex_lock(&adev->srbm_mutex);
4584 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4585 		amdgpu_ring_init_mqd(ring);
4586 		soc21_grbm_select(adev, 0, 0, 0, 0);
4587 		mutex_unlock(&adev->srbm_mutex);
4588 
4589 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4590 			memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4591 	} else {
4592 		/* restore MQD to a clean status */
4593 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4594 			memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4595 		/* reset ring buffer */
4596 		ring->wptr = 0;
4597 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4598 		amdgpu_ring_clear_ring(ring);
4599 	}
4600 
4601 	return 0;
4602 }
4603 
4604 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4605 {
4606 	gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
4607 	return 0;
4608 }
4609 
4610 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4611 {
4612 	int i, r;
4613 
4614 	if (!amdgpu_async_gfx_ring)
4615 		gfx_v11_0_cp_compute_enable(adev, true);
4616 
4617 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4618 		r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
4619 		if (r)
4620 			return r;
4621 	}
4622 
4623 	return amdgpu_gfx_enable_kcq(adev, 0);
4624 }
4625 
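/*
 * Bring up the command processor: load the CP microcode for the direct
 * load type, program the doorbell range, restore the KIQ, KCQs and gfx
 * rings, and ring-test every queue before declaring it ready.
 */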
4626 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4627 {
4628 	int r, i;
4629 	struct amdgpu_ring *ring;
4630 
4631 	if (!(adev->flags & AMD_IS_APU))
4632 		gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4633 
4634 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4635 		/* legacy firmware loading */
4636 		r = gfx_v11_0_cp_gfx_load_microcode(adev);
4637 		if (r)
4638 			return r;
4639 
4640 		if (adev->gfx.rs64_enable)
4641 			r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4642 		else
4643 			r = gfx_v11_0_cp_compute_load_microcode(adev);
4644 		if (r)
4645 			return r;
4646 	}
4647 
4648 	gfx_v11_0_cp_set_doorbell_range(adev);
4649 
4650 	if (amdgpu_async_gfx_ring) {
4651 		gfx_v11_0_cp_compute_enable(adev, true);
4652 		gfx_v11_0_cp_gfx_enable(adev, true);
4653 	}
4654 
4655 	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4656 		r = amdgpu_mes_kiq_hw_init(adev, 0);
4657 	else
4658 		r = gfx_v11_0_kiq_resume(adev);
4659 	if (r)
4660 		return r;
4661 
4662 	r = gfx_v11_0_kcq_resume(adev);
4663 	if (r)
4664 		return r;
4665 
4666 	if (!amdgpu_async_gfx_ring) {
4667 		r = gfx_v11_0_cp_gfx_resume(adev);
4668 		if (r)
4669 			return r;
4670 	} else {
4671 		r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4672 		if (r)
4673 			return r;
4674 	}
4675 
4676 	if (adev->gfx.disable_kq) {
4677 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4678 			ring = &adev->gfx.gfx_ring[i];
4679 			/* we don't want to set ring->ready */
4680 			r = amdgpu_ring_test_ring(ring);
4681 			if (r)
4682 				return r;
4683 		}
4684 		if (amdgpu_async_gfx_ring)
4685 			amdgpu_gfx_disable_kgq(adev, 0);
4686 	} else {
4687 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4688 			ring = &adev->gfx.gfx_ring[i];
4689 			r = amdgpu_ring_test_helper(ring);
4690 			if (r)
4691 				return r;
4692 		}
4693 	}
4694 
4695 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4696 		ring = &adev->gfx.compute_ring[i];
4697 		r = amdgpu_ring_test_helper(ring);
4698 		if (r)
4699 			return r;
4700 	}
4701 
4702 	return 0;
4703 }
4704 
4705 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4706 {
4707 	gfx_v11_0_cp_gfx_enable(adev, enable);
4708 	gfx_v11_0_cp_compute_enable(adev, enable);
4709 }
4710 
4711 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4712 {
4713 	int r;
4714 	bool value;
4715 
4716 	r = adev->gfxhub.funcs->gart_enable(adev);
4717 	if (r)
4718 		return r;
4719 
4720 	amdgpu_device_flush_hdp(adev, NULL);
4721 
4722 	value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
4723 
4724 	adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4725 	/* TODO: investigate why this and the hdp flush above are needed;
4726 	 * are we missing a flush somewhere else? */
4727 	adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
4728 
4729 	return 0;
4730 }
4731 
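/*
 * Select the CP microcode architecture: switch the ME engine and the MEC
 * ISA to RS64 mode when RS64 firmware is in use, otherwise leave the
 * legacy F32 engines selected.
 */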
4732 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4733 {
4734 	u32 tmp;
4735 
4736 	/* select RS64 */
4737 	if (adev->gfx.rs64_enable) {
4738 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4739 		tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4740 		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4741 
4742 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4743 		tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4744 		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4745 	}
4746 
4747 	if (amdgpu_emu_mode == 1)
4748 		msleep(100);
4749 }
4750 
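/*
 * Decode GB_ADDR_CONFIG into the cached gfx config fields (packers,
 * pipes, compressed fragments, RBs, shader engines and pipe interleave
 * size). Returns -EINVAL if the register reads back as zero.
 */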
4751 static int get_gb_addr_config(struct amdgpu_device *adev)
4752 {
4753 	u32 gb_addr_config;
4754 
4755 	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4756 	if (gb_addr_config == 0)
4757 		return -EINVAL;
4758 
4759 	adev->gfx.config.gb_addr_config_fields.num_pkrs =
4760 		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4761 
4762 	adev->gfx.config.gb_addr_config = gb_addr_config;
4763 
4764 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4765 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4766 				      GB_ADDR_CONFIG, NUM_PIPES);
4767 
4768 	adev->gfx.config.max_tile_pipes =
4769 		adev->gfx.config.gb_addr_config_fields.num_pipes;
4770 
4771 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4772 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4773 				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4774 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4775 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4776 				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
4777 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4778 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4779 				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4780 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4781 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4782 				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4783 
4784 	return 0;
4785 }
4786 
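/*
 * Set the GPA override bits in the CPC and CPG PSP debug registers so
 * the CP bypasses GPA mode; needed when firmware is loaded through the
 * backdoor (direct) path.
 */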
4787 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4788 {
4789 	uint32_t data;
4790 
4791 	data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4792 	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4793 	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4794 
4795 	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4796 	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4797 	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4798 }
4799 
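/*
 * hw_init sequence: initialize the cleaner shader, load or autoload the
 * IMU/RLC firmware depending on the load type, enable the gfxhub, apply
 * the golden registers, resume the RLC and finally bring up the CP rings.
 */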
4800 static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
4801 {
4802 	int r;
4803 	struct amdgpu_device *adev = ip_block->adev;
4804 
4805 	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
4806 				       adev->gfx.cleaner_shader_ptr);
4807 
4808 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4809 		if (adev->gfx.imu.funcs) {
4810 			/* RLC autoload sequence 1: Program rlc ram */
4811 			if (adev->gfx.imu.funcs->program_rlc_ram)
4812 				adev->gfx.imu.funcs->program_rlc_ram(adev);
4813 			/* rlc autoload firmware */
4814 			r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4815 			if (r)
4816 				return r;
4817 		}
4818 	} else {
4819 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4820 			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4821 				if (adev->gfx.imu.funcs->load_microcode)
4822 					adev->gfx.imu.funcs->load_microcode(adev);
4823 				if (adev->gfx.imu.funcs->setup_imu)
4824 					adev->gfx.imu.funcs->setup_imu(adev);
4825 				if (adev->gfx.imu.funcs->start_imu)
4826 					adev->gfx.imu.funcs->start_imu(adev);
4827 			}
4828 
4829 			/* disable gpa mode in backdoor loading */
4830 			gfx_v11_0_disable_gpa_mode(adev);
4831 		}
4832 	}
4833 
4834 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4835 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4836 		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4837 		if (r) {
4838 			dev_err(adev->dev, "(%d) failed to wait for rlc autoload to complete\n", r);
4839 			return r;
4840 		}
4841 	}
4842 
4843 	adev->gfx.is_poweron = true;
4844 
4845 	if (get_gb_addr_config(adev))
4846 		drm_warn(adev_to_drm(adev), "Invalid gb_addr_config!\n");
4847 
4848 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4849 	    adev->gfx.rs64_enable)
4850 		gfx_v11_0_config_gfx_rs64(adev);
4851 
4852 	r = gfx_v11_0_gfxhub_enable(adev);
4853 	if (r)
4854 		return r;
4855 
4856 	if (!amdgpu_emu_mode)
4857 		gfx_v11_0_init_golden_registers(adev);
4858 
4859 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4860 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4861 		/*
4862 		 * For gfx 11, RLC firmware loading depends on the SMU firmware
4863 		 * having been loaded first, so for the direct load type the SMC
4864 		 * ucode has to be loaded here before the RLC.
4865 		 */
4866 		r = amdgpu_pm_load_smu_firmware(adev, NULL);
4867 		if (r)
4868 			return r;
4869 	}
4870 
4871 	gfx_v11_0_constants_init(adev);
4872 
4873 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4874 		gfx_v11_0_select_cp_fw_arch(adev);
4875 
4876 	if (adev->nbio.funcs->gc_doorbell_init)
4877 		adev->nbio.funcs->gc_doorbell_init(adev);
4878 
4879 	r = gfx_v11_0_rlc_resume(adev);
4880 	if (r)
4881 		return r;
4882 
4883 	/*
4884 	 * init golden registers and rlc resume may override some registers,
4885 	 * reconfig them here
4886 	 */
4887 	gfx_v11_0_tcp_harvest(adev);
4888 
4889 	r = gfx_v11_0_cp_resume(adev);
4890 	if (r)
4891 		return r;
4892 
4893 	/* get IMU version from HW if it's not set */
4894 	if (!adev->gfx.imu_fw_version)
4895 		adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);
4896 
4897 	return r;
4898 }
4899 
4900 static int gfx_v11_0_set_userq_eop_interrupts(struct amdgpu_device *adev,
4901 					      bool enable)
4902 {
4903 	unsigned int irq_type;
4904 	int m, p, r;
4905 
4906 	if (adev->userq_funcs[AMDGPU_HW_IP_GFX]) {
4907 		for (m = 0; m < adev->gfx.me.num_me; m++) {
4908 			for (p = 0; p < adev->gfx.me.num_pipe_per_me; p++) {
4909 				irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + p;
4910 				if (enable)
4911 					r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
4912 							   irq_type);
4913 				else
4914 					r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
4915 							   irq_type);
4916 				if (r)
4917 					return r;
4918 			}
4919 		}
4920 	}
4921 
4922 	if (adev->userq_funcs[AMDGPU_HW_IP_COMPUTE]) {
4923 		for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
4924 			for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
4925 				irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
4926 					+ (m * adev->gfx.mec.num_pipe_per_mec)
4927 					+ p;
4928 				if (enable)
4929 					r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
4930 							   irq_type);
4931 				else
4932 					r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
4933 							   irq_type);
4934 				if (r)
4935 					return r;
4936 			}
4937 		}
4938 	}
4939 
4940 	return 0;
4941 }
4942 
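/*
 * hw_fini tears the gfx block down: release the privileged and bad-opcode
 * interrupts, unmap the kernel gfx and compute queues, and on bare metal
 * stop the CP and disable the gfxhub. Under SR-IOV the CP is left running
 * so it can perform IDLE-SAVE during world switch.
 */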
4943 static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
4944 {
4945 	struct amdgpu_device *adev = ip_block->adev;
4946 
4947 	cancel_delayed_work_sync(&adev->gfx.idle_work);
4948 
4949 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4950 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4951 	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
4952 	gfx_v11_0_set_userq_eop_interrupts(adev, false);
4953 
4954 	if (!adev->no_hw_access) {
4955 		if (amdgpu_async_gfx_ring &&
4956 		    !adev->gfx.disable_kq) {
4957 			if (amdgpu_gfx_disable_kgq(adev, 0))
4958 				DRM_ERROR("KGQ disable failed\n");
4959 		}
4960 
4961 		if (amdgpu_gfx_disable_kcq(adev, 0))
4962 			DRM_ERROR("KCQ disable failed\n");
4963 
4964 		amdgpu_mes_kiq_hw_fini(adev, 0);
4965 	}
4966 
4967 	if (amdgpu_sriov_vf(adev))
4968 		/* Remove the steps disabling CPG and clearing KIQ position,
4969 		 * so that the CP can perform IDLE-SAVE during world switch.
4970 		 * Those steps are necessary to avoid a DMAR error in gfx9,
4971 		 * but the error has not been reproduced on gfx11.
4972 		 */
4973 		return 0;
4974 
4975 	gfx_v11_0_cp_enable(adev, false);
4976 	gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4977 
4978 	adev->gfxhub.funcs->gart_disable(adev);
4979 
4980 	adev->gfx.is_poweron = false;
4981 
4982 	return 0;
4983 }
4984 
4985 static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block)
4986 {
4987 	return gfx_v11_0_hw_fini(ip_block);
4988 }
4989 
4990 static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block)
4991 {
4992 	return gfx_v11_0_hw_init(ip_block);
4993 }
4994 
4995 static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
4996 {
4997 	struct amdgpu_device *adev = ip_block->adev;
4998 
4999 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
5000 				GRBM_STATUS, GUI_ACTIVE))
5001 		return false;
5002 	else
5003 		return true;
5004 }
5005 
5006 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
5007 {
5008 	unsigned i;
5009 	u32 tmp;
5010 	struct amdgpu_device *adev = ip_block->adev;
5011 
5012 	for (i = 0; i < adev->usec_timeout; i++) {
5013 		/* read GRBM_STATUS */
5014 		tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
5015 			GRBM_STATUS__GUI_ACTIVE_MASK;
5016 
5017 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
5018 			return 0;
5019 		udelay(1);
5020 	}
5021 	return -ETIMEDOUT;
5022 }
5023 
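/**
 * gfx_v11_0_request_gfx_index_mutex - acquire or release CP_GFX_INDEX_MUTEX
 * @adev: amdgpu device pointer
 * @req: true to request the hardware mutex, false to release it
 *
 * Polls for up to adev->usec_timeout microseconds until the mutex is
 * granted (or released). Returns 0 on success, -EINVAL on timeout.
 */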
5024 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
5025 				      bool req)
5026 {
5027 	u32 i, tmp, val;
5028 
5029 	for (i = 0; i < adev->usec_timeout; i++) {
5030 		/* Request with MeId=2, PipeId=0 */
5031 		tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
5032 		tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
5033 		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
5034 
5035 		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
5036 		if (req) {
5037 			if (val == tmp)
5038 				break;
5039 		} else {
5040 			tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
5041 					    REQUEST, 1);
5042 
5043 			/* unlocked or locked by firmware */
5044 			if (val != tmp)
5045 				break;
5046 		}
5047 		udelay(1);
5048 	}
5049 
5050 	if (i >= adev->usec_timeout)
5051 		return -EINVAL;
5052 
5053 	return 0;
5054 }
5055 
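/*
 * Soft reset sequence: mask the CP interrupts, request a dequeue on every
 * gfx and compute queue, reset all VMIDs except VMID 0 via CP_VMID_RESET
 * while holding the gfx index mutex, pulse the GRBM SOFT_RESET bits for
 * the CP blocks and finally re-enable the interrupts and resume the CP.
 */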
5056 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
5057 {
5058 	u32 grbm_soft_reset = 0;
5059 	u32 tmp;
5060 	int r, i, j, k;
5061 	struct amdgpu_device *adev = ip_block->adev;
5062 
5063 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5064 
5065 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
5066 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
5067 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
5068 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
5069 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
5070 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
5071 
5072 	mutex_lock(&adev->srbm_mutex);
5073 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
5074 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
5075 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
5076 				soc21_grbm_select(adev, i, k, j, 0);
5077 
5078 				WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
5079 				WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
5080 			}
5081 		}
5082 	}
5083 	for (i = 0; i < adev->gfx.me.num_me; ++i) {
5084 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
5085 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
5086 				soc21_grbm_select(adev, i, k, j, 0);
5087 
5088 				WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
5089 			}
5090 		}
5091 	}
5092 	soc21_grbm_select(adev, 0, 0, 0, 0);
5093 	mutex_unlock(&adev->srbm_mutex);
5094 
5095 	/* Try to acquire the gfx mutex before access to CP_VMID_RESET */
5096 	mutex_lock(&adev->gfx.reset_sem_mutex);
5097 	r = gfx_v11_0_request_gfx_index_mutex(adev, true);
5098 	if (r) {
5099 		mutex_unlock(&adev->gfx.reset_sem_mutex);
5100 		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
5101 		return r;
5102 	}
5103 
5104 	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
5105 
5106 	/* Read the CP_VMID_RESET register three times to allow
5107 	 * GFX_HQD_ACTIVE enough time to reach 0. */
5108 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
5109 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
5110 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
5111 
5112 	/* release the gfx mutex */
5113 	r = gfx_v11_0_request_gfx_index_mutex(adev, false);
5114 	mutex_unlock(&adev->gfx.reset_sem_mutex);
5115 	if (r) {
5116 		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
5117 		return r;
5118 	}
5119 
5120 	for (i = 0; i < adev->usec_timeout; i++) {
5121 		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
5122 		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
5123 			break;
5124 		udelay(1);
5125 	}
5126 	if (i >= adev->usec_timeout) {
5127 		dev_err(adev->dev, "Timed out waiting for all pipes to go idle\n");
5128 		return -EINVAL;
5129 	}
5130 
5131 	/* trigger soft reset */
5132 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
5133 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5134 					SOFT_RESET_CP, 1);
5135 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5136 					SOFT_RESET_GFX, 1);
5137 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5138 					SOFT_RESET_CPF, 1);
5139 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5140 					SOFT_RESET_CPC, 1);
5141 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5142 					SOFT_RESET_CPG, 1);
5143 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
5144 	/* exit soft reset */
5145 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
5146 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5147 					SOFT_RESET_CP, 0);
5148 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5149 					SOFT_RESET_GFX, 0);
5150 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5151 					SOFT_RESET_CPF, 0);
5152 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5153 					SOFT_RESET_CPC, 0);
5154 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5155 					SOFT_RESET_CPG, 0);
5156 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
5157 
5158 	tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
5159 	tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
5160 	WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
5161 
5162 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
5163 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
5164 
5165 	for (i = 0; i < adev->usec_timeout; i++) {
5166 		if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
5167 			break;
5168 		udelay(1);
5169 	}
5170 	if (i >= adev->usec_timeout) {
5171 		dev_err(adev->dev, "Timed out waiting for CP_VMID_RESET to clear\n");
5172 		return -EINVAL;
5173 	}
5174 
5175 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
5176 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
5177 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
5178 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
5179 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
5180 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
5181 
5182 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5183 
5184 	return gfx_v11_0_cp_resume(adev);
5185 }
5186 
5187 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
5188 {
5189 	int i, r;
5190 	struct amdgpu_device *adev = ip_block->adev;
5191 	struct amdgpu_ring *ring;
5192 	long tmo = msecs_to_jiffies(1000);
5193 
5194 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
5195 		ring = &adev->gfx.gfx_ring[i];
5196 		r = amdgpu_ring_test_ib(ring, tmo);
5197 		if (r)
5198 			return true;
5199 	}
5200 
5201 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5202 		ring = &adev->gfx.compute_ring[i];
5203 		r = amdgpu_ring_test_ib(ring, tmo);
5204 		if (r)
5205 			return true;
5206 	}
5207 
5208 	return false;
5209 }
5210 
5211 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
5212 {
5213 	struct amdgpu_device *adev = ip_block->adev;
5214 	/*
5215 	 * GFX soft reset also affects MES, so MES must be resumed after it.
5216 	 */
5217 	return amdgpu_mes_resume(adev);
5218 }
5219 
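/*
 * Sample the free-running 64-bit GPU clock. The high dword is read before
 * and after the low dword; if it changed in between, the low dword is
 * re-read so that the combined value is consistent. SR-IOV uses the MES
 * MTIME counter, bare metal the SMUIO golden TSC.
 */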
5220 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5221 {
5222 	uint64_t clock;
5223 	uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
5224 
5225 	if (amdgpu_sriov_vf(adev)) {
5226 		amdgpu_gfx_off_ctrl(adev, false);
5227 		mutex_lock(&adev->gfx.gpu_clock_mutex);
5228 		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
5229 		clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
5230 		clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
5231 		if (clock_counter_hi_pre != clock_counter_hi_after)
5232 			clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
5233 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
5234 		amdgpu_gfx_off_ctrl(adev, true);
5235 	} else {
5236 		preempt_disable();
5237 		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
5238 		clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
5239 		clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
5240 		if (clock_counter_hi_pre != clock_counter_hi_after)
5241 			clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
5242 		preempt_enable();
5243 	}
5244 	clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
5245 
5246 	return clock;
5247 }
5248 
5249 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5250 					   uint32_t vmid,
5251 					   uint32_t gds_base, uint32_t gds_size,
5252 					   uint32_t gws_base, uint32_t gws_size,
5253 					   uint32_t oa_base, uint32_t oa_size)
5254 {
5255 	struct amdgpu_device *adev = ring->adev;
5256 
5257 	/* GDS Base */
5258 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5259 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
5260 				    gds_base);
5261 
5262 	/* GDS Size */
5263 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5264 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
5265 				    gds_size);
5266 
5267 	/* GWS */
5268 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5269 				    SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
5270 				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5271 
5272 	/* OA */
5273 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5274 				    SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
5275 				    (1 << (oa_size + oa_base)) - (1 << oa_base));
5276 }
5277 
5278 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
5279 {
5280 	struct amdgpu_device *adev = ip_block->adev;
5281 
5282 	switch (amdgpu_user_queue) {
5283 	case -1:
5284 	case 0:
5285 	default:
5286 		adev->gfx.disable_kq = false;
5287 		adev->gfx.disable_uq = true;
5288 		break;
5289 	case 1:
5290 		adev->gfx.disable_kq = false;
5291 		adev->gfx.disable_uq = false;
5292 		break;
5293 	case 2:
5294 		adev->gfx.disable_kq = true;
5295 		adev->gfx.disable_uq = false;
5296 		break;
5297 	}
5298 
5299 	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
5300 
5301 	if (adev->gfx.disable_kq) {
5302 		/* We need one GFX ring temporarily to set up
5303 		 * the clear state.
5304 		 */
5305 		adev->gfx.num_gfx_rings = 1;
5306 		adev->gfx.num_compute_rings = 0;
5307 	} else {
5308 		adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
5309 		adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5310 						  AMDGPU_MAX_COMPUTE_RINGS);
5311 	}
5312 
5313 	gfx_v11_0_set_kiq_pm4_funcs(adev);
5314 	gfx_v11_0_set_ring_funcs(adev);
5315 	gfx_v11_0_set_irq_funcs(adev);
5316 	gfx_v11_0_set_gds_init(adev);
5317 	gfx_v11_0_set_rlc_funcs(adev);
5318 	gfx_v11_0_set_mqd_funcs(adev);
5319 	gfx_v11_0_set_imu_funcs(adev);
5320 
5321 	gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
5322 
5323 	return gfx_v11_0_init_microcode(adev);
5324 }
5325 
5326 static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block)
5327 {
5328 	struct amdgpu_device *adev = ip_block->adev;
5329 	int r;
5330 
5331 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5332 	if (r)
5333 		return r;
5334 
5335 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5336 	if (r)
5337 		return r;
5338 
5339 	r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
5340 	if (r)
5341 		return r;
5342 
5343 	r = gfx_v11_0_set_userq_eop_interrupts(adev, true);
5344 	if (r)
5345 		return r;
5346 
5347 	return 0;
5348 }
5349 
5350 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
5351 {
5352 	uint32_t rlc_cntl;
5353 
5354 	/* if RLC is not enabled, do nothing */
5355 	rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
5356 	return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
5357 }
5358 
5359 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
5360 {
5361 	uint32_t data;
5362 	unsigned i;
5363 
5364 	data = RLC_SAFE_MODE__CMD_MASK;
5365 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5366 
5367 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
5368 
5369 	/* wait for RLC_SAFE_MODE */
5370 	for (i = 0; i < adev->usec_timeout; i++) {
5371 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
5372 				   RLC_SAFE_MODE, CMD))
5373 			break;
5374 		udelay(1);
5375 	}
5376 }
5377 
5378 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
5379 {
5380 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
5381 }
5382 
5383 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
5384 				      bool enable)
5385 {
5386 	uint32_t def, data;
5387 
5388 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
5389 		return;
5390 
5391 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5392 
5393 	if (enable)
5394 		data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
5395 	else
5396 		data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
5397 
5398 	if (def != data)
5399 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5400 }
5401 
5402 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
5403 				       bool enable)
5404 {
5405 	uint32_t def, data;
5406 
5407 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
5408 		return;
5409 
5410 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5411 
5412 	if (enable)
5413 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
5414 	else
5415 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
5416 
5417 	if (def != data)
5418 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5419 }
5420 
5421 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
5422 					   bool enable)
5423 {
5424 	uint32_t def, data;
5425 
5426 	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
5427 		return;
5428 
5429 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5430 
5431 	if (enable)
5432 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
5433 	else
5434 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
5435 
5436 	if (def != data)
5437 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5438 }
5439 
5440 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5441 						       bool enable)
5442 {
5443 	uint32_t data, def;
5444 
5445 	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
5446 		return;
5447 
5448 	/* It is disabled by HW by default */
5449 	if (enable) {
5450 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5451 			/* 1 - RLC_CGTT_MGCG_OVERRIDE */
5452 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5453 
5454 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5455 				  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5456 				  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
5457 
5458 			if (def != data)
5459 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5460 		}
5461 	} else {
5462 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5463 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5464 
5465 			data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5466 				 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5467 				 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
5468 
5469 			if (def != data)
5470 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5471 		}
5472 	}
5473 }
5474 
5475 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5476 						       bool enable)
5477 {
5478 	uint32_t def, data;
5479 
5480 	if (!(adev->cg_flags &
5481 	      (AMD_CG_SUPPORT_GFX_CGCG |
5482 	      AMD_CG_SUPPORT_GFX_CGLS |
5483 	      AMD_CG_SUPPORT_GFX_3D_CGCG |
5484 	      AMD_CG_SUPPORT_GFX_3D_CGLS)))
5485 		return;
5486 
5487 	if (enable) {
5488 		def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5489 
5490 		/* unset CGCG override */
5491 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
5492 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5493 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5494 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5495 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
5496 		    adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5497 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5498 
5499 		/* update CGCG override bits */
5500 		if (def != data)
5501 			WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5502 
5503 		/* enable cgcg FSM(0x0000363F) */
5504 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5505 
5506 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5507 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
5508 			data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5509 				 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5510 		}
5511 
5512 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5513 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
5514 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5515 				 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5516 		}
5517 
5518 		if (def != data)
5519 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5520 
5521 		/* Program RLC_CGCG_CGLS_CTRL_3D */
5522 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5523 
5524 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5525 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
5526 			data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5527 				 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5528 		}
5529 
5530 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5531 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
5532 			data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5533 				 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5534 		}
5535 
5536 		if (def != data)
5537 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5538 
5539 		/* set IDLE_POLL_COUNT(0x00900100) */
5540 		def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
5541 
5542 		data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
5543 		data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5544 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5545 
5546 		if (def != data)
5547 			WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
5548 
5549 		data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
5550 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
5551 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
5552 		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
5553 		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
5554 		WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
5555 
5556 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5557 		data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5558 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5559 
5560 		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
5561 		if (adev->sdma.num_instances > 1) {
5562 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5563 			data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5564 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5565 		}
5566 	} else {
5567 		/* Program RLC_CGCG_CGLS_CTRL */
5568 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5569 
5570 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
5571 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5572 
5573 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5574 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5575 
5576 		if (def != data)
5577 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5578 
5579 		/* Program RLC_CGCG_CGLS_CTRL_3D */
5580 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5581 
5582 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5583 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5584 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5585 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5586 
5587 		if (def != data)
5588 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5589 
5590 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5591 		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5592 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5593 
5594 		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
5595 		if (adev->sdma.num_instances > 1) {
5596 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5597 			data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5598 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5599 		}
5600 	}
5601 }
5602 
5603 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5604 					    bool enable)
5605 {
5606 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5607 
5608 	gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
5609 
5610 	gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
5611 
5612 	gfx_v11_0_update_repeater_fgcg(adev, enable);
5613 
5614 	gfx_v11_0_update_sram_fgcg(adev, enable);
5615 
5616 	gfx_v11_0_update_perf_clk(adev, enable);
5617 
5618 	if (adev->cg_flags &
5619 	    (AMD_CG_SUPPORT_GFX_MGCG |
5620 	     AMD_CG_SUPPORT_GFX_CGLS |
5621 	     AMD_CG_SUPPORT_GFX_CGCG |
5622 	     AMD_CG_SUPPORT_GFX_3D_CGCG |
5623 	     AMD_CG_SUPPORT_GFX_3D_CGLS))
5624 		gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
5625 
5626 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5627 
5628 	return 0;
5629 }
5630 
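/*
 * Update the VMID used for RLC SPM (streaming performance monitor)
 * memory accesses. GFXOFF is disabled around the read-modify-write, and
 * for one-VF SR-IOV the new value is additionally emitted on the ring so
 * it takes effect in order with the submission.
 */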
5631 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, int xcc_id,
5632 		struct amdgpu_ring *ring, unsigned vmid)
5633 {
5634 	u32 reg, pre_data, data;
5635 
5636 	amdgpu_gfx_off_ctrl(adev, false);
5637 	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
5638 	if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
5639 		pre_data = RREG32_NO_KIQ(reg);
5640 	else
5641 		pre_data = RREG32(reg);
5642 
5643 	data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
5644 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5645 
5646 	if (pre_data != data) {
5647 		if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
5648 			WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
5649 		else
5650 			WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
5651 	}
5652 	amdgpu_gfx_off_ctrl(adev, true);
5653 
5654 	if (ring &&
5655 	    amdgpu_sriov_is_pp_one_vf(adev) &&
5656 	    (pre_data != data) &&
5657 	    ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) ||
5658 	     (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
5659 		amdgpu_ring_emit_wreg(ring, reg, data);
5660 	}
5661 }
5662 
5663 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5664 	.is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5665 	.set_safe_mode = gfx_v11_0_set_safe_mode,
5666 	.unset_safe_mode = gfx_v11_0_unset_safe_mode,
5667 	.init = gfx_v11_0_rlc_init,
5668 	.get_csb_size = gfx_v11_0_get_csb_size,
5669 	.get_csb_buffer = gfx_v11_0_get_csb_buffer,
5670 	.resume = gfx_v11_0_rlc_resume,
5671 	.stop = gfx_v11_0_rlc_stop,
5672 	.reset = gfx_v11_0_rlc_reset,
5673 	.start = gfx_v11_0_rlc_start,
5674 	.update_spm_vmid = gfx_v11_0_update_spm_vmid,
5675 };
5676 
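/*
 * Enable or disable GFX power gating and, on GC 11.0.1/11.0.4 and 11.5.x
 * parts, program the CGPG hysteresis delay in RLC_PG_DELAY_3.
 */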
5677 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5678 {
5679 	u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5680 
5681 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5682 		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5683 	else
5684 		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5685 
5686 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5687 
5688 	/* Program RLC_PG_DELAY_3 for CGPG hysteresis */
5689 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5690 		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5691 		case IP_VERSION(11, 0, 1):
5692 		case IP_VERSION(11, 0, 4):
5693 		case IP_VERSION(11, 5, 0):
5694 		case IP_VERSION(11, 5, 1):
5695 		case IP_VERSION(11, 5, 2):
5696 		case IP_VERSION(11, 5, 3):
5697 		case IP_VERSION(11, 5, 4):
5698 			WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5699 			break;
5700 		default:
5701 			break;
5702 		}
5703 	}
5704 }
5705 
5706 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5707 {
5708 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5709 
5710 	gfx_v11_cntl_power_gating(adev, enable);
5711 
5712 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5713 }
5714 
5715 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
5716 					   enum amd_powergating_state state)
5717 {
5718 	struct amdgpu_device *adev = ip_block->adev;
5719 	bool enable = (state == AMD_PG_STATE_GATE);
5720 
5721 	if (amdgpu_sriov_vf(adev))
5722 		return 0;
5723 
5724 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5725 	case IP_VERSION(11, 0, 0):
5726 	case IP_VERSION(11, 0, 2):
5727 	case IP_VERSION(11, 0, 3):
5728 		amdgpu_gfx_off_ctrl(adev, enable);
5729 		break;
5730 	case IP_VERSION(11, 0, 1):
5731 	case IP_VERSION(11, 0, 4):
5732 	case IP_VERSION(11, 5, 0):
5733 	case IP_VERSION(11, 5, 1):
5734 	case IP_VERSION(11, 5, 2):
5735 	case IP_VERSION(11, 5, 3):
5736 	case IP_VERSION(11, 5, 4):
5737 		if (!enable)
5738 			amdgpu_gfx_off_ctrl(adev, false);
5739 
5740 		gfx_v11_cntl_pg(adev, enable);
5741 
5742 		if (enable)
5743 			amdgpu_gfx_off_ctrl(adev, true);
5744 
5745 		break;
5746 	default:
5747 		break;
5748 	}
5749 
5750 	return 0;
5751 }
5752 
5753 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
5754 					  enum amd_clockgating_state state)
5755 {
5756 	struct amdgpu_device *adev = ip_block->adev;
5757 
5758 	if (amdgpu_sriov_vf(adev))
5759 		return 0;
5760 
5761 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5762 	case IP_VERSION(11, 0, 0):
5763 	case IP_VERSION(11, 0, 1):
5764 	case IP_VERSION(11, 0, 2):
5765 	case IP_VERSION(11, 0, 3):
5766 	case IP_VERSION(11, 0, 4):
5767 	case IP_VERSION(11, 5, 0):
5768 	case IP_VERSION(11, 5, 1):
5769 	case IP_VERSION(11, 5, 2):
5770 	case IP_VERSION(11, 5, 3):
5771 	case IP_VERSION(11, 5, 4):
5772 		gfx_v11_0_update_gfx_clock_gating(adev,
5773 						  state == AMD_CG_STATE_GATE);
5774 		break;
5775 	default:
5776 		break;
5777 	}
5778 
5779 	return 0;
5780 }
5781 
5782 static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
5783 {
5784 	struct amdgpu_device *adev = ip_block->adev;
5785 	int data;
5786 
5787 	/* AMD_CG_SUPPORT_GFX_MGCG */
5788 	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5789 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5790 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5791 
5792 	/* AMD_CG_SUPPORT_REPEATER_FGCG */
5793 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5794 		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5795 
5796 	/* AMD_CG_SUPPORT_GFX_FGCG */
5797 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5798 		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
5799 
5800 	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
5801 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5802 		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5803 
5804 	/* AMD_CG_SUPPORT_GFX_CGCG */
5805 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5806 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5807 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5808 
5809 	/* AMD_CG_SUPPORT_GFX_CGLS */
5810 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5811 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5812 
5813 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5814 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5815 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5816 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5817 
5818 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5819 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5820 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5821 }
5822 
5823 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5824 {
5825 	/* gfx11 uses a 32-bit rptr */
5826 	return *(uint32_t *)ring->rptr_cpu_addr;
5827 }
5828 
5829 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5830 {
5831 	struct amdgpu_device *adev = ring->adev;
5832 	u64 wptr;
5833 
5834 	/* XXX check if swapping is necessary on BE */
5835 	if (ring->use_doorbell) {
5836 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5837 	} else {
5838 		wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5839 		wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5840 	}
5841 
5842 	return wptr;
5843 }
5844 
5845 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5846 {
5847 	struct amdgpu_device *adev = ring->adev;
5848 
5849 	if (ring->use_doorbell) {
5850 		/* XXX check if swapping is necessary on BE */
5851 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5852 			     ring->wptr);
5853 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5854 	} else {
5855 		WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5856 			     lower_32_bits(ring->wptr));
5857 		WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5858 			     upper_32_bits(ring->wptr));
5859 	}
5860 }
5861 
5862 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5863 {
5864 	/* gfx11 hardware uses a 32-bit rptr */
5865 	return *(uint32_t *)ring->rptr_cpu_addr;
5866 }
5867 
5868 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5869 {
5870 	u64 wptr;
5871 
5872 	/* XXX check if swapping is necessary on BE */
5873 	if (ring->use_doorbell)
5874 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5875 	else
5876 		BUG();
5877 	return wptr;
5878 }
5879 
5880 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5881 {
5882 	struct amdgpu_device *adev = ring->adev;
5883 
5884 	/* XXX check if swapping is necessary on BE */
5885 	if (ring->use_doorbell) {
5886 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5887 			     ring->wptr);
5888 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5889 	} else {
5890 		BUG(); /* only DOORBELL method supported on gfx11 now */
5891 	}
5892 }
5893 
5894 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5895 {
5896 	struct amdgpu_device *adev = ring->adev;
5897 	u32 ref_and_mask, reg_mem_engine;
5898 
5899 	if (!adev->gfx.funcs->get_hdp_flush_mask) {
5900 		dev_err(adev->dev, "%s: gfx hdp flush is not supported.\n", __func__);
5901 		return;
5902 	}
5903 
5904 	adev->gfx.funcs->get_hdp_flush_mask(ring, &ref_and_mask, &reg_mem_engine);
5905 	gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5906 			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5907 			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5908 			       ref_and_mask, ref_and_mask, 0x20);
5909 }
5910 
5911 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5912 				       struct amdgpu_job *job,
5913 				       struct amdgpu_ib *ib,
5914 				       uint32_t flags)
5915 {
5916 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5917 	u32 header, control = 0;
5918 
5919 	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5920 
5921 	control |= ib->length_dw | (vmid << 24);
5922 
5923 	if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5924 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5925 
5926 		if (flags & AMDGPU_IB_PREEMPTED)
5927 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5928 
5929 		if (vmid && !ring->adev->gfx.rs64_enable)
5930 			gfx_v11_0_ring_emit_de_meta(ring,
5931 				!amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
5932 	}
5933 
5934 	amdgpu_ring_write(ring, header);
5935 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5936 	amdgpu_ring_write(ring,
5937 #ifdef __BIG_ENDIAN
5938 		(2 << 0) |
5939 #endif
5940 		lower_32_bits(ib->gpu_addr));
5941 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5942 	amdgpu_ring_write(ring, control);
5943 }
5944 
5945 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5946 					   struct amdgpu_job *job,
5947 					   struct amdgpu_ib *ib,
5948 					   uint32_t flags)
5949 {
5950 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5951 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5952 
5953 	/* Currently, there is a high probability of a wave ID mismatch
5954 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5955 	 * different wave IDs than the GDS expects. This situation happens
5956 	 * randomly when at least 5 compute pipes use GDS ordered append.
5957 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5958 	 * Those are probably bugs somewhere else in the kernel driver.
5959 	 *
5960 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5961 	 * GDS to 0 for this ring (me/pipe).
5962 	 */
5963 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5964 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5965 		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5966 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5967 	}
5968 
5969 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5970 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5971 	amdgpu_ring_write(ring,
5972 #ifdef __BIG_ENDIAN
5973 				(2 << 0) |
5974 #endif
5975 				lower_32_bits(ib->gpu_addr));
5976 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5977 	amdgpu_ring_write(ring, control);
5978 }
5979 
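/*
 * Emit a gfx fence using RELEASE_MEM: write back the GL2 cache and write
 * back/invalidate the GLM cache at end-of-pipe, write the 32- or 64-bit
 * sequence value to memory and optionally raise an interrupt.
 */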
5980 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5981 				     u64 seq, unsigned flags)
5982 {
5983 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5984 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5985 
5986 	/* RELEASE_MEM - flush caches, send int */
5987 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5988 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5989 				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
5990 				 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
5991 				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
5992 				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5993 				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5994 				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
5995 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5996 				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5997 
5998 	/*
5999 	 * The address must be Qword aligned for a 64-bit write, or Dword
6000 	 * aligned if only the low 32 bits are sent (high bits discarded).
6001 	 */
6002 	if (write64bit)
6003 		BUG_ON(addr & 0x7);
6004 	else
6005 		BUG_ON(addr & 0x3);
6006 	amdgpu_ring_write(ring, lower_32_bits(addr));
6007 	amdgpu_ring_write(ring, upper_32_bits(addr));
6008 	amdgpu_ring_write(ring, lower_32_bits(seq));
6009 	amdgpu_ring_write(ring, upper_32_bits(seq));
6010 	amdgpu_ring_write(ring, 0);
6011 }
6012 
6013 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6014 {
6015 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6016 	uint32_t seq = ring->fence_drv.sync_seq;
6017 	uint64_t addr = ring->fence_drv.gpu_addr;
6018 
6019 	gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
6020 			       upper_32_bits(addr), seq, 0xffffffff, 4);
6021 }
6022 
6023 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
6024 				   uint16_t pasid, uint32_t flush_type,
6025 				   bool all_hub, uint8_t dst_sel)
6026 {
6027 	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
6028 	amdgpu_ring_write(ring,
6029 			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
6030 			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
6031 			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
6032 			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
6033 }
6034 
6035 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6036 					 unsigned vmid, uint64_t pd_addr)
6037 {
6038 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6039 
6040 	/* compute doesn't have PFP */
6041 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
6042 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6043 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6044 		amdgpu_ring_write(ring, 0x0);
6045 	}
6046 
6047 	/* Make sure that we can't skip the SET_Q_MODE packets when the VM
6048 	 * changed in any way.
6049 	 */
6050 	ring->set_q_mode_offs = 0;
6051 	ring->set_q_mode_ptr = NULL;
6052 }
6053 
6054 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6055 					  u64 seq, unsigned int flags)
6056 {
6057 	struct amdgpu_device *adev = ring->adev;
6058 
6059 	/* we only allocate 32 bits for each seq writeback address */
6060 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6061 
6062 	/* write fence seq to the "addr" */
6063 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6064 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6065 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6066 	amdgpu_ring_write(ring, lower_32_bits(addr));
6067 	amdgpu_ring_write(ring, upper_32_bits(addr));
6068 	amdgpu_ring_write(ring, lower_32_bits(seq));
6069 
6070 	if (flags & AMDGPU_FENCE_FLAG_INT) {
6071 		/* set register to trigger INT */
6072 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6073 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6074 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6075 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
6076 		amdgpu_ring_write(ring, 0);
6077 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6078 	}
6079 }
6080 
6081 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
6082 					 uint32_t flags)
6083 {
6084 	uint32_t dw2 = 0;
6085 
6086 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6087 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6088 		/* set load_global_config & load_global_uconfig */
6089 		dw2 |= 0x8001;
6090 		/* set load_cs_sh_regs */
6091 		dw2 |= 0x01000000;
6092 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
6093 		dw2 |= 0x10002;
6094 	}
6095 
6096 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6097 	amdgpu_ring_write(ring, dw2);
6098 	amdgpu_ring_write(ring, 0);
6099 }
6100 
6101 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
6102 						   uint64_t addr)
6103 {
6104 	unsigned ret;
6105 
6106 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6107 	amdgpu_ring_write(ring, lower_32_bits(addr));
6108 	amdgpu_ring_write(ring, upper_32_bits(addr));
6109 	/* discard following DWs if *cond_exec_gpu_addr==0 */
6110 	amdgpu_ring_write(ring, 0);
6111 	ret = ring->wptr & ring->buf_mask;
6112 	/* patch dummy value later */
6113 	amdgpu_ring_write(ring, 0);
6114 
6115 	return ret;
6116 }
6117 
6118 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
6119 					   u64 shadow_va, u64 csa_va,
6120 					   u64 gds_va, bool init_shadow,
6121 					   int vmid)
6122 {
6123 	struct amdgpu_device *adev = ring->adev;
6124 	unsigned int offs, end;
6125 
6126 	if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
6127 		return;
6128 
6129 	/*
6130 	 * The logic here isn't easy to understand because we need to keep state
6131 	 * across multiple executions of the function as well as between the
6132 	 * CPU and GPU. The general idea is that the newly written GPU command
6133 	 * has a condition on the previous one and is only executed if really
6134 	 * necessary.
6135 	 */
6136 
6137 	/*
6138 	 * The dw in the NOP controls whether the next SET_Q_MODE packet is
6139 	 * executed. Reserve 64 bits just to be on the safe side.
6140 	 */
6141 	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
6142 	offs = ring->wptr & ring->buf_mask;
6143 
6144 	/*
6145 	 * We start with skipping the prefix SET_Q_MODE and always executing
6146 	 * the postfix SET_Q_MODE packet. This is changed below with a
6147 	 * WRITE_DATA command once the postfix has executed.
6148 	 */
6149 	amdgpu_ring_write(ring, shadow_va ? 1 : 0);
6150 	amdgpu_ring_write(ring, 0);
6151 
6152 	if (ring->set_q_mode_offs) {
6153 		uint64_t addr;
6154 
6155 		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
6156 		addr += ring->set_q_mode_offs << 2;
6157 		end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
6158 	}
6159 
6160 	/*
6161 	 * When the postfix SET_Q_MODE packet executes we need to make sure that the
6162 	 * next prefix SET_Q_MODE packet executes as well.
6163 	 */
6164 	if (!shadow_va) {
6165 		uint64_t addr;
6166 
6167 		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
6168 		addr += offs << 2;
6169 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6170 		amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
6171 		amdgpu_ring_write(ring, lower_32_bits(addr));
6172 		amdgpu_ring_write(ring, upper_32_bits(addr));
6173 		amdgpu_ring_write(ring, 0x1);
6174 	}
6175 
6176 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
6177 	amdgpu_ring_write(ring, lower_32_bits(shadow_va));
6178 	amdgpu_ring_write(ring, upper_32_bits(shadow_va));
6179 	amdgpu_ring_write(ring, lower_32_bits(gds_va));
6180 	amdgpu_ring_write(ring, upper_32_bits(gds_va));
6181 	amdgpu_ring_write(ring, lower_32_bits(csa_va));
6182 	amdgpu_ring_write(ring, upper_32_bits(csa_va));
6183 	amdgpu_ring_write(ring, shadow_va ?
6184 			  PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
6185 	amdgpu_ring_write(ring, init_shadow ?
6186 			  PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
6187 
6188 	if (ring->set_q_mode_offs)
6189 		amdgpu_ring_patch_cond_exec(ring, end);
6190 
6191 	if (shadow_va) {
6192 		uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;
6193 
6194 		/*
6195 		 * If the tokens match try to skip the last postfix SET_Q_MODE
6196 		 * packet to avoid saving/restoring the state all the time.
6197 		 */
6198 		if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
6199 			*ring->set_q_mode_ptr = 0;
6200 
6201 		ring->set_q_mode_token = token;
6202 	} else {
6203 		ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
6204 	}
6205 
6206 	ring->set_q_mode_offs = offs;
6207 }
6208 
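/*
 * Preempt the IBs on a gfx ring: assert the preemption condition by
 * clearing the ring's preempt cond_exec flag, ask the KIQ to unmap the
 * queue without saving it (PREEMPT_QUEUES_NO_UNMAP), then poll the
 * trailing fence until the CP acknowledges the preemption.
 */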
6209 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
6210 {
6211 	int i, r = 0;
6212 	struct amdgpu_device *adev = ring->adev;
6213 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
6214 	struct amdgpu_ring *kiq_ring = &kiq->ring;
6215 	unsigned long flags;
6216 
6217 	if (adev->enable_mes)
6218 		return -EINVAL;
6219 
6220 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
6221 		return -EINVAL;
6222 
6223 	spin_lock_irqsave(&kiq->ring_lock, flags);
6224 
6225 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
6226 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
6227 		return -ENOMEM;
6228 	}
6229 
6230 	/* assert preemption condition */
6231 	amdgpu_ring_set_preempt_cond_exec(ring, false);
6232 
6233 	/* assert IB preemption, emit the trailing fence */
6234 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
6235 				   ring->trail_fence_gpu_addr,
6236 				   ++ring->trail_seq);
6237 	amdgpu_ring_commit(kiq_ring);
6238 
6239 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
6240 
6241 	/* poll the trailing fence */
6242 	for (i = 0; i < adev->usec_timeout; i++) {
6243 		if (ring->trail_seq ==
6244 		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
6245 			break;
6246 		udelay(1);
6247 	}
6248 
6249 	if (i >= adev->usec_timeout) {
6250 		r = -EINVAL;
6251 		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
6252 	}
6253 
6254 	/* deassert preemption condition */
6255 	amdgpu_ring_set_preempt_cond_exec(ring, true);
6256 	return r;
6257 }
6258 
6259 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
6260 {
6261 	struct amdgpu_device *adev = ring->adev;
6262 	struct v10_de_ib_state de_payload = {0};
6263 	uint64_t offset, gds_addr, de_payload_gpu_addr;
6264 	void *de_payload_cpu_addr;
6265 	int cnt;
6266 
6267 	offset = offsetof(struct v10_gfx_meta_data, de_payload);
6268 	de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
6269 	de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
6270 
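	/* Carve the GDS backup out of the top of the CSA, page aligned. */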
6271 	gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
6272 			 AMDGPU_CSA_SIZE - adev->gds.gds_size,
6273 			 PAGE_SIZE);
6274 
6275 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
6276 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
6277 
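	/*
	 * PACKET3 count is total dwords minus two: header, three WRITE_DATA
	 * setup dwords, then the payload.
	 */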
6278 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
6279 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
6280 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
6281 				 WRITE_DATA_DST_SEL(8) |
6282 				 WR_CONFIRM) |
6283 				 WRITE_DATA_CACHE_POLICY(0));
6284 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
6285 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
6286 
6287 	if (resume)
6288 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
6289 					   sizeof(de_payload) >> 2);
6290 	else
6291 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
6292 					   sizeof(de_payload) >> 2);
6293 }
6294 
6295 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
6296 				    bool secure)
6297 {
6298 	uint32_t v = secure ? FRAME_TMZ : 0;
6299 
6300 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
6301 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
6302 }
6303 
6304 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6305 				     uint32_t reg_val_offs)
6306 {
6307 	struct amdgpu_device *adev = ring->adev;
6308 
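	/* COPY_DATA the register contents into the writeback slot reg_val_offs. */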
6309 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6310 	amdgpu_ring_write(ring, 0 |	/* src: register */
6311 				(5 << 8) |	/* dst: memory */
6312 				(1 << 20));	/* write confirm */
6313 	amdgpu_ring_write(ring, reg);
6314 	amdgpu_ring_write(ring, 0);
6315 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6316 				reg_val_offs * 4));
6317 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6318 				reg_val_offs * 4));
6319 }
6320 
6321 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6322 				   uint32_t val)
6323 {
6324 	uint32_t cmd = 0;
6325 
6326 	switch (ring->funcs->type) {
6327 	case AMDGPU_RING_TYPE_GFX:
6328 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6329 		break;
6330 	case AMDGPU_RING_TYPE_KIQ:
6331 		cmd = (1 << 16); /* no inc addr */
6332 		break;
6333 	default:
6334 		cmd = WR_CONFIRM;
6335 		break;
6336 	}
6337 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6338 	amdgpu_ring_write(ring, cmd);
6339 	amdgpu_ring_write(ring, reg);
6340 	amdgpu_ring_write(ring, 0);
6341 	amdgpu_ring_write(ring, val);
6342 }
6343 
6344 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
6345 					uint32_t val, uint32_t mask)
6346 {
6347 	gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
6348 }
6349 
6350 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
6351 						   uint32_t reg0, uint32_t reg1,
6352 						   uint32_t ref, uint32_t mask)
6353 {
6354 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6355 
6356 	gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
6357 			       ref, mask, 0x20);
6358 }
6359 
6360 static void
6361 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6362 				      uint32_t me, uint32_t pipe,
6363 				      enum amdgpu_interrupt_state state)
6364 {
6365 	uint32_t cp_int_cntl, cp_int_cntl_reg;
6366 
6367 	if (!me) {
6368 		switch (pipe) {
6369 		case 0:
6370 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
6371 			break;
6372 		case 1:
6373 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
6374 			break;
6375 		default:
6376 			DRM_DEBUG("invalid pipe %d\n", pipe);
6377 			return;
6378 		}
6379 	} else {
6380 		DRM_DEBUG("invalid me %d\n", me);
6381 		return;
6382 	}
6383 
6384 	switch (state) {
6385 	case AMDGPU_IRQ_STATE_DISABLE:
6386 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6387 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6388 					    TIME_STAMP_INT_ENABLE, 0);
6389 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6390 					    GENERIC0_INT_ENABLE, 0);
6391 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6392 		break;
6393 	case AMDGPU_IRQ_STATE_ENABLE:
6394 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6395 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6396 					    TIME_STAMP_INT_ENABLE, 1);
6397 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6398 					    GENERIC0_INT_ENABLE, 1);
6399 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6400 		break;
6401 	default:
6402 		break;
6403 	}
6404 }
6405 
6406 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6407 						     int me, int pipe,
6408 						     enum amdgpu_interrupt_state state)
6409 {
6410 	u32 mec_int_cntl, mec_int_cntl_reg;
6411 
6412 	/*
6413 	 * amdgpu controls only the first MEC. That's why this function only
6414 	 * handles the setting of interrupts for this specific MEC. All other
6415 	 * pipes' interrupts are set by amdkfd.
6416 	 */
6417 
6418 	if (me == 1) {
6419 		switch (pipe) {
6420 		case 0:
6421 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6422 			break;
6423 		case 1:
6424 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
6425 			break;
6426 		case 2:
6427 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
6428 			break;
6429 		case 3:
6430 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
6431 			break;
6432 		default:
6433 			DRM_DEBUG("invalid pipe %d\n", pipe);
6434 			return;
6435 		}
6436 	} else {
6437 		DRM_DEBUG("invalid me %d\n", me);
6438 		return;
6439 	}
6440 
6441 	switch (state) {
6442 	case AMDGPU_IRQ_STATE_DISABLE:
6443 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6444 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6445 					     TIME_STAMP_INT_ENABLE, 0);
6446 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6447 					     GENERIC0_INT_ENABLE, 0);
6448 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6449 		break;
6450 	case AMDGPU_IRQ_STATE_ENABLE:
6451 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6452 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6453 					     TIME_STAMP_INT_ENABLE, 1);
6454 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6455 					     GENERIC0_INT_ENABLE, 1);
6456 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6457 		break;
6458 	default:
6459 		break;
6460 	}
6461 }
6462 
6463 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6464 					    struct amdgpu_irq_src *src,
6465 					    unsigned type,
6466 					    enum amdgpu_interrupt_state state)
6467 {
6468 	switch (type) {
6469 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6470 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
6471 		break;
6472 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
6473 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
6474 		break;
6475 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6476 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6477 		break;
6478 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6479 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6480 		break;
6481 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6482 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6483 		break;
6484 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6485 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6486 		break;
6487 	default:
6488 		break;
6489 	}
6490 	return 0;
6491 }
6492 
6493 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
6494 			     struct amdgpu_irq_src *source,
6495 			     struct amdgpu_iv_entry *entry)
6496 {
6497 	u32 doorbell_offset = entry->src_data[0];
6498 	u8 me_id, pipe_id, queue_id;
6499 	struct amdgpu_ring *ring;
6500 	int i;
6501 
6502 	DRM_DEBUG("IH: CP EOP\n");
6503 
6504 	if (adev->enable_mes && doorbell_offset) {
6505 		struct amdgpu_userq_fence_driver *fence_drv = NULL;
6506 		struct xarray *xa = &adev->userq_xa;
6507 		unsigned long flags;
6508 
6509 		xa_lock_irqsave(xa, flags);
6510 		fence_drv = xa_load(xa, doorbell_offset);
6511 		if (fence_drv)
6512 			amdgpu_userq_fence_driver_process(fence_drv);
6513 		xa_unlock_irqrestore(xa, flags);
6514 	} else {
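		/* ring_id layout: [6:4] queue, [3:2] me, [1:0] pipe */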
6515 		me_id = (entry->ring_id & 0x0c) >> 2;
6516 		pipe_id = (entry->ring_id & 0x03) >> 0;
6517 		queue_id = (entry->ring_id & 0x70) >> 4;
6518 
6519 		switch (me_id) {
6520 		case 0:
6521 			if (pipe_id == 0)
6522 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6523 			else
6524 				amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
6525 			break;
6526 		case 1:
6527 		case 2:
6528 			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6529 				ring = &adev->gfx.compute_ring[i];
6530 				/* Per-queue interrupt is supported for MEC starting from VI.
6531 				 * The interrupt can only be enabled/disabled per pipe instead
6532 				 * of per queue.
6533 				 */
6534 				if ((ring->me == me_id) &&
6535 				    (ring->pipe == pipe_id) &&
6536 				    (ring->queue == queue_id))
6537 					amdgpu_fence_process(ring);
6538 			}
6539 			break;
6540 		}
6541 	}
6542 
6543 	return 0;
6544 }
6545 
6546 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6547 					      struct amdgpu_irq_src *source,
6548 					      unsigned int type,
6549 					      enum amdgpu_interrupt_state state)
6550 {
6551 	u32 cp_int_cntl_reg, cp_int_cntl;
6552 	int i, j;
6553 
6554 	switch (state) {
6555 	case AMDGPU_IRQ_STATE_DISABLE:
6556 	case AMDGPU_IRQ_STATE_ENABLE:
6557 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6558 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6559 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6560 
6561 				if (cp_int_cntl_reg) {
6562 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6563 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6564 								    PRIV_REG_INT_ENABLE,
6565 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6566 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6567 				}
6568 			}
6569 		}
6570 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6571 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6572 				/* MECs start at 1 */
6573 				cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
6574 
6575 				if (cp_int_cntl_reg) {
6576 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6577 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6578 								    PRIV_REG_INT_ENABLE,
6579 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6580 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6581 				}
6582 			}
6583 		}
6584 		break;
6585 	default:
6586 		break;
6587 	}
6588 
6589 	return 0;
6590 }
6591 
6592 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev,
6593 					    struct amdgpu_irq_src *source,
6594 					    unsigned type,
6595 					    enum amdgpu_interrupt_state state)
6596 {
6597 	u32 cp_int_cntl_reg, cp_int_cntl;
6598 	int i, j;
6599 
6600 	switch (state) {
6601 	case AMDGPU_IRQ_STATE_DISABLE:
6602 	case AMDGPU_IRQ_STATE_ENABLE:
6603 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6604 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6605 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6606 
6607 				if (cp_int_cntl_reg) {
6608 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6609 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6610 								    OPCODE_ERROR_INT_ENABLE,
6611 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6612 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6613 				}
6614 			}
6615 		}
6616 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6617 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6618 				/* MECs start at 1 */
6619 				cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
6620 
6621 				if (cp_int_cntl_reg) {
6622 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6623 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6624 								    OPCODE_ERROR_INT_ENABLE,
6625 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6626 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6627 				}
6628 			}
6629 		}
6630 		break;
6631 	default:
6632 		break;
6633 	}
6634 	return 0;
6635 }
6636 
6637 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6638 					       struct amdgpu_irq_src *source,
6639 					       unsigned int type,
6640 					       enum amdgpu_interrupt_state state)
6641 {
6642 	u32 cp_int_cntl_reg, cp_int_cntl;
6643 	int i, j;
6644 
6645 	switch (state) {
6646 	case AMDGPU_IRQ_STATE_DISABLE:
6647 	case AMDGPU_IRQ_STATE_ENABLE:
6648 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6649 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6650 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6651 
6652 				if (cp_int_cntl_reg) {
6653 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6654 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6655 								    PRIV_INSTR_INT_ENABLE,
6656 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6657 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6658 				}
6659 			}
6660 		}
6661 		break;
6662 	default:
6663 		break;
6664 	}
6665 
6666 	return 0;
6667 }
6668 
6669 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
6670 					struct amdgpu_iv_entry *entry)
6671 {
6672 	u8 me_id, pipe_id, queue_id;
6673 	struct amdgpu_ring *ring;
6674 	int i;
6675 
6676 	me_id = (entry->ring_id & 0x0c) >> 2;
6677 	pipe_id = (entry->ring_id & 0x03) >> 0;
6678 	queue_id = (entry->ring_id & 0x70) >> 4;
6679 
6680 	if (!adev->gfx.disable_kq) {
6681 		switch (me_id) {
6682 		case 0:
6683 			for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
6684 				ring = &adev->gfx.gfx_ring[i];
6685 				if (ring->me == me_id && ring->pipe == pipe_id &&
6686 				    ring->queue == queue_id)
6687 					drm_sched_fault(&ring->sched);
6688 			}
6689 			break;
6690 		case 1:
6691 		case 2:
6692 			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6693 				ring = &adev->gfx.compute_ring[i];
6694 				if (ring->me == me_id && ring->pipe == pipe_id &&
6695 				    ring->queue == queue_id)
6696 					drm_sched_fault(&ring->sched);
6697 			}
6698 			break;
6699 		default:
6700 			BUG();
6701 			break;
6702 		}
6703 	}
6704 }
6705 
6706 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
6707 				  struct amdgpu_irq_src *source,
6708 				  struct amdgpu_iv_entry *entry)
6709 {
6710 	DRM_ERROR("Illegal register access in command stream\n");
6711 	gfx_v11_0_handle_priv_fault(adev, entry);
6712 	return 0;
6713 }
6714 
6715 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev,
6716 				struct amdgpu_irq_src *source,
6717 				struct amdgpu_iv_entry *entry)
6718 {
6719 	DRM_ERROR("Illegal opcode in command stream\n");
6720 	gfx_v11_0_handle_priv_fault(adev, entry);
6721 	return 0;
6722 }
6723 
6724 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
6725 				   struct amdgpu_irq_src *source,
6726 				   struct amdgpu_iv_entry *entry)
6727 {
6728 	DRM_ERROR("Illegal instruction in command stream\n");
6729 	gfx_v11_0_handle_priv_fault(adev, entry);
6730 	return 0;
6731 }
6732 
6733 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
6734 				  struct amdgpu_irq_src *source,
6735 				  struct amdgpu_iv_entry *entry)
6736 {
6737 	if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq)
6738 		return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);
6739 
6740 	return 0;
6741 }
6742 
6743 #if 0
6744 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6745 					     struct amdgpu_irq_src *src,
6746 					     unsigned int type,
6747 					     enum amdgpu_interrupt_state state)
6748 {
6749 	uint32_t tmp, target;
6750 	struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
6751 
6752 	target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6753 	target += ring->pipe;
6754 
6755 	switch (type) {
6756 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6757 		if (state == AMDGPU_IRQ_STATE_DISABLE) {
6758 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6759 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6760 					    GENERIC2_INT_ENABLE, 0);
6761 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6762 
6763 			tmp = RREG32_SOC15_IP(GC, target);
6764 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6765 					    GENERIC2_INT_ENABLE, 0);
6766 			WREG32_SOC15_IP(GC, target, tmp);
6767 		} else {
6768 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6769 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6770 					    GENERIC2_INT_ENABLE, 1);
6771 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6772 
6773 			tmp = RREG32_SOC15_IP(GC, target);
6774 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6775 					    GENERIC2_INT_ENABLE, 1);
6776 			WREG32_SOC15_IP(GC, target, tmp);
6777 		}
6778 		break;
6779 	default:
6780 		BUG(); /* kiq only support GENERIC2_INT now */
6781 		break;
6782 	}
6783 	return 0;
6784 }
6785 #endif
6786 
6787 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6788 {
6789 	const unsigned int gcr_cntl =
6790 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6791 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6792 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6793 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6794 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6795 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6796 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6797 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6798 
6799 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6800 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6801 	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6802 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6803 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6804 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6805 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6806 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6807 	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6808 }
6809 
6810 static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
6811 {
6812 	/* Disable the pipe reset until the CP firmware fully supports it. */
6813 	dev_warn_once(adev->dev, "The CP firmware does not support pipe reset yet\n");
6814 	return false;
6815 }
6816 
6818 static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
6819 {
6820 	struct amdgpu_device *adev = ring->adev;
6821 	uint32_t reset_pipe = 0, clean_pipe = 0;
6822 	int r;
6823 
6824 	if (!gfx_v11_pipe_reset_support(adev))
6825 		return -EOPNOTSUPP;
6826 
6827 	gfx_v11_0_set_safe_mode(adev, 0);
6828 	mutex_lock(&adev->srbm_mutex);
6829 	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6830 
6831 	switch (ring->pipe) {
6832 	case 0:
6833 		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
6834 					   PFP_PIPE0_RESET, 1);
6835 		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
6836 					   ME_PIPE0_RESET, 1);
6837 		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
6838 					   PFP_PIPE0_RESET, 0);
6839 		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
6840 					   ME_PIPE0_RESET, 0);
6841 		break;
6842 	case 1:
6843 		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
6844 					   PFP_PIPE1_RESET, 1);
6845 		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
6846 					   ME_PIPE1_RESET, 1);
6847 		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
6848 					   PFP_PIPE1_RESET, 0);
6849 		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
6850 					   ME_PIPE1_RESET, 0);
6851 		break;
6852 	default:
6853 		break;
6854 	}
6855 
6856 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
6857 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
6858 
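	/*
	 * Read back the ME instruction pointer; zero means the pipe restarted
	 * at the RS64 firmware entry point.
	 */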
6859 	r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
6860 						RS64_FW_UC_START_ADDR_LO;
6861 	soc21_grbm_select(adev, 0, 0, 0, 0);
6862 	mutex_unlock(&adev->srbm_mutex);
6863 	gfx_v11_0_unset_safe_mode(adev, 0);
6864 
6865 	dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC %s\n", ring->name,
6866 			r == 0 ? "succeeded" : "failed");
6867 	/* FIXME: Sometimes the driver can't cache the ME firmware start PC correctly,
6868 	 * so the pipe reset status relies on the later gfx ring test result.
6869 	 */
6870 	return 0;
6871 }
6872 
6873 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
6874 			       unsigned int vmid,
6875 			       struct amdgpu_fence *timedout_fence)
6876 {
6877 	struct amdgpu_device *adev = ring->adev;
6878 	bool use_mmio = false;
6879 	int r;
6880 
6881 	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
6882 
6883 	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, use_mmio, 0);
6884 	if (r) {
6886 		dev_warn(adev->dev, "reset via MES failed (%d), trying pipe reset\n", r);
6887 		r = gfx_v11_reset_gfx_pipe(ring);
6888 		if (r)
6889 			return r;
6890 	}
6891 
6892 	if (use_mmio) {
6893 		r = gfx_v11_0_kgq_init_queue(ring, true);
6894 		if (r) {
6895 			dev_err(adev->dev, "failed to init kgq\n");
6896 			return r;
6897 		}
6898 
6899 		r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
6900 		if (r) {
6901 			dev_err(adev->dev, "failed to remap kgq\n");
6902 			return r;
6903 		}
6904 	}
6905 
6906 	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
6907 }
6908 
6909 static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
6910 {
6912 	struct amdgpu_device *adev = ring->adev;
6913 	uint32_t reset_pipe = 0, clean_pipe = 0;
6914 	int r;
6915 
6916 	if (!gfx_v11_pipe_reset_support(adev))
6917 		return -EOPNOTSUPP;
6918 
6919 	gfx_v11_0_set_safe_mode(adev, 0);
6920 	mutex_lock(&adev->srbm_mutex);
6921 	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6922 
6923 	reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
6924 	clean_pipe = reset_pipe;
6925 
6926 	if (adev->gfx.rs64_enable) {
6928 		switch (ring->pipe) {
6929 		case 0:
6930 			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
6931 						   MEC_PIPE0_RESET, 1);
6932 			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
6933 						   MEC_PIPE0_RESET, 0);
6934 			break;
6935 		case 1:
6936 			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
6937 						   MEC_PIPE1_RESET, 1);
6938 			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
6939 						   MEC_PIPE1_RESET, 0);
6940 			break;
6941 		case 2:
6942 			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
6943 						   MEC_PIPE2_RESET, 1);
6944 			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
6945 						   MEC_PIPE2_RESET, 0);
6946 			break;
6947 		case 3:
6948 			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
6949 						   MEC_PIPE3_RESET, 1);
6950 			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
6951 						   MEC_PIPE3_RESET, 0);
6952 			break;
6953 		default:
6954 			break;
6955 		}
6956 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
6957 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
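		/* Zero means the pipe restarted at the RS64 firmware entry point. */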
6958 		r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
6959 					RS64_FW_UC_START_ADDR_LO;
6960 	} else {
6961 		if (ring->me == 1) {
6962 			switch (ring->pipe) {
6963 			case 0:
6964 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6965 							   MEC_ME1_PIPE0_RESET, 1);
6966 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6967 							   MEC_ME1_PIPE0_RESET, 0);
6968 				break;
6969 			case 1:
6970 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6971 							   MEC_ME1_PIPE1_RESET, 1);
6972 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6973 							   MEC_ME1_PIPE1_RESET, 0);
6974 				break;
6975 			case 2:
6976 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6977 							   MEC_ME1_PIPE2_RESET, 1);
6978 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6979 							   MEC_ME1_PIPE2_RESET, 0);
6980 				break;
6981 			case 3:
6982 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6983 							   MEC_ME1_PIPE3_RESET, 1);
6984 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6985 							   MEC_ME1_PIPE3_RESET, 0);
6986 				break;
6987 			default:
6988 				break;
6989 			}
6990 			/* mec1 fw pc: CP_MEC1_INSTR_PNTR */
6991 		} else {
6992 			switch (ring->pipe) {
6993 			case 0:
6994 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6995 							   MEC_ME2_PIPE0_RESET, 1);
6996 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6997 							   MEC_ME2_PIPE0_RESET, 0);
6998 				break;
6999 			case 1:
7000 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
7001 							   MEC_ME2_PIPE1_RESET, 1);
7002 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
7003 							   MEC_ME2_PIPE1_RESET, 0);
7004 				break;
7005 			case 2:
7006 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
7007 							   MEC_ME2_PIPE2_RESET, 1);
7008 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
7009 							   MEC_ME2_PIPE2_RESET, 0);
7010 				break;
7011 			case 3:
7012 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
7013 							   MEC_ME2_PIPE3_RESET, 1);
7014 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
7015 							   MEC_ME2_PIPE3_RESET, 0);
7016 				break;
7017 			default:
7018 				break;
7019 			}
7020 			/* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
7021 		}
7022 		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
7023 		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
7024 		r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
7025 	}
7026 
7027 	soc21_grbm_select(adev, 0, 0, 0, 0);
7028 	mutex_unlock(&adev->srbm_mutex);
7029 	gfx_v11_0_unset_safe_mode(adev, 0);
7030 
7031 	dev_info(adev->dev, "The ring %s pipe reset to the MEC firmware start PC %s\n", ring->name,
7032 			r == 0 ? "succeeded" : "failed");
7033 	/* FIXME: Sometimes the driver can't cache the MEC firmware start PC correctly, so the pipe
7034 	 * reset status relies on the later compute ring test result.
7035 	 */
7036 	return 0;
7037 }
7038 
7039 static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
7040 			       unsigned int vmid,
7041 			       struct amdgpu_fence *timedout_fence)
7042 {
7043 	struct amdgpu_device *adev = ring->adev;
7044 	int r = 0;
7045 
7046 	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
7047 
7048 	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true, 0);
7049 	if (r) {
7050 		dev_warn(adev->dev, "failed (%d) to reset kcq, trying pipe reset\n", r);
7051 		r = gfx_v11_0_reset_compute_pipe(ring);
7052 		if (r)
7053 			return r;
7054 	}
7055 
7056 	r = gfx_v11_0_kcq_init_queue(ring, true);
7057 	if (r) {
7058 		dev_err(adev->dev, "failed to init kcq\n");
7059 		return r;
7060 	}
7061 	r = amdgpu_mes_map_legacy_queue(adev, ring, 0);
7062 	if (r) {
7063 		dev_err(adev->dev, "failed to remap kcq\n");
7064 		return r;
7065 	}
7066 
7067 	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
7068 }
7069 
7070 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
7071 {
7072 	struct amdgpu_device *adev = ip_block->adev;
7073 	uint32_t i, j, k, reg, index = 0;
7074 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
7075 
7076 	if (!adev->gfx.ip_dump_core)
7077 		return;
7078 
7079 	for (i = 0; i < reg_count; i++)
7080 		drm_printf(p, "%-50s \t 0x%08x\n",
7081 			   gc_reg_list_11_0[i].reg_name,
7082 			   adev->gfx.ip_dump_core[i]);
7083 
7084 	/* print compute queue registers for all instances */
7085 	if (!adev->gfx.ip_dump_compute_queues)
7086 		return;
7087 
7088 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
7089 	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7090 		   adev->gfx.mec.num_mec,
7091 		   adev->gfx.mec.num_pipe_per_mec,
7092 		   adev->gfx.mec.num_queue_per_pipe);
7093 
7094 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7095 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7096 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7097 				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7098 				for (reg = 0; reg < reg_count; reg++) {
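					/*
					 * The register list names MEC1's HEADER_DUMP;
					 * label MEC2 reads with the ME2 register name.
					 */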
7099 					if (i && gc_cp_reg_list_11[reg].reg_offset == regCP_MEC_ME1_HEADER_DUMP)
7100 						drm_printf(p, "%-50s \t 0x%08x\n",
7101 							   "regCP_MEC_ME2_HEADER_DUMP",
7102 							   adev->gfx.ip_dump_compute_queues[index + reg]);
7103 					else
7104 						drm_printf(p, "%-50s \t 0x%08x\n",
7105 							   gc_cp_reg_list_11[reg].reg_name,
7106 							   adev->gfx.ip_dump_compute_queues[index + reg]);
7107 				}
7108 				index += reg_count;
7109 			}
7110 		}
7111 	}
7112 
7113 	/* print gfx queue registers for all instances */
7114 	if (!adev->gfx.ip_dump_gfx_queues)
7115 		return;
7116 
7117 	index = 0;
7118 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
7119 	drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
7120 		   adev->gfx.me.num_me,
7121 		   adev->gfx.me.num_pipe_per_me,
7122 		   adev->gfx.me.num_queue_per_pipe);
7123 
7124 	for (i = 0; i < adev->gfx.me.num_me; i++) {
7125 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
7126 			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
7127 				drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
7128 				for (reg = 0; reg < reg_count; reg++) {
7129 					drm_printf(p, "%-50s \t 0x%08x\n",
7130 						   gc_gfx_queue_reg_list_11[reg].reg_name,
7131 						   adev->gfx.ip_dump_gfx_queues[index + reg]);
7132 				}
7133 				index += reg_count;
7134 			}
7135 		}
7136 	}
7137 }
7138 
7139 static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
7140 {
7141 	struct amdgpu_device *adev = ip_block->adev;
7142 	uint32_t i, j, k, reg, index = 0;
7143 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
7144 
7145 	if (!adev->gfx.ip_dump_core)
7146 		return;
7147 
7148 	amdgpu_gfx_off_ctrl(adev, false);
7149 	for (i = 0; i < reg_count; i++)
7150 		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
7151 	amdgpu_gfx_off_ctrl(adev, true);
7152 
7153 	/* dump compute queue registers for all instances */
7154 	if (!adev->gfx.ip_dump_compute_queues)
7155 		return;
7156 
7157 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
7158 	amdgpu_gfx_off_ctrl(adev, false);
7159 	mutex_lock(&adev->srbm_mutex);
7160 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7161 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7162 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7163 				/* ME0 is for GFX so start from 1 for CP */
7164 				soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
7165 				for (reg = 0; reg < reg_count; reg++) {
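					/*
					 * MEC2 exposes its own HEADER_DUMP register;
					 * read it instead of the MEC1 one in the list.
					 */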
7166 					if (i &&
7167 					    gc_cp_reg_list_11[reg].reg_offset ==
7168 						    regCP_MEC_ME1_HEADER_DUMP)
7169 						adev->gfx.ip_dump_compute_queues[index + reg] =
7170 							RREG32(SOC15_REG_OFFSET(GC, 0,
7171 							       regCP_MEC_ME2_HEADER_DUMP));
7172 					else
7173 						adev->gfx.ip_dump_compute_queues[index + reg] =
7174 							RREG32(SOC15_REG_ENTRY_OFFSET(
7175 								       gc_cp_reg_list_11[reg]));
7176 				}
7177 				index += reg_count;
7178 			}
7179 		}
7180 	}
7181 	soc21_grbm_select(adev, 0, 0, 0, 0);
7182 	mutex_unlock(&adev->srbm_mutex);
7183 	amdgpu_gfx_off_ctrl(adev, true);
7184 
7185 	/* dump gfx queue registers for all instances */
7186 	if (!adev->gfx.ip_dump_gfx_queues)
7187 		return;
7188 
7189 	index = 0;
7190 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
7191 	amdgpu_gfx_off_ctrl(adev, false);
7192 	mutex_lock(&adev->srbm_mutex);
7193 	for (i = 0; i < adev->gfx.me.num_me; i++) {
7194 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
7195 			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
7196 				soc21_grbm_select(adev, i, j, k, 0);
7197 
7198 				for (reg = 0; reg < reg_count; reg++) {
7199 					adev->gfx.ip_dump_gfx_queues[index + reg] =
7200 						RREG32(SOC15_REG_ENTRY_OFFSET(
7201 							gc_gfx_queue_reg_list_11[reg]));
7202 				}
7203 				index += reg_count;
7204 			}
7205 		}
7206 	}
7207 	soc21_grbm_select(adev, 0, 0, 0, 0);
7208 	mutex_unlock(&adev->srbm_mutex);
7209 	amdgpu_gfx_off_ctrl(adev, true);
7210 }
7211 
7212 static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
7213 {
7214 	/* Emit the cleaner shader */
7215 	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
7216 	amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
7217 }
7218 
7219 static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
7220 {
7221 	amdgpu_gfx_profile_ring_begin_use(ring);
7222 
7223 	amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
7224 }
7225 
7226 static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
7227 {
7228 	amdgpu_gfx_profile_ring_end_use(ring);
7229 
7230 	amdgpu_gfx_enforce_isolation_ring_end_use(ring);
7231 }
7232 
7233 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
7234 	.name = "gfx_v11_0",
7235 	.early_init = gfx_v11_0_early_init,
7236 	.late_init = gfx_v11_0_late_init,
7237 	.sw_init = gfx_v11_0_sw_init,
7238 	.sw_fini = gfx_v11_0_sw_fini,
7239 	.hw_init = gfx_v11_0_hw_init,
7240 	.hw_fini = gfx_v11_0_hw_fini,
7241 	.suspend = gfx_v11_0_suspend,
7242 	.resume = gfx_v11_0_resume,
7243 	.is_idle = gfx_v11_0_is_idle,
7244 	.wait_for_idle = gfx_v11_0_wait_for_idle,
7245 	.soft_reset = gfx_v11_0_soft_reset,
7246 	.check_soft_reset = gfx_v11_0_check_soft_reset,
7247 	.post_soft_reset = gfx_v11_0_post_soft_reset,
7248 	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
7249 	.set_powergating_state = gfx_v11_0_set_powergating_state,
7250 	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
7251 	.dump_ip_state = gfx_v11_ip_dump,
7252 	.print_ip_state = gfx_v11_ip_print,
7253 };
7254 
7255 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
7256 	.type = AMDGPU_RING_TYPE_GFX,
7257 	.align_mask = 0xff,
7258 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7259 	.support_64bit_ptrs = true,
7260 	.secure_submission_supported = true,
7261 	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
7262 	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
7263 	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
7264 	.emit_frame_size = /* 247 dwords maximum if 16 IBs */
7265 		5 + /* update_spm_vmid */
7266 		5 + /* COND_EXEC */
7267 		22 + /* SET_Q_PREEMPTION_MODE */
7268 		7 + /* PIPELINE_SYNC */
7269 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7270 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7271 		4 + /* VM_FLUSH */
7272 		8 + /* FENCE for VM_FLUSH */
7273 		20 + /* GDS switch */
7274 		5 + /* COND_EXEC */
7275 		7 + /* HDP_flush */
7276 		4 + /* VGT_flush */
7277 		31 + /* DE_META */
7278 		3 + /* CNTX_CTRL */
7279 		5 + /* HDP_INVL */
7280 		22 + /* SET_Q_PREEMPTION_MODE */
7281 		8 + 8 + /* FENCE x2 */
7282 		8 + /* gfx_v11_0_emit_mem_sync */
7283 		2, /* gfx_v11_0_ring_emit_cleaner_shader */
7284 	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
7285 	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
7286 	.emit_fence = gfx_v11_0_ring_emit_fence,
7287 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
7288 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
7289 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
7290 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
7291 	.test_ring = gfx_v11_0_ring_test_ring,
7292 	.test_ib = gfx_v11_0_ring_test_ib,
7293 	.insert_nop = gfx_v11_ring_insert_nop,
7294 	.pad_ib = amdgpu_ring_generic_pad_ib,
7295 	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
7296 	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
7297 	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
7298 	.preempt_ib = gfx_v11_0_ring_preempt_ib,
7299 	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
7300 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
7301 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
7302 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
7303 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
7304 	.reset = gfx_v11_0_reset_kgq,
7305 	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
7306 	.begin_use = gfx_v11_0_ring_begin_use,
7307 	.end_use = gfx_v11_0_ring_end_use,
7308 };
7309 
7310 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
7311 	.type = AMDGPU_RING_TYPE_COMPUTE,
7312 	.align_mask = 0xff,
7313 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7314 	.support_64bit_ptrs = true,
7315 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
7316 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
7317 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
7318 	.emit_frame_size =
7319 		5 + /* update_spm_vmid */
7320 		20 + /* gfx_v11_0_ring_emit_gds_switch */
7321 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
7322 		5 + /* hdp invalidate */
7323 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
7324 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7325 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7326 		2 + /* gfx_v11_0_ring_emit_vm_flush */
7327 		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
7328 		8 + /* gfx_v11_0_emit_mem_sync */
7329 		2, /* gfx_v11_0_ring_emit_cleaner_shader */
7330 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
7331 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
7332 	.emit_fence = gfx_v11_0_ring_emit_fence,
7333 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
7334 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
7335 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
7336 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
7337 	.test_ring = gfx_v11_0_ring_test_ring,
7338 	.test_ib = gfx_v11_0_ring_test_ib,
7339 	.insert_nop = gfx_v11_ring_insert_nop,
7340 	.pad_ib = amdgpu_ring_generic_pad_ib,
7341 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
7342 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
7343 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
7344 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
7345 	.reset = gfx_v11_0_reset_kcq,
7346 	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
7347 	.begin_use = gfx_v11_0_ring_begin_use,
7348 	.end_use = gfx_v11_0_ring_end_use,
7349 };
7350 
7351 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
7352 	.type = AMDGPU_RING_TYPE_KIQ,
7353 	.align_mask = 0xff,
7354 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7355 	.support_64bit_ptrs = true,
7356 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
7357 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
7358 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
7359 	.emit_frame_size =
7360 		20 + /* gfx_v11_0_ring_emit_gds_switch */
7361 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
7362 		5 + /* hdp invalidate */
7363 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
7364 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7365 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7366 		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7367 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
7368 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
7369 	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
7370 	.test_ring = gfx_v11_0_ring_test_ring,
7371 	.test_ib = gfx_v11_0_ring_test_ib,
7372 	.insert_nop = amdgpu_ring_insert_nop,
7373 	.pad_ib = amdgpu_ring_generic_pad_ib,
7374 	.emit_rreg = gfx_v11_0_ring_emit_rreg,
7375 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
7376 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
7377 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
7378 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
7379 };
7380 
7381 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
7382 {
7383 	int i;
7384 
7385 	adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;
7386 
7387 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7388 		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
7389 
7390 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7391 		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
7392 }
7393 
7394 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
7395 	.set = gfx_v11_0_set_eop_interrupt_state,
7396 	.process = gfx_v11_0_eop_irq,
7397 };
7398 
7399 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
7400 	.set = gfx_v11_0_set_priv_reg_fault_state,
7401 	.process = gfx_v11_0_priv_reg_irq,
7402 };
7403 
7404 static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
7405 	.set = gfx_v11_0_set_bad_op_fault_state,
7406 	.process = gfx_v11_0_bad_op_irq,
7407 };
7408 
7409 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
7410 	.set = gfx_v11_0_set_priv_inst_fault_state,
7411 	.process = gfx_v11_0_priv_inst_irq,
7412 };
7413 
7414 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
7415 	.process = gfx_v11_0_rlc_gc_fed_irq,
7416 };
7417 
7418 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
7419 {
7420 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7421 	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
7422 
7423 	adev->gfx.priv_reg_irq.num_types = 1;
7424 	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
7425 
7426 	adev->gfx.bad_op_irq.num_types = 1;
7427 	adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;
7428 
7429 	adev->gfx.priv_inst_irq.num_types = 1;
7430 	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
7431 
7432 	adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
7433 	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
7434 
7436 
7437 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
7438 {
7439 	if (adev->flags & AMD_IS_APU)
7440 		adev->gfx.imu.mode = MISSION_MODE;
7441 	else
7442 		adev->gfx.imu.mode = DEBUG_MODE;
7443 
7444 	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
7445 }
7446 
7447 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
7448 {
7449 	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
7450 }
7451 
7452 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
7453 {
7454 	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
7455 			    adev->gfx.config.max_sh_per_se *
7456 			    adev->gfx.config.max_shader_engines;
7457 
7458 	adev->gds.gds_size = 0x1000;
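	/* Assumes up to 32 waves per CU; wave IDs are zero-based, hence the -1. */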
7459 	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
7460 	adev->gds.gws_size = 64;
7461 	adev->gds.oa_size = 16;
7462 }
7463 
7464 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
7465 {
7466 	/* set gfx eng mqd */
7467 	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
7468 		sizeof(struct v11_gfx_mqd);
7469 	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
7470 		gfx_v11_0_gfx_mqd_init;
7471 	/* set compute eng mqd */
7472 	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
7473 		sizeof(struct v11_compute_mqd);
7474 	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
7475 		gfx_v11_0_compute_mqd_init;
7476 }
7477 
7478 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
7479 							  u32 bitmap)
7480 {
7481 	u32 data;
7482 
7483 	if (!bitmap)
7484 		return;
7485 
7486 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
7487 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
7488 
7489 	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
7490 }
7491 
7492 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
7493 {
7494 	u32 data, wgp_bitmask;

7495 	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
7496 	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
7497 
7498 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
7499 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
7500 
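	/* Two CUs per WGP, so the WGP count is half the CU count. */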
7501 	wgp_bitmask =
7502 		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
7503 
7504 	return (~data) & wgp_bitmask;
7505 }
7506 
7507 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
7508 {
7509 	u32 wgp_idx, wgp_active_bitmap;
7510 	u32 cu_bitmap_per_wgp, cu_active_bitmap;
7511 
7512 	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
7513 	cu_active_bitmap = 0;
7514 
7515 	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
7516 		/* one enabled WGP means its two CUs are enabled */
7517 		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
7518 		if (wgp_active_bitmap & (1 << wgp_idx))
7519 			cu_active_bitmap |= cu_bitmap_per_wgp;
7520 	}
7521 
7522 	return cu_active_bitmap;
7523 }
7524 
7525 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
7526 				 struct amdgpu_cu_info *cu_info)
7527 {
7528 	int i, j, k, counter, active_cu_number = 0;
7529 	u32 mask, bitmap;
7530 	unsigned disable_masks[8 * 2];
7531 
7532 	if (!adev || !cu_info)
7533 		return -EINVAL;
7534 
7535 	amdgpu_gfx_parse_disable_cu(adev, disable_masks, 8, 2);
7536 
7537 	mutex_lock(&adev->grbm_idx_mutex);
7538 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7539 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7540 			bitmap = i * adev->gfx.config.max_sh_per_se + j;
7541 			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
7542 				continue;
7543 			mask = 1;
7544 			counter = 0;
7545 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
7546 			if (i < 8 && j < 2)
7547 				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
7548 					adev, disable_masks[i * 2 + j]);
7549 			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
7550 
7551 			/*
7552 			 * GFX11 could support more than 4 SEs, while the bitmap
7553 			 * in cu_info struct is 4x4 and ioctl interface struct
7554 			 * drm_amdgpu_info_device should keep stable.
7555 			 * So we use last two columns of bitmap to store cu mask for
7556 			 * SEs 4 to 7, the layout of the bitmap is as below:
7557 			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
7558 			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
7559 			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
7560 			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
7561 			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
7562 			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
7563 			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
7564 			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
7565 			 */
7566 			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
7567 
7568 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7569 				if (bitmap & mask)
7570 					counter++;
7571 
7572 				mask <<= 1;
7573 			}
7574 			active_cu_number += counter;
7575 		}
7576 	}
7577 	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7578 	mutex_unlock(&adev->grbm_idx_mutex);
7579 
7580 	cu_info->number = active_cu_number;
7581 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7582 
7583 	return 0;
7584 }
7585 
7586 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
7587 {
7588 	.type = AMD_IP_BLOCK_TYPE_GFX,
7589 	.major = 11,
7590 	.minor = 0,
7591 	.rev = 0,
7592 	.funcs = &gfx_v11_0_ip_funcs,
7593 };
7594