/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "imu_v11_0.h"
#include "soc21.h"
#include "nvd.h"

#include "gc/gc_11_0_0_offset.h"
#include "gc/gc_11_0_0_sh_mask.h"
#include "smuio/smuio_13_0_6_offset.h"
#include "smuio/smuio_13_0_6_sh_mask.h"
#include "navi10_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"

#include "soc15.h"
#include "clearstate_gfx11.h"
#include "v11_structs.h"
#include "gfx_v11_0.h"
#include "gfx_v11_0_cleaner_shader.h"
#include "gfx_v11_0_3.h"
#include "nbio_v4_3.h"
#include "mes_v11_0.h"

#define GFX11_NUM_GFX_RINGS		1
#define GFX11_MEC_HPD_SIZE	2048

#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
#define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388

#define regCGTT_WD_CLK_CTRL		0x5086
#define regCGTT_WD_CLK_CTRL_BASE_IDX	1
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1	0x4e7e
#define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
#define regPC_CONFIG_CNTL_1		0x194d
#define regPC_CONFIG_CNTL_1_BASE_IDX	1

#define regCP_GFX_MQD_CONTROL_DEFAULT                                             0x00000100
#define regCP_GFX_HQD_VMID_DEFAULT                                                0x00000000
#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT                                      0x00000000
#define regCP_GFX_HQD_QUANTUM_DEFAULT                                             0x00000a01
#define regCP_GFX_HQD_CNTL_DEFAULT                                                0x00a00000
#define regCP_RB_DOORBELL_CONTROL_DEFAULT                                         0x00000000
#define regCP_GFX_HQD_RPTR_DEFAULT                                                0x00000000

#define regCP_HQD_EOP_CONTROL_DEFAULT                                             0x00000006
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT                                     0x00000000
#define regCP_MQD_CONTROL_DEFAULT                                                 0x00000100
#define regCP_HQD_PQ_CONTROL_DEFAULT                                              0x00308509
#define regCP_HQD_PQ_RPTR_DEFAULT                                                 0x00000000
#define regCP_HQD_PERSISTENT_STATE_DEFAULT                                        0x0be05501
#define regCP_HQD_IB_CONTROL_DEFAULT                                              0x00300000

MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");

static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
	SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
	SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
	/* cp header registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
	/* SE status registers */
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
};

static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
	/* compute registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
};

static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
	/* gfx queue registers */
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
};

static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
};

static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = {
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};

#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))

static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				   u32 sh_num, u32 instance, int xcc_id);
static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);

static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val);
static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
				      bool enable);

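/*
 * Hand the KIQ the global compute resources via a SET_RESOURCES packet:
 * the queue mask plus the cleaner shader address (in 256-byte units).
 */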
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	u64 shader_mc_addr;

	/* Cleaner shader MC address */
	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
}

static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				 struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t me = 0, eng_sel = 0;

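	/*
	 * ME/engine-select encodings consumed by the KIQ firmware:
	 * me 1/eng_sel 0 for compute (MEC), me 0/eng_sel 4 for gfx,
	 * me 2/eng_sel 5 for MES queues.
	 */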
	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_COMPUTE:
		me = 1;
		eng_sel = 0;
		break;
	case AMDGPU_RING_TYPE_GFX:
		me = 0;
		eng_sel = 4;
		break;
	case AMDGPU_RING_TYPE_MES:
		me = 2;
		eng_sel = 5;
		break;
	default:
		WARN_ON(1);
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
	amdgpu_ring_write(kiq_ring,
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((me)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

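/*
 * Unmap (or preempt) a queue through the KIQ; when MES is enabled and the
 * KIQ ring is not ready, the request is routed through MES instead.
 */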
static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   enum amdgpu_unmap_queues_action action,
				   u64 gpu_addr, u64 seq)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
		return;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
		  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
				   struct amdgpu_ring *ring,
				   u64 addr,
				   u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
				uint16_t pasid, uint32_t flush_type,
				bool all_hub)
{
	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}

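/* The sizes below are packet lengths in dwords, including the PM4 header. */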
static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
	.kiq_set_resources = gfx11_kiq_set_resources,
	.kiq_map_queues = gfx11_kiq_map_queues,
	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
	.kiq_query_status = gfx11_kiq_query_status,
	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
}

static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
{
	if (amdgpu_sriov_vf(adev))
		return;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
		soc15_program_register_sequence(adev,
						golden_settings_gc_11_0_1,
						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
		break;
	default:
		break;
	}
	soc15_program_register_sequence(adev,
					golden_settings_gc_11_0,
					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
}

static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
{
	/* Header itself is a NOP packet */
	if (num_nop == 1) {
		amdgpu_ring_write(ring, ring->funcs->nop);
		return;
	}

	/* Max HW optimization till 0x3ffe, followed by remaining NOPs one at a time */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));

	/* Header is at index 0, followed by num_nop - 1 NOP packets */
	amdgpu_ring_insert_nop(ring, num_nop - 1);
}

static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
	} else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		amdgpu_ring_write(ring, scratch -
				  PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, 0xDEADBEEF);
	}
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	volatile uint32_t *cpu_ptr;
	long r;

	/* MES KIQ fw does not support indirect buffers for now */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	if (ring->is_mes_queue) {
		uint32_t padding, offset;

		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
		padding = amdgpu_mes_ctx_get_offs(ring,
						  AMDGPU_MES_CTX_PADDING_OFFS);

		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);

		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
	} else {
		r = amdgpu_device_wb_get(adev, &index);
		if (r)
			return r;

		gpu_addr = adev->wb.gpu_addr + (index * 4);
		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
		cpu_ptr = &adev->wb.wb[index];

		r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
		if (r) {
			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
			goto err1;
		}
	}

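	/* Build a WRITE_DATA packet that stores the test pattern at gpu_addr. */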
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;
err2:
	if (!ring->is_mes_queue)
		amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err1:
	if (!ring->is_mes_queue)
		amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
{
	const struct psp_firmware_header_v1_0 *toc_hdr;
	int err = 0;

	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_toc.bin", ucode_prefix);
	if (err)
		goto out;

	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
	return 0;
out:
	amdgpu_ucode_release(&adev->psp.toc_fw);
	return err;
}

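/*
 * CP gfx shadow (per-queue register/state shadowing) requires minimum
 * ME/PFP/MEC firmware versions and is only enabled under SR-IOV.
 */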
static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
	case IP_VERSION(11, 0, 3):
		if ((adev->gfx.me_fw_version >= 1505) &&
		    (adev->gfx.pfp_fw_version >= 1600) &&
		    (adev->gfx.mec_fw_version >= 512)) {
			if (amdgpu_sriov_vf(adev))
				adev->gfx.cp_gfx_shadow = true;
			else
				adev->gfx.cp_gfx_shadow = false;
		}
		break;
	default:
		adev->gfx.cp_gfx_shadow = false;
		break;
	}
}

static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
	char ucode_prefix[25];
	int err;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	uint16_t version_major;
	uint16_t version_minor;

	DRM_DEBUG("\n");

	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_pfp.bin", ucode_prefix);
	if (err)
		goto out;
	/* check pfp fw hdr version to decide whether to enable rs64 for gfx11 */
	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
				(union amdgpu_firmware_header *)
				adev->gfx.pfp_fw->data, 2, 0);
	if (adev->gfx.rs64_enable) {
		dev_info(adev->dev, "CP RS64 enable\n");
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
	}

	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_me.bin", ucode_prefix);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
	}

	if (!amdgpu_sriov_vf(adev)) {
		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
		    adev->pdev->revision == 0xCE)
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/gc_11_0_0_rlc_1.bin");
		else
			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_rlc.bin", ucode_prefix);
		if (err)
			goto out;
		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
		if (err)
			goto out;
	}

	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_mec.bin", ucode_prefix);
	if (err)
		goto out;
	if (adev->gfx.rs64_enable) {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
	} else {
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);

	/* only one MEC for gfx 11.0.0. */
	adev->gfx.mec2_fw = NULL;

	gfx_v11_0_check_fw_cp_gfx_shadow(adev);

	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
		err = adev->gfx.imu.funcs->init_microcode(adev);
		if (err)
			DRM_ERROR("Failed to init imu firmware!\n");
		return err;
	}

out:
	if (err) {
		amdgpu_ucode_release(&adev->gfx.pfp_fw);
		amdgpu_ucode_release(&adev->gfx.me_fw);
		amdgpu_ucode_release(&adev->gfx.rlc_fw);
		amdgpu_ucode_release(&adev->gfx.mec_fw);
	}

	return err;
}

static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* set PA_SC_TILE_STEERING_OVERRIDE */
	count += 3;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int ctx_reg_offset;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			}
		}
	}

	ctx_reg_offset =
		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	buffer[count++] = cpu_to_le32(ctx_reg_offset);
	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			&adev->gfx.rlc.clear_state_gpu_addr,
			(void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			&adev->gfx.rlc.cp_table_gpu_addr,
			(void **)&adev->gfx.rlc.cp_table_ptr);
}

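/*
 * Register offsets used by the RLCG interface for indirect register
 * access on behalf of the driver (e.g. under SR-IOV).
 */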
static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;

	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
	adev->gfx.rlc.rlcg_reg_access_supported = true;
}

static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx11_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);

	return 0;
}

static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}

static void gfx_v11_0_me_init(struct amdgpu_device *adev)
{
	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);

	amdgpu_gfx_graphics_queue_acquire(adev);
}

static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;

	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			gfx_v11_0_mec_fini(adev);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

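/* Read an SQ indirect register for a wave: program SQ_IND_INDEX, read SQ_IND_DATA. */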
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
}

static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here */
	WARN_ON(simd != 0);

	/* type 3 wave data */
	dst[(*no_fields)++] = 3;
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}

static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	WARN_ON(simd != 0);

	wave_read_regs(
		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
		dst);
}

static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t thread,
				      uint32_t start, uint32_t size,
				      uint32_t *dst)
{
	wave_read_regs(
		adev, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
					u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
	soc21_grbm_select(adev, me, pipe, q, vm);
}

/* all sizes are in bytes */
#define MQD_SHADOW_BASE_SIZE      73728
#define MQD_SHADOW_BASE_ALIGNMENT 256
#define MQD_FWWORKAREA_SIZE       484
#define MQD_FWWORKAREA_ALIGNMENT  256

static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
					 struct amdgpu_gfx_shadow_info *shadow_info)
{
	if (adev->gfx.cp_gfx_shadow) {
		shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
		shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
		shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
		shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
		return 0;
	} else {
		memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
		return -ENOTSUPP;
	}
}

static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v11_0_select_se_sh,
	.read_wave_data = &gfx_v11_0_read_wave_data,
	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
	.get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
};

static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(11, 0, 0):
	case IP_VERSION(11, 0, 2):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 3):
		adev->gfx.ras = &gfx_v11_0_3_ras;
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	case IP_VERSION(11, 0, 1):
	case IP_VERSION(11, 0, 4):
	case IP_VERSION(11, 5, 0):
	case IP_VERSION(11, 5, 1):
	case IP_VERSION(11, 5, 2):
	case IP_VERSION(11, 5, 3):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
		break;
	default:
		BUG();
		break;
	}

	return 0;
}

static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
				   int me, int pipe, int queue)
{
	struct amdgpu_ring *ring;
	unsigned int irq_type;
	unsigned int hw_prio;

	ring = &adev->gfx.gfx_ring[ring_id];

	ring->me = me;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;

	if (!ring_id)
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
	else
		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
		AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
				hw_prio, NULL);
}

static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				       int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring;
	unsigned int hw_prio;

	ring = &adev->gfx.compute_ring[ring_id];

	/* mec0 is me1 */
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
				+ (ring_id * GFX11_MEC_HPD_SIZE);
	ring->vm_hub = AMDGPU_GFXHUB(0);
	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
			     hw_prio, NULL);
	if (r)
		return r;

	return 0;
}

static struct {
	SOC21_FIRMWARE_ID	id;
	unsigned int		offset;
	unsigned int		size;
} rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];

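/* TOC entries give offset/size in dwords; store them in bytes. */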
static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
{
	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;

	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
			(ucode->id < SOC21_FIRMWARE_ID_MAX)) {
		rlc_autoload_info[ucode->id].id = ucode->id;
		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
		rlc_autoload_info[ucode->id].size = ucode->size * 4;

		ucode++;
	}
}

static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
{
	uint32_t total_size = 0;
	SOC21_FIRMWARE_ID id;

	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);

	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
		total_size += rlc_autoload_info[id].size;

	/* The offsets in the RLC TOC may be padded for alignment; make sure
	 * the total covers the last entry */
	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
			rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;

	return total_size;
}

static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
{
	int r;
	uint32_t total_size;

	total_size = gfx_v11_0_calc_toc_total_size(adev);

	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM |
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.rlc.rlc_autoload_bo,
				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);

	if (r) {
		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
		return r;
	}

	return 0;
}

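/*
 * Copy one firmware image into its slot in the autoload buffer, clamping
 * to the slot size from the TOC and zero-padding any remainder.
 */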
static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
					      SOC21_FIRMWARE_ID id,
					      const void *fw_data,
					      uint32_t fw_size,
					      uint32_t *fw_autoload_mask)
{
	uint32_t toc_offset;
	uint32_t toc_fw_size;
	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;

	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
		return;

	toc_offset = rlc_autoload_info[id].offset;
	toc_fw_size = rlc_autoload_info[id].size;

	if (fw_size == 0)
		fw_size = toc_fw_size;

	if (fw_size > toc_fw_size)
		fw_size = toc_fw_size;

	memcpy(ptr + toc_offset, fw_data, fw_size);

	if (fw_size < toc_fw_size)
		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);

	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
}

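/*
 * Copy the TOC itself into the autoload buffer. The last 64-bit word of
 * the TOC is patched with the mask of firmwares to be autoloaded.
 */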
static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
							uint32_t *fw_autoload_mask)
{
	void *data;
	uint32_t size;
	uint64_t *toc_ptr;

	*(uint64_t *)fw_autoload_mask |= 0x1;

	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);

	data = adev->psp.toc.start_addr;
	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;

	toc_ptr = (uint64_t *)data + size / 8 - 1;
	*toc_ptr = *(uint64_t *)fw_autoload_mask;

	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
					data, size, fw_autoload_mask);
}

1301 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1302 							uint32_t *fw_autoload_mask)
1303 {
1304 	const __le32 *fw_data;
1305 	uint32_t fw_size;
1306 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1307 	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1308 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1309 	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1310 	uint16_t version_major, version_minor;
1311 
1312 	if (adev->gfx.rs64_enable) {
1313 		/* pfp ucode */
1314 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1315 			adev->gfx.pfp_fw->data;
1316 		/* instruction */
1317 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1318 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1319 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1320 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1321 						fw_data, fw_size, fw_autoload_mask);
1322 		/* data */
1323 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1324 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1325 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1326 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1327 						fw_data, fw_size, fw_autoload_mask);
1328 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1329 						fw_data, fw_size, fw_autoload_mask);
1330 		/* me ucode */
1331 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1332 			adev->gfx.me_fw->data;
1333 		/* instruction */
1334 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1335 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1336 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1337 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1338 						fw_data, fw_size, fw_autoload_mask);
1339 		/* data */
1340 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1341 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1342 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1343 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1344 						fw_data, fw_size, fw_autoload_mask);
1345 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1346 						fw_data, fw_size, fw_autoload_mask);
1347 		/* mec ucode */
1348 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1349 			adev->gfx.mec_fw->data;
1350 		/* instruction */
1351 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1352 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1353 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1354 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1355 						fw_data, fw_size, fw_autoload_mask);
1356 		/* data */
1357 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1358 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1359 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1360 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1361 						fw_data, fw_size, fw_autoload_mask);
1362 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1363 						fw_data, fw_size, fw_autoload_mask);
1364 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1365 						fw_data, fw_size, fw_autoload_mask);
1366 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1367 						fw_data, fw_size, fw_autoload_mask);
1368 	} else {
1369 		/* pfp ucode */
1370 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1371 			adev->gfx.pfp_fw->data;
1372 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1373 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1374 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1375 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1376 						fw_data, fw_size, fw_autoload_mask);
1377 
1378 		/* me ucode */
1379 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1380 			adev->gfx.me_fw->data;
1381 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1382 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1383 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1384 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1385 						fw_data, fw_size, fw_autoload_mask);
1386 
1387 		/* mec ucode */
1388 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1389 			adev->gfx.mec_fw->data;
1390 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1391 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1392 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1393 			cp_hdr->jt_size * 4;
1394 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1395 						fw_data, fw_size, fw_autoload_mask);
1396 	}
1397 
1398 	/* rlc ucode */
1399 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1400 		adev->gfx.rlc_fw->data;
1401 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1402 			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1403 	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1404 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1405 					fw_data, fw_size, fw_autoload_mask);
1406 
1407 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1408 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1409 	if (version_major == 2) {
1410 		if (version_minor >= 2) {
1411 			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1412 
1413 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1414 					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1415 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1416 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1417 					fw_data, fw_size, fw_autoload_mask);
1418 
1419 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1420 					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1421 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1422 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1423 					fw_data, fw_size, fw_autoload_mask);
1424 		}
1425 	}
1426 }
1427 
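/*
 * Stage both SDMA microcode threads (context and control) from the
 * SDMA v2.0 firmware image into the RLC autoload buffer.
 */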
1428 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1429 							uint32_t *fw_autoload_mask)
1430 {
1431 	const __le32 *fw_data;
1432 	uint32_t fw_size;
1433 	const struct sdma_firmware_header_v2_0 *sdma_hdr;
1434 
1435 	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1436 		adev->sdma.instance[0].fw->data;
1437 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1438 			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1439 	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1440 
1441 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1442 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1443 
1444 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1445 			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1446 	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1447 
1448 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1449 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1450 }
1451 
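/*
 * Stage the RS64 MES instruction and data (stack) images for both MES
 * pipes into the RLC autoload buffer.
 */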
1452 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1453 							uint32_t *fw_autoload_mask)
1454 {
1455 	const __le32 *fw_data;
1456 	unsigned fw_size;
1457 	const struct mes_firmware_header_v1_0 *mes_hdr;
1458 	int pipe, ucode_id, data_id;
1459 
1460 	for (pipe = 0; pipe < 2; pipe++) {
1461 		if (pipe == 0) {
1462 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1463 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1464 		} else {
1465 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1466 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1467 		}
1468 
1469 		mes_hdr = (const struct mes_firmware_header_v1_0 *)
1470 			adev->mes.fw[pipe]->data;
1471 
1472 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1473 				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1474 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1475 
1476 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1477 				ucode_id, fw_data, fw_size, fw_autoload_mask);
1478 
1479 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1480 				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1481 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1482 
1483 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1484 				data_id, fw_data, fw_size, fw_autoload_mask);
1485 	}
1486 }
1487 
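/*
 * RLC backdoor autoload: copy every firmware image into the autoload
 * buffer, point the IMU bootloader registers at the staged RLC_G ucode,
 * then load, set up and start the IMU, and finally disable GPA mode.
 */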
1488 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1489 {
1490 	uint32_t rlc_g_offset, rlc_g_size;
1491 	uint64_t gpu_addr;
1492 	uint32_t autoload_fw_id[2];
1493 
1494 	memset(autoload_fw_id, 0, sizeof(autoload_fw_id));
1495 
1496 	/* RLC autoload sequence 2: copy ucode */
1497 	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1498 	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1499 	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1500 	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1501 
1502 	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1503 	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1504 	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1505 
1506 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1507 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1508 
1509 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1510 
1511 	/* RLC autoload sequence 3: load IMU fw */
1512 	if (adev->gfx.imu.funcs->load_microcode)
1513 		adev->gfx.imu.funcs->load_microcode(adev);
1514 	/* RLC autoload sequence 4: init IMU fw */
1515 	if (adev->gfx.imu.funcs->setup_imu)
1516 		adev->gfx.imu.funcs->setup_imu(adev);
1517 	if (adev->gfx.imu.funcs->start_imu)
1518 		adev->gfx.imu.funcs->start_imu(adev);
1519 
1520 	/* RLC autoload sequence 5: disable gpa mode */
1521 	gfx_v11_0_disable_gpa_mode(adev);
1522 
1523 	return 0;
1524 }
1525 
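/*
 * Allocate the register snapshot buffers used for IP dumps of the GFX
 * core and of every compute and gfx queue. On allocation failure an
 * error is logged and the corresponding pointer is left NULL.
 */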
1526 static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
1527 {
1528 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
1529 	uint32_t *ptr;
1530 	uint32_t inst;
1531 
1532 	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
1533 	if (!ptr) {
1534 		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
1535 		adev->gfx.ip_dump_core = NULL;
1536 	} else {
1537 		adev->gfx.ip_dump_core = ptr;
1538 	}
1539 
1540 	/* Allocate memory for compute queue registers for all the instances */
1541 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
1542 	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
1543 		adev->gfx.mec.num_queue_per_pipe;
1544 
1545 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1546 	if (!ptr) {
1547 		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
1548 		adev->gfx.ip_dump_compute_queues = NULL;
1549 	} else {
1550 		adev->gfx.ip_dump_compute_queues = ptr;
1551 	}
1552 
1553 	/* Allocate memory for gfx queue registers for all the instances */
1554 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
1555 	inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
1556 		adev->gfx.me.num_queue_per_pipe;
1557 
1558 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1559 	if (!ptr) {
1560 		DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
1561 		adev->gfx.ip_dump_gfx_queues = NULL;
1562 	} else {
1563 		adev->gfx.ip_dump_gfx_queues = ptr;
1564 	}
1565 }
1566 
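/*
 * sw_init: set the per-chip ME/MEC topology, register the CP and RLC
 * interrupt sources, create the firmware, MQD and (optionally) KIQ
 * objects, and initialize the gfx and compute rings.
 */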
1567 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
1568 {
1569 	int i, j, k, r, ring_id = 0;
1570 	int xcc_id = 0;
1571 	struct amdgpu_device *adev = ip_block->adev;
1572 	int num_queue_per_pipe = 1; /* we only enable 1 KGQ per pipe */
1573 
1574 	INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
1575 
1576 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1577 	case IP_VERSION(11, 0, 0):
1578 	case IP_VERSION(11, 0, 2):
1579 	case IP_VERSION(11, 0, 3):
1580 		adev->gfx.me.num_me = 1;
1581 		adev->gfx.me.num_pipe_per_me = 1;
1582 		adev->gfx.me.num_queue_per_pipe = 2;
1583 		adev->gfx.mec.num_mec = 1;
1584 		adev->gfx.mec.num_pipe_per_mec = 4;
1585 		adev->gfx.mec.num_queue_per_pipe = 4;
1586 		break;
1587 	case IP_VERSION(11, 0, 1):
1588 	case IP_VERSION(11, 0, 4):
1589 	case IP_VERSION(11, 5, 0):
1590 	case IP_VERSION(11, 5, 1):
1591 	case IP_VERSION(11, 5, 2):
1592 	case IP_VERSION(11, 5, 3):
1593 		adev->gfx.me.num_me = 1;
1594 		adev->gfx.me.num_pipe_per_me = 1;
1595 		adev->gfx.me.num_queue_per_pipe = 2;
1596 		adev->gfx.mec.num_mec = 1;
1597 		adev->gfx.mec.num_pipe_per_mec = 4;
1598 		adev->gfx.mec.num_queue_per_pipe = 4;
1599 		break;
1600 	default:
1601 		adev->gfx.me.num_me = 1;
1602 		adev->gfx.me.num_pipe_per_me = 1;
1603 		adev->gfx.me.num_queue_per_pipe = 1;
1604 		adev->gfx.mec.num_mec = 1;
1605 		adev->gfx.mec.num_pipe_per_mec = 4;
1606 		adev->gfx.mec.num_queue_per_pipe = 8;
1607 		break;
1608 	}
1609 
1610 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1611 	case IP_VERSION(11, 0, 0):
1612 	case IP_VERSION(11, 0, 2):
1613 	case IP_VERSION(11, 0, 3):
1614 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1615 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1616 		if (adev->gfx.me_fw_version >= 2280 &&
1617 		    adev->gfx.pfp_fw_version >= 2370 &&
1618 		    adev->gfx.mec_fw_version >= 2450 &&
1619 		    adev->mes.fw_version[0] >= 99) {
1620 			adev->gfx.enable_cleaner_shader = true;
1621 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1622 			if (r) {
1623 				adev->gfx.enable_cleaner_shader = false;
1624 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1625 			}
1626 		}
1627 		break;
1628 	case IP_VERSION(11, 5, 0):
1629 	case IP_VERSION(11, 5, 1):
1630 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1631 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1632 		if (adev->gfx.mec_fw_version >= 26 &&
1633 		    adev->mes.fw_version[0] >= 114) {
1634 			adev->gfx.enable_cleaner_shader = true;
1635 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1636 			if (r) {
1637 				adev->gfx.enable_cleaner_shader = false;
1638 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1639 			}
1640 		}
1641 		break;
1642 	default:
1643 		adev->gfx.enable_cleaner_shader = false;
1644 		break;
1645 	}
1646 
1647 	/* In one-VF mode, enable the CGCG flag so RLC safe mode enter/exit works */
1648 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) &&
1649 	    amdgpu_sriov_is_pp_one_vf(adev))
1650 		adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
1651 
1652 	/* EOP Event */
1653 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1654 			      GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1655 			      &adev->gfx.eop_irq);
1656 	if (r)
1657 		return r;
1658 
1659 	/* Bad opcode Event */
1660 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1661 			      GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR,
1662 			      &adev->gfx.bad_op_irq);
1663 	if (r)
1664 		return r;
1665 
1666 	/* Privileged reg */
1667 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1668 			      GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1669 			      &adev->gfx.priv_reg_irq);
1670 	if (r)
1671 		return r;
1672 
1673 	/* Privileged inst */
1674 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1675 			      GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1676 			      &adev->gfx.priv_inst_irq);
1677 	if (r)
1678 		return r;
1679 
1680 	/* FED error */
1681 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
1682 			      GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
1683 			      &adev->gfx.rlc_gc_fed_irq);
1684 	if (r)
1685 		return r;
1686 
1687 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1688 
1689 	gfx_v11_0_me_init(adev);
1690 
1691 	r = gfx_v11_0_rlc_init(adev);
1692 	if (r) {
1693 		DRM_ERROR("Failed to init rlc BOs!\n");
1694 		return r;
1695 	}
1696 
1697 	r = gfx_v11_0_mec_init(adev);
1698 	if (r) {
1699 		DRM_ERROR("Failed to init MEC BOs!\n");
1700 		return r;
1701 	}
1702 
1703 	/* set up the gfx ring */
1704 	for (i = 0; i < adev->gfx.me.num_me; i++) {
1705 		for (j = 0; j < num_queue_per_pipe; j++) {
1706 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1707 				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1708 					continue;
1709 
1710 				r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1711 							    i, k, j);
1712 				if (r)
1713 					return r;
1714 				ring_id++;
1715 			}
1716 		}
1717 	}
1718 
1719 	ring_id = 0;
1720 	/* set up the compute queues - allocate horizontally across pipes */
1721 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1722 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1723 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1724 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
1725 								     k, j))
1726 					continue;
1727 
1728 				r = gfx_v11_0_compute_ring_init(adev, ring_id,
1729 								i, k, j);
1730 				if (r)
1731 					return r;
1732 
1733 				ring_id++;
1734 			}
1735 		}
1736 	}
1737 
1738 	adev->gfx.gfx_supported_reset =
1739 		amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
1740 	adev->gfx.compute_supported_reset =
1741 		amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
1742 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1743 	case IP_VERSION(11, 0, 0):
1744 	case IP_VERSION(11, 0, 2):
1745 	case IP_VERSION(11, 0, 3):
1746 		if ((adev->gfx.me_fw_version >= 2280) &&
1747 		    (adev->gfx.mec_fw_version >= 2410)) {
1748 			adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1749 			adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1750 		}
1751 		break;
1752 	default:
1753 		break;
1754 	}
1755 
1756 	if (!adev->enable_mes_kiq) {
1757 		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
1758 		if (r) {
1759 			DRM_ERROR("Failed to init KIQ BOs!\n");
1760 			return r;
1761 		}
1762 
1763 		r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
1764 		if (r)
1765 			return r;
1766 	}
1767 
1768 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
1769 	if (r)
1770 		return r;
1771 
1772 	/* allocate visible FB for rlc auto-loading fw */
1773 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1774 		r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1775 		if (r)
1776 			return r;
1777 	}
1778 
1779 	r = gfx_v11_0_gpu_early_init(adev);
1780 	if (r)
1781 		return r;
1782 
1783 	if (amdgpu_gfx_ras_sw_init(adev)) {
1784 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
1785 		return -EINVAL;
1786 	}
1787 
1788 	gfx_v11_0_alloc_ip_dump(adev);
1789 
1790 	r = amdgpu_gfx_sysfs_init(adev);
1791 	if (r)
1792 		return r;
1793 
1794 	return 0;
1795 }
1796 
1797 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1798 {
1799 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1800 			      &adev->gfx.pfp.pfp_fw_gpu_addr,
1801 			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
1802 
1803 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1804 			      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1805 			      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1806 }
1807 
1808 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1809 {
1810 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1811 			      &adev->gfx.me.me_fw_gpu_addr,
1812 			      (void **)&adev->gfx.me.me_fw_ptr);
1813 
1814 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1815 			       &adev->gfx.me.me_fw_data_gpu_addr,
1816 			       (void **)&adev->gfx.me.me_fw_data_ptr);
1817 }
1818 
1819 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1820 {
1821 	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1822 			&adev->gfx.rlc.rlc_autoload_gpu_addr,
1823 			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
1824 }
1825 
1826 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
1827 {
1828 	int i;
1829 	struct amdgpu_device *adev = ip_block->adev;
1830 
1831 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1832 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1833 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1834 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1835 
1836 	amdgpu_gfx_mqd_sw_fini(adev, 0);
1837 
1838 	if (!adev->enable_mes_kiq) {
1839 		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
1840 		amdgpu_gfx_kiq_fini(adev, 0);
1841 	}
1842 
1843 	amdgpu_gfx_cleaner_shader_sw_fini(adev);
1844 
1845 	gfx_v11_0_pfp_fini(adev);
1846 	gfx_v11_0_me_fini(adev);
1847 	gfx_v11_0_rlc_fini(adev);
1848 	gfx_v11_0_mec_fini(adev);
1849 
1850 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1851 		gfx_v11_0_rlc_autoload_buffer_fini(adev);
1852 
1853 	gfx_v11_0_free_microcode(adev);
1854 
1855 	amdgpu_gfx_sysfs_fini(adev);
1856 
1857 	kfree(adev->gfx.ip_dump_core);
1858 	kfree(adev->gfx.ip_dump_compute_queues);
1859 	kfree(adev->gfx.ip_dump_gfx_queues);
1860 
1861 	return 0;
1862 }
1863 
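/*
 * Program GRBM_GFX_INDEX to target a single SE/SA/instance, or to
 * broadcast writes for any field passed as 0xffffffff.
 */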
1864 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1865 				   u32 sh_num, u32 instance, int xcc_id)
1866 {
1867 	u32 data;
1868 
1869 	if (instance == 0xffffffff)
1870 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1871 				     INSTANCE_BROADCAST_WRITES, 1);
1872 	else
1873 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1874 				     instance);
1875 
1876 	if (se_num == 0xffffffff)
1877 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1878 				     1);
1879 	else
1880 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1881 
1882 	if (sh_num == 0xffffffff)
1883 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1884 				     1);
1885 	else
1886 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1887 
1888 	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1889 }
1890 
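/*
 * Active SA bitmap: all physically possible SAs minus those disabled by
 * fuses (CC_GC_SA_UNIT_DISABLE) or by the user (GC_USER_SA_UNIT_DISABLE).
 */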
1891 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
1892 {
1893 	u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
1894 
1895 	gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
1896 	gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
1897 					   CC_GC_SA_UNIT_DISABLE,
1898 					   SA_DISABLE);
1899 	gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE);
1900 	gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
1901 						 GC_USER_SA_UNIT_DISABLE,
1902 						 SA_DISABLE);
1903 	sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
1904 					    adev->gfx.config.max_shader_engines);
1905 
1906 	return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
1907 }
1908 
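/*
 * Active RB bitmap: all physically possible RBs minus those disabled by
 * fuses (CC_RB_BACKEND_DISABLE) or by the user (GC_USER_RB_BACKEND_DISABLE).
 */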
1909 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1910 {
1911 	u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
1912 	u32 rb_mask;
1913 
1914 	gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1915 	gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
1916 					    CC_RB_BACKEND_DISABLE,
1917 					    BACKEND_DISABLE);
1918 	gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1919 	gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
1920 						 GC_USER_RB_BACKEND_DISABLE,
1921 						 BACKEND_DISABLE);
1922 	rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
1923 					    adev->gfx.config.max_shader_engines);
1924 
1925 	return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
1926 }
1927 
1928 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
1929 {
1930 	u32 rb_bitmap_per_sa;
1931 	u32 rb_bitmap_width_per_sa;
1932 	u32 max_sa;
1933 	u32 active_sa_bitmap;
1934 	u32 global_active_rb_bitmap;
1935 	u32 active_rb_bitmap = 0;
1936 	u32 i;
1937 
1938 	/* query sa bitmap from SA_UNIT_DISABLE registers */
1939 	active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
1940 	/* query rb bitmap from RB_BACKEND_DISABLE registers */
1941 	global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
1942 
1943 	/* generate active rb bitmap according to active sa bitmap */
1944 	max_sa = adev->gfx.config.max_shader_engines *
1945 		 adev->gfx.config.max_sh_per_se;
1946 	rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
1947 				 adev->gfx.config.max_sh_per_se;
1948 	rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
1949 
1950 	for (i = 0; i < max_sa; i++) {
1951 		if (active_sa_bitmap & (1 << i))
1952 			active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
1953 	}
1954 
1955 	active_rb_bitmap &= global_active_rb_bitmap;
1956 	adev->gfx.config.backend_enable_mask = active_rb_bitmap;
1957 	adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
1958 }
1959 
1960 #define DEFAULT_SH_MEM_BASES	(0x6000)
1961 #define LDS_APP_BASE           0x1
1962 #define SCRATCH_APP_BASE       0x2
1963 
1964 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
1965 {
1966 	int i;
1967 	uint32_t sh_mem_bases;
1968 	uint32_t data;
1969 
1970 	/*
1971 	 * Configure apertures:
1972 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1973 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1974 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1975 	 */
1976 	sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1977 			SCRATCH_APP_BASE;
1978 
1979 	mutex_lock(&adev->srbm_mutex);
1980 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1981 		soc21_grbm_select(adev, 0, 0, 0, i);
1982 		/* CP and shaders */
1983 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1984 		WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1985 
1986 		/* Enable trap for each kfd vmid. */
1987 		data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1988 		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
1989 		WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
1990 	}
1991 	soc21_grbm_select(adev, 0, 0, 0, 0);
1992 	mutex_unlock(&adev->srbm_mutex);
1993 
1994 	/*
1995 	 * Initialize all compute VMIDs to have no GDS, GWS, or OA
1996 	 * access. These should be enabled by FW for target VMIDs.
1997 	 */
1998 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1999 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
2000 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
2001 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
2002 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
2003 	}
2004 }
2005 
2006 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
2007 {
2008 	int vmid;
2009 
2010 	/*
2011 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2012 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2013 	 * the driver can enable them for graphics. VMID0 should maintain
2014 	 * access so that HWS firmware can save/restore entries.
2015 	 */
2016 	for (vmid = 1; vmid < 16; vmid++) {
2017 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
2018 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
2019 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
2020 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
2021 	}
2022 }
2023 
2024 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
2025 {
2026 	/* TODO: harvest feature to be added later. */
2027 }
2028 
2029 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
2030 {
2031 	/* TCCs are global (not instanced). */
2032 	uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
2033 			       RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
2034 
2035 	adev->gfx.config.tcc_disabled_mask =
2036 		REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
2037 		(REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
2038 }
2039 
2040 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
2041 {
2042 	u32 tmp;
2043 	int i;
2044 
2045 	if (!amdgpu_sriov_vf(adev))
2046 		WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2047 
2048 	gfx_v11_0_setup_rb(adev);
2049 	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
2050 	gfx_v11_0_get_tcc_info(adev);
2051 	adev->gfx.config.pa_sc_tile_steering_override = 0;
2052 
2053 	/* Record whether texture coordinate truncation is conformant. */
2054 	tmp = RREG32_SOC15(GC, 0, regTA_CNTL2);
2055 	adev->gfx.config.ta_cntl2_truncate_coord_mode =
2056 		REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE);
2057 
2058 	/* XXX SH_MEM regs */
2059 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2060 	mutex_lock(&adev->srbm_mutex);
2061 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2062 		soc21_grbm_select(adev, 0, 0, 0, i);
2063 		/* CP and shaders */
2064 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
2065 		if (i != 0) {
2066 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2067 				(adev->gmc.private_aperture_start >> 48));
2068 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2069 				(adev->gmc.shared_aperture_start >> 48));
2070 			WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
2071 		}
2072 	}
2073 	soc21_grbm_select(adev, 0, 0, 0, 0);
2074 
2075 	mutex_unlock(&adev->srbm_mutex);
2076 
2077 	gfx_v11_0_init_compute_vmid(adev);
2078 	gfx_v11_0_init_gds_vmid(adev);
2079 }
2080 
2081 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev,
2082 				      int me, int pipe)
2083 {
2084 	if (me != 0)
2085 		return 0;
2086 
2087 	switch (pipe) {
2088 	case 0:
2089 		return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
2090 	case 1:
2091 		return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
2092 	default:
2093 		return 0;
2094 	}
2095 }
2096 
2097 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev,
2098 				      int me, int pipe)
2099 {
2100 	/*
2101 	 * amdgpu controls only the first MEC. That's why this function only
2102 	 * handles the setting of interrupts for this specific MEC. All other
2103 	 * pipes' interrupts are set by amdkfd.
2104 	 */
2105 	if (me != 1)
2106 		return 0;
2107 
2108 	switch (pipe) {
2109 	case 0:
2110 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
2111 	case 1:
2112 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
2113 	case 2:
2114 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
2115 	case 3:
2116 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
2117 	default:
2118 		return 0;
2119 	}
2120 }
2121 
2122 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2123 					       bool enable)
2124 {
2125 	u32 tmp, cp_int_cntl_reg;
2126 	int i, j;
2127 
2128 	if (amdgpu_sriov_vf(adev))
2129 		return;
2130 
2131 	for (i = 0; i < adev->gfx.me.num_me; i++) {
2132 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
2133 			cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
2134 
2135 			if (cp_int_cntl_reg) {
2136 				tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
2137 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
2138 						    enable ? 1 : 0);
2139 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
2140 						    enable ? 1 : 0);
2141 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
2142 						    enable ? 1 : 0);
2143 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
2144 						    enable ? 1 : 0);
2145 				WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
2146 			}
2147 		}
2148 	}
2149 }
2150 
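/*
 * Regenerate the clear-state buffer and program its GPU address and
 * size into the RLC CSIB registers.
 */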
2151 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
2152 {
2153 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2154 
2155 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
2156 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2157 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
2158 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2159 	WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
2160 
2161 	return 0;
2162 }
2163 
2164 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
2165 {
2166 	u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
2167 
2168 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2169 	WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
2170 }
2171 
2172 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
2173 {
2174 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2175 	udelay(50);
2176 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2177 	udelay(50);
2178 }
2179 
2180 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
2181 					     bool enable)
2182 {
2183 	uint32_t rlc_pg_cntl;
2184 
2185 	rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
2186 
2187 	if (!enable) {
2188 		/* RLC_PG_CNTL[23] = 0 (default)
2189 		 * RLC will wait for handshake acks with SMU
2190 		 * GFXOFF will be enabled
2191 		 * RLC_PG_CNTL[23] = 1
2192 		 * RLC will not issue any message to SMU
2193 		 * hence no handshake between SMU & RLC
2194 		 * GFXOFF will be disabled
2195 		 */
2196 		rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
2197 	} else {
2198 		rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
	}
2199 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
2200 }
2201 
2202 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
2203 {
2204 	/* TODO: re-enable the RLC & SMU handshake once the SMU
2205 	 * and gfxoff features work as expected */
2206 	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
2207 		gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
2208 
2209 	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2210 	udelay(50);
2211 }
2212 
2213 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
2214 {
2215 	uint32_t tmp;
2216 
2217 	/* enable Save Restore Machine */
2218 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
2219 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2220 	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
2221 	WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
2222 }
2223 
2224 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
2225 {
2226 	const struct rlc_firmware_header_v2_0 *hdr;
2227 	const __le32 *fw_data;
2228 	unsigned i, fw_size;
2229 
2230 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2231 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2232 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2233 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2234 
2235 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
2236 		     RLCG_UCODE_LOADING_START_ADDRESS);
2237 
2238 	for (i = 0; i < fw_size; i++)
2239 		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
2240 			     le32_to_cpup(fw_data++));
2241 
2242 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2243 }
2244 
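/*
 * Stream the RLC LX6 IRAM and DRAM images (RLC v2.2 header layout)
 * through the LX6 ADDR/DATA register pairs, then release the LX6 core
 * from reset with PDEBUG enabled.
 */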
2245 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
2246 {
2247 	const struct rlc_firmware_header_v2_2 *hdr;
2248 	const __le32 *fw_data;
2249 	unsigned i, fw_size;
2250 	u32 tmp;
2251 
2252 	hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
2253 
2254 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2255 			le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
2256 	fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
2257 
2258 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
2259 
2260 	for (i = 0; i < fw_size; i++) {
2261 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2262 			msleep(1);
2263 		WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
2264 				le32_to_cpup(fw_data++));
2265 	}
2266 
2267 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
2268 
2269 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2270 			le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
2271 	fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
2272 
2273 	WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
2274 	for (i = 0; i < fw_size; i++) {
2275 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2276 			msleep(1);
2277 		WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
2278 				le32_to_cpup(fw_data++));
2279 	}
2280 
2281 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
2282 
2283 	tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
2284 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
2285 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
2286 	WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
2287 }
2288 
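/*
 * Load the RLCP (pace) and RLCV (GPU IOV) images (RLC v2.3 header
 * layout) and enable their respective execution threads.
 */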
2289 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
2290 {
2291 	const struct rlc_firmware_header_v2_3 *hdr;
2292 	const __le32 *fw_data;
2293 	unsigned i, fw_size;
2294 	u32 tmp;
2295 
2296 	hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
2297 
2298 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2299 			le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
2300 	fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
2301 
2302 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
2303 
2304 	for (i = 0; i < fw_size; i++) {
2305 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2306 			msleep(1);
2307 		WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
2308 				le32_to_cpup(fw_data++));
2309 	}
2310 
2311 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
2312 
2313 	tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
2314 	tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
2315 	WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
2316 
2317 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2318 			le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
2319 	fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
2320 
2321 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
2322 
2323 	for (i = 0; i < fw_size; i++) {
2324 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2325 			msleep(1);
2326 		WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
2327 				le32_to_cpup(fw_data++));
2328 	}
2329 
2330 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
2331 
2332 	tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
2333 	tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
2334 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
2335 }
2336 
2337 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
2338 {
2339 	const struct rlc_firmware_header_v2_0 *hdr;
2340 	uint16_t version_major;
2341 	uint16_t version_minor;
2342 
2343 	if (!adev->gfx.rlc_fw)
2344 		return -EINVAL;
2345 
2346 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2347 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2348 
2349 	version_major = le16_to_cpu(hdr->header.header_version_major);
2350 	version_minor = le16_to_cpu(hdr->header.header_version_minor);
2351 
2352 	if (version_major == 2) {
2353 		gfx_v11_0_load_rlcg_microcode(adev);
2354 		if (amdgpu_dpm == 1) {
2355 			if (version_minor >= 2)
2356 				gfx_v11_0_load_rlc_iram_dram_microcode(adev);
2357 			if (version_minor == 3)
2358 				gfx_v11_0_load_rlcp_rlcv_microcode(adev);
2359 		}
2360 
2361 		return 0;
2362 	}
2363 
2364 	return -EINVAL;
2365 }
2366 
2367 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
2368 {
2369 	int r;
2370 
2371 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
2372 		gfx_v11_0_init_csb(adev);
2373 
2374 		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
2375 			gfx_v11_0_rlc_enable_srm(adev);
2376 	} else {
2377 		if (amdgpu_sriov_vf(adev)) {
2378 			gfx_v11_0_init_csb(adev);
2379 			return 0;
2380 		}
2381 
2382 		adev->gfx.rlc.funcs->stop(adev);
2383 
2384 		/* disable CG */
2385 		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
2386 
2387 		/* disable PG */
2388 		WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
2389 
2390 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2391 			/* legacy rlc firmware loading */
2392 			r = gfx_v11_0_rlc_load_microcode(adev);
2393 			if (r)
2394 				return r;
2395 		}
2396 
2397 		gfx_v11_0_init_csb(adev);
2398 
2399 		adev->gfx.rlc.funcs->start(adev);
2400 	}
2401 	return 0;
2402 }
2403 
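/*
 * Non-RS64 path: invalidate the ME L1 instruction cache and point its
 * base address at the backdoor-autoloaded ME ucode.
 */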
2404 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
2405 {
2406 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2407 	uint32_t tmp;
2408 	int i;
2409 
2410 	/* Trigger an invalidation of the L1 instruction caches */
2411 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2412 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2413 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2414 
2415 	/* Wait for invalidation complete */
2416 	for (i = 0; i < usec_timeout; i++) {
2417 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2418 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2419 					INVALIDATE_CACHE_COMPLETE))
2420 			break;
2421 		udelay(1);
2422 	}
2423 
2424 	if (i >= usec_timeout) {
2425 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2426 		return -EINVAL;
2427 	}
2428 
2429 	if (amdgpu_emu_mode == 1)
2430 		adev->hdp.funcs->flush_hdp(adev, NULL);
2431 
2432 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2433 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2434 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2435 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2436 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2437 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2438 
2439 	/* Program me ucode address into instruction cache address register */
2440 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2441 			lower_32_bits(addr) & 0xFFFFF000);
2442 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2443 			upper_32_bits(addr));
2444 
2445 	return 0;
2446 }
2447 
2448 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
2449 {
2450 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2451 	uint32_t tmp;
2452 	int i;
2453 
2454 	/* Trigger an invalidation of the L1 instruction caches */
2455 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2456 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2457 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2458 
2459 	/* Wait for invalidation complete */
2460 	for (i = 0; i < usec_timeout; i++) {
2461 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2462 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2463 					INVALIDATE_CACHE_COMPLETE))
2464 			break;
2465 		udelay(1);
2466 	}
2467 
2468 	if (i >= usec_timeout) {
2469 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2470 		return -EINVAL;
2471 	}
2472 
2473 	if (amdgpu_emu_mode == 1)
2474 		adev->hdp.funcs->flush_hdp(adev, NULL);
2475 
2476 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2477 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2478 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2479 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2480 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2481 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2482 
2483 	/* Program pfp ucode address into instruction cache address register */
2484 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2485 			lower_32_bits(addr) & 0xFFFFF000);
2486 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2487 			upper_32_bits(addr));
2488 
2489 	return 0;
2490 }
2491 
2492 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2493 {
2494 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2495 	uint32_t tmp;
2496 	int i;
2497 
2498 	/* Trigger an invalidation of the L1 instruction caches */
2499 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2500 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2501 
2502 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2503 
2504 	/* Wait for invalidation complete */
2505 	for (i = 0; i < usec_timeout; i++) {
2506 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2507 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2508 					INVALIDATE_CACHE_COMPLETE))
2509 			break;
2510 		udelay(1);
2511 	}
2512 
2513 	if (i >= usec_timeout) {
2514 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2515 		return -EINVAL;
2516 	}
2517 
2518 	if (amdgpu_emu_mode == 1)
2519 		adev->hdp.funcs->flush_hdp(adev, NULL);
2520 
2521 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2522 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2523 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2524 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2525 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2526 
2527 	/* Program mec1 ucode address into instruction cache address register */
2528 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2529 			lower_32_bits(addr) & 0xFFFFF000);
2530 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2531 			upper_32_bits(addr));
2532 
2533 	return 0;
2534 }
2535 
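/*
 * RS64 PFP setup: program the instruction-cache base, wait for the
 * implied invalidation, prime the I$, then set the per-pipe program
 * counter start and data-cache base, pulsing each pipe reset so the
 * new values take effect.
 */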
2536 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2537 {
2538 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2539 	uint32_t tmp;
2540 	unsigned i, pipe_id;
2541 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2542 
2543 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2544 		adev->gfx.pfp_fw->data;
2545 
2546 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2547 		lower_32_bits(addr));
2548 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2549 		upper_32_bits(addr));
2550 
2551 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2552 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2553 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2554 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2555 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2556 
2557 	/*
2558 	 * Programming any of the CP_PFP_IC_BASE registers
2559 	 * forces invalidation of the PFP L1 I$. Wait for the
2560 	 * invalidation to complete.
2561 	 */
2562 	for (i = 0; i < usec_timeout; i++) {
2563 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2564 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2565 			INVALIDATE_CACHE_COMPLETE))
2566 			break;
2567 		udelay(1);
2568 	}
2569 
2570 	if (i >= usec_timeout) {
2571 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2572 		return -EINVAL;
2573 	}
2574 
2575 	/* Prime the L1 instruction caches */
2576 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2577 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2578 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2579 	/* Wait for the cache to be primed */
2580 	for (i = 0; i < usec_timeout; i++) {
2581 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2582 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2583 			ICACHE_PRIMED))
2584 			break;
2585 		udelay(1);
2586 	}
2587 
2588 	if (i >= usec_timeout) {
2589 		dev_err(adev->dev, "failed to prime instruction cache\n");
2590 		return -EINVAL;
2591 	}
2592 
2593 	mutex_lock(&adev->srbm_mutex);
2594 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2595 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2596 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2597 			(pfp_hdr->ucode_start_addr_hi << 30) |
2598 			(pfp_hdr->ucode_start_addr_lo >> 2));
2599 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2600 			pfp_hdr->ucode_start_addr_hi >> 2);
2601 
2602 		/*
2603 		 * Pulse the PFP pipe reset in CP_ME_CNTL so the new
2604 		 * CP_PFP_PRGRM_CNTR_START value takes effect.
2605 		 */
2606 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2607 		if (pipe_id == 0)
2608 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2609 					PFP_PIPE0_RESET, 1);
2610 		else
2611 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2612 					PFP_PIPE1_RESET, 1);
2613 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2614 
2615 		/* Clear the pfp pipe reset bit. */
2616 		if (pipe_id == 0)
2617 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2618 					PFP_PIPE0_RESET, 0);
2619 		else
2620 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2621 					PFP_PIPE1_RESET, 0);
2622 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2623 
2624 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2625 			lower_32_bits(addr2));
2626 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2627 			upper_32_bits(addr2));
2628 	}
2629 	soc21_grbm_select(adev, 0, 0, 0, 0);
2630 	mutex_unlock(&adev->srbm_mutex);
2631 
2632 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2633 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2634 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2635 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2636 
2637 	/* Invalidate the data caches */
2638 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2639 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2640 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2641 
2642 	for (i = 0; i < usec_timeout; i++) {
2643 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2644 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2645 			INVALIDATE_DCACHE_COMPLETE))
2646 			break;
2647 		udelay(1);
2648 	}
2649 
2650 	if (i >= usec_timeout) {
2651 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2652 		return -EINVAL;
2653 	}
2654 
2655 	return 0;
2656 }
2657 
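/*
 * RS64 ME setup: same sequence as the PFP variant above, applied to the
 * ME instruction cache, program counters and DC_BASE1 data-cache base.
 */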
2658 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2659 {
2660 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2661 	uint32_t tmp;
2662 	unsigned i, pipe_id;
2663 	const struct gfx_firmware_header_v2_0 *me_hdr;
2664 
2665 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2666 		adev->gfx.me_fw->data;
2667 
2668 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2669 		lower_32_bits(addr));
2670 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2671 		upper_32_bits(addr));
2672 
2673 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2674 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2675 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2676 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2677 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2678 
2679 	/*
2680 	 * Programming any of the CP_ME_IC_BASE registers
2681 	 * forces invalidation of the ME L1 I$. Wait for the
2682 	 * invalidation to complete.
2683 	 */
2684 	for (i = 0; i < usec_timeout; i++) {
2685 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2686 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2687 			INVALIDATE_CACHE_COMPLETE))
2688 			break;
2689 		udelay(1);
2690 	}
2691 
2692 	if (i >= usec_timeout) {
2693 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2694 		return -EINVAL;
2695 	}
2696 
2697 	/* Prime the instruction caches */
2698 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2699 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2700 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2701 
2702 	/* Wait for the instruction cache to be primed */
2703 	for (i = 0; i < usec_timeout; i++) {
2704 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2705 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2706 			ICACHE_PRIMED))
2707 			break;
2708 		udelay(1);
2709 	}
2710 
2711 	if (i >= usec_timeout) {
2712 		dev_err(adev->dev, "failed to prime instruction cache\n");
2713 		return -EINVAL;
2714 	}
2715 
2716 	mutex_lock(&adev->srbm_mutex);
2717 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2718 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2719 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2720 			(me_hdr->ucode_start_addr_hi << 30) |
2721 			(me_hdr->ucode_start_addr_lo >> 2));
2722 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2723 			me_hdr->ucode_start_addr_hi >> 2);
2724 
2725 		/*
2726 		 * Pulse the ME pipe reset in CP_ME_CNTL so the new
2727 		 * CP_ME_PRGRM_CNTR_START value takes effect.
2728 		 */
2729 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2730 		if (pipe_id == 0)
2731 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2732 					ME_PIPE0_RESET, 1);
2733 		else
2734 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2735 					ME_PIPE1_RESET, 1);
2736 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2737 
2738 		/* Clear the me pipe reset bit. */
2739 		if (pipe_id == 0)
2740 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2741 					ME_PIPE0_RESET, 0);
2742 		else
2743 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2744 					ME_PIPE1_RESET, 0);
2745 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2746 
2747 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2748 			lower_32_bits(addr2));
2749 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2750 			upper_32_bits(addr2));
2751 	}
2752 	soc21_grbm_select(adev, 0, 0, 0, 0);
2753 	mutex_unlock(&adev->srbm_mutex);
2754 
2755 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2756 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2757 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2758 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2759 
2760 	/* Invalidate the data caches */
2761 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2762 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2763 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2764 
2765 	for (i = 0; i < usec_timeout; i++) {
2766 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2767 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2768 			INVALIDATE_DCACHE_COMPLETE))
2769 			break;
2770 		udelay(1);
2771 	}
2772 
2773 	if (i >= usec_timeout) {
2774 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2775 		return -EINVAL;
2776 	}
2777 
2778 	return 0;
2779 }
2780 
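/*
 * RS64 MEC setup: program the per-pipe instruction/data cache bases and
 * program-counter start for MEC1, then invalidate both caches.
 */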
2781 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2782 {
2783 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2784 	uint32_t tmp;
2785 	unsigned i;
2786 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2787 
2788 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2789 		adev->gfx.mec_fw->data;
2790 
2791 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2792 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2793 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2794 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2795 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2796 
2797 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2798 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2799 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2800 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2801 
2802 	mutex_lock(&adev->srbm_mutex);
2803 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2804 		soc21_grbm_select(adev, 1, i, 0, 0);
2805 
2806 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, lower_32_bits(addr2));
2807 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2808 		     upper_32_bits(addr2));
2809 
2810 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2811 					mec_hdr->ucode_start_addr_lo >> 2 |
2812 					mec_hdr->ucode_start_addr_hi << 30);
2813 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2814 					mec_hdr->ucode_start_addr_hi >> 2);
2815 
2816 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, lower_32_bits(addr));
2817 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2818 		     upper_32_bits(addr));
2819 	}
2820 	soc21_grbm_select(adev, 0, 0, 0, 0);
2821 	mutex_unlock(&adev->srbm_mutex);
2822 
2823 	/* Trigger an invalidation of the MEC RS64 data caches */
2824 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2825 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2826 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2827 
2828 	/* Wait for invalidation complete */
2829 	for (i = 0; i < usec_timeout; i++) {
2830 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2831 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2832 				       INVALIDATE_DCACHE_COMPLETE))
2833 			break;
2834 		udelay(1);
2835 	}
2836 
2837 	if (i >= usec_timeout) {
2838 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2839 		return -EINVAL;
2840 	}
2841 
2842 	/* Trigger an invalidation of the L1 instruction caches */
2843 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2844 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2845 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2846 
2847 	/* Wait for invalidation complete */
2848 	for (i = 0; i < usec_timeout; i++) {
2849 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2850 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2851 				       INVALIDATE_CACHE_COMPLETE))
2852 			break;
2853 		udelay(1);
2854 	}
2855 
2856 	if (i >= usec_timeout) {
2857 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2858 		return -EINVAL;
2859 	}
2860 
2861 	return 0;
2862 }
2863 
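/*
 * Program the RS64 PFP/ME/MEC program-counter start addresses for every
 * pipe and pulse the pipe resets so the new values take effect.
 */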
2864 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2865 {
2866 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2867 	const struct gfx_firmware_header_v2_0 *me_hdr;
2868 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2869 	uint32_t pipe_id, tmp;
2870 
2871 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2872 		adev->gfx.mec_fw->data;
2873 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2874 		adev->gfx.me_fw->data;
2875 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2876 		adev->gfx.pfp_fw->data;
2877 
2878 	/* config pfp program start addr */
2879 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2880 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2881 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2882 			(pfp_hdr->ucode_start_addr_hi << 30) |
2883 			(pfp_hdr->ucode_start_addr_lo >> 2));
2884 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2885 			pfp_hdr->ucode_start_addr_hi >> 2);
2886 	}
2887 	soc21_grbm_select(adev, 0, 0, 0, 0);
2888 
2889 	/* reset pfp pipe */
2890 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2891 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
2892 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
2893 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2894 
2895 	/* clear pfp pipe reset */
2896 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2897 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2898 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2899 
2900 	/* config me program start addr */
2901 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2902 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2903 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2904 			(me_hdr->ucode_start_addr_hi << 30) |
2905 			(me_hdr->ucode_start_addr_lo >> 2));
2906 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2907 			me_hdr->ucode_start_addr_hi >> 2);
2908 	}
2909 	soc21_grbm_select(adev, 0, 0, 0, 0);
2910 
2911 	/* reset me pipe */
2912 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2913 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2914 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2915 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2916 
2917 	/* clear me pipe reset */
2918 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2919 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2920 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2921 
2922 	/* config mec program start addr */
2923 	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2924 		soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2925 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2926 					mec_hdr->ucode_start_addr_lo >> 2 |
2927 					mec_hdr->ucode_start_addr_hi << 30);
2928 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2929 					mec_hdr->ucode_start_addr_hi >> 2);
2930 	}
2931 	soc21_grbm_select(adev, 0, 0, 0, 0);
2932 
2933 	/* reset mec pipe */
2934 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2935 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
2936 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
2937 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
2938 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
2939 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2940 
2941 	/* clear mec pipe reset */
2942 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
2943 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
2944 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
2945 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
2946 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2947 }
2948 
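/*
 * Poll CP_STAT and the (chip-specific) RLC bootload status register
 * until the RLC reports BOOTLOAD_COMPLETE, then configure the CP
 * instruction/data caches from the autoload buffer offsets, using the
 * RS64 or legacy layout as appropriate.
 */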
2949 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2950 {
2951 	uint32_t cp_status;
2952 	uint32_t bootload_status;
2953 	int i, r;
2954 	uint64_t addr, addr2;
2955 
2956 	for (i = 0; i < adev->usec_timeout; i++) {
2957 		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2958 
2959 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
2960 			    IP_VERSION(11, 0, 1) ||
2961 		    amdgpu_ip_version(adev, GC_HWIP, 0) ==
2962 			    IP_VERSION(11, 0, 4) ||
2963 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
2964 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) ||
2965 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) ||
2966 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3))
2967 			bootload_status = RREG32_SOC15(GC, 0,
2968 					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
2969 		else
2970 			bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2971 
2972 		if ((cp_status == 0) &&
2973 		    (REG_GET_FIELD(bootload_status,
2974 			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2975 			break;
2976 		}
2977 		udelay(1);
2978 	}
2979 
2980 	if (i >= adev->usec_timeout) {
2981 		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2982 		return -ETIMEDOUT;
2983 	}
2984 
2985 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2986 		if (adev->gfx.rs64_enable) {
2987 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2988 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
2989 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2990 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
2991 			r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
2992 			if (r)
2993 				return r;
2994 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2995 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
2996 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2997 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
2998 			r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
2999 			if (r)
3000 				return r;
3001 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3002 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
3003 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
3004 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
3005 			r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
3006 			if (r)
3007 				return r;
3008 		} else {
3009 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3010 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
3011 			r = gfx_v11_0_config_me_cache(adev, addr);
3012 			if (r)
3013 				return r;
3014 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3015 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
3016 			r = gfx_v11_0_config_pfp_cache(adev, addr);
3017 			if (r)
3018 				return r;
3019 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
3020 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
3021 			r = gfx_v11_0_config_mec_cache(adev, addr);
3022 			if (r)
3023 				return r;
3024 		}
3025 	}
3026 
3027 	return 0;
3028 }
3029 
3030 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3031 {
3032 	int i;
3033 	u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3034 
3035 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3036 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3037 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3038 
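	/* wait for the CP to go idle (CP_STAT reads 0) */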
3039 	for (i = 0; i < adev->usec_timeout; i++) {
3040 		if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
3041 			break;
3042 		udelay(1);
3043 	}
3044 
3045 	if (i >= adev->usec_timeout)
3046 		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
3047 
3048 	return 0;
3049 }
3050 
3051 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
3052 {
3053 	int r;
3054 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3055 	const __le32 *fw_data;
3056 	unsigned i, fw_size;
3057 
3058 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3059 		adev->gfx.pfp_fw->data;
3060 
3061 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3062 
3063 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
3064 		le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3065 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
3066 
3067 	r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
3068 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3069 				      &adev->gfx.pfp.pfp_fw_obj,
3070 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
3071 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
3072 	if (r) {
3073 		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
3074 		gfx_v11_0_pfp_fini(adev);
3075 		return r;
3076 	}
3077 
3078 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
3079 
3080 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
3081 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
3082 
3083 	gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
3084 
3085 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
3086 
3087 	for (i = 0; i < pfp_hdr->jt_size; i++)
3088 		WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
3089 			     le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
3090 
3091 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3092 
3093 	return 0;
3094 }
3095 
3096 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
3097 {
3098 	int r;
3099 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
3100 	const __le32 *fw_ucode, *fw_data;
3101 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
3102 	uint32_t tmp;
3103 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
3104 
3105 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
3106 		adev->gfx.pfp_fw->data;
3107 
3108 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3109 
3110 	/* instruction */
3111 	fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
3112 		le32_to_cpu(pfp_hdr->ucode_offset_bytes));
3113 	fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
3114 	/* data */
3115 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
3116 		le32_to_cpu(pfp_hdr->data_offset_bytes));
3117 	fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
3118 
3119 	/* 64kb align */
3120 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3121 				      64 * 1024,
3122 				      AMDGPU_GEM_DOMAIN_VRAM |
3123 				      AMDGPU_GEM_DOMAIN_GTT,
3124 				      &adev->gfx.pfp.pfp_fw_obj,
3125 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
3126 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
3127 	if (r) {
3128 		dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
3129 		gfx_v11_0_pfp_fini(adev);
3130 		return r;
3131 	}
3132 
3133 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3134 				      64 * 1024,
3135 				      AMDGPU_GEM_DOMAIN_VRAM |
3136 				      AMDGPU_GEM_DOMAIN_GTT,
3137 				      &adev->gfx.pfp.pfp_fw_data_obj,
3138 				      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
3139 				      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
3140 	if (r) {
3141 		dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
3142 		gfx_v11_0_pfp_fini(adev);
3143 		return r;
3144 	}
3145 
3146 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
3147 	memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
3148 
3149 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
3150 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
3151 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
3152 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
3153 
3154 	if (amdgpu_emu_mode == 1)
3155 		adev->hdp.funcs->flush_hdp(adev, NULL);
3156 
3157 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
3158 		lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
3159 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
3160 		upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
3161 
3162 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
3163 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
3164 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
3165 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
3166 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
3167 
	/*
	 * Programming any of the CP_PFP_IC_BASE registers
	 * forces an invalidation of the PFP L1 I$. Wait for the
	 * invalidation to complete.
	 */
3173 	for (i = 0; i < usec_timeout; i++) {
3174 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
			INVALIDATE_CACHE_COMPLETE) == 1)
3177 			break;
3178 		udelay(1);
3179 	}
3180 
3181 	if (i >= usec_timeout) {
3182 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3183 		return -EINVAL;
3184 	}
3185 
3186 	/* Prime the L1 instruction caches */
3187 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3188 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
3189 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
	/* Wait for the cache to be primed */
3191 	for (i = 0; i < usec_timeout; i++) {
3192 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
			ICACHE_PRIMED) == 1)
3195 			break;
3196 		udelay(1);
3197 	}
3198 
3199 	if (i >= usec_timeout) {
3200 		dev_err(adev->dev, "failed to prime instruction cache\n");
3201 		return -EINVAL;
3202 	}
3203 
3204 	mutex_lock(&adev->srbm_mutex);
3205 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3206 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
			(pfp_hdr->ucode_start_addr_hi << 30) |
			(pfp_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
			pfp_hdr->ucode_start_addr_hi >> 2);
3212 
		/*
		 * Program CP_ME_CNTL to reset the given pipe so that
		 * CP_PFP_PRGRM_CNTR_START takes effect.
		 */
3217 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3218 		if (pipe_id == 0)
3219 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3220 					PFP_PIPE0_RESET, 1);
3221 		else
3222 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3223 					PFP_PIPE1_RESET, 1);
3224 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3225 
		/* Clear the pfp pipe reset bit. */
3227 		if (pipe_id == 0)
3228 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3229 					PFP_PIPE0_RESET, 0);
3230 		else
3231 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3232 					PFP_PIPE1_RESET, 0);
3233 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3234 
3235 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
3236 			lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
3237 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
3238 			upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
3239 	}
3240 	soc21_grbm_select(adev, 0, 0, 0, 0);
3241 	mutex_unlock(&adev->srbm_mutex);
3242 
3243 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3244 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3245 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3246 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3247 
3248 	/* Invalidate the data caches */
3249 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3250 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3251 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3252 
3253 	for (i = 0; i < usec_timeout; i++) {
3254 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
			INVALIDATE_DCACHE_COMPLETE) == 1)
3257 			break;
3258 		udelay(1);
3259 	}
3260 
3261 	if (i >= usec_timeout) {
3262 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3263 		return -EINVAL;
3264 	}
3265 
3266 	return 0;
3267 }
3268 
3269 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
3270 {
3271 	int r;
3272 	const struct gfx_firmware_header_v1_0 *me_hdr;
3273 	const __le32 *fw_data;
3274 	unsigned i, fw_size;
3275 
3276 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3277 		adev->gfx.me_fw->data;
3278 
3279 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3280 
3281 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
3282 		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3283 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
3284 
3285 	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
3286 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3287 				      &adev->gfx.me.me_fw_obj,
3288 				      &adev->gfx.me.me_fw_gpu_addr,
3289 				      (void **)&adev->gfx.me.me_fw_ptr);
3290 	if (r) {
3291 		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
3292 		gfx_v11_0_me_fini(adev);
3293 		return r;
3294 	}
3295 
3296 	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
3297 
3298 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3299 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3300 
3301 	gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
3302 
3303 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
3304 
3305 	for (i = 0; i < me_hdr->jt_size; i++)
3306 		WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
3307 			     le32_to_cpup(fw_data + me_hdr->jt_offset + i));
3308 
3309 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
3310 
3311 	return 0;
3312 }
3313 
3314 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
3315 {
3316 	int r;
3317 	const struct gfx_firmware_header_v2_0 *me_hdr;
3318 	const __le32 *fw_ucode, *fw_data;
3319 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
3320 	uint32_t tmp;
3321 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
3322 
3323 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
3324 		adev->gfx.me_fw->data;
3325 
3326 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3327 
3328 	/* instruction */
3329 	fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
3330 		le32_to_cpu(me_hdr->ucode_offset_bytes));
3331 	fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
3332 	/* data */
3333 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
3334 		le32_to_cpu(me_hdr->data_offset_bytes));
3335 	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
3336 
	/* 64kb align */
3338 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3339 				      64 * 1024,
3340 				      AMDGPU_GEM_DOMAIN_VRAM |
3341 				      AMDGPU_GEM_DOMAIN_GTT,
3342 				      &adev->gfx.me.me_fw_obj,
3343 				      &adev->gfx.me.me_fw_gpu_addr,
3344 				      (void **)&adev->gfx.me.me_fw_ptr);
3345 	if (r) {
3346 		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
3347 		gfx_v11_0_me_fini(adev);
3348 		return r;
3349 	}
3350 
3351 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3352 				      64 * 1024,
3353 				      AMDGPU_GEM_DOMAIN_VRAM |
3354 				      AMDGPU_GEM_DOMAIN_GTT,
3355 				      &adev->gfx.me.me_fw_data_obj,
3356 				      &adev->gfx.me.me_fw_data_gpu_addr,
3357 				      (void **)&adev->gfx.me.me_fw_data_ptr);
3358 	if (r) {
3359 		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
		gfx_v11_0_me_fini(adev);
3361 		return r;
3362 	}
3363 
3364 	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
3365 	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
3366 
3367 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3368 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
3369 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3370 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
3371 
3372 	if (amdgpu_emu_mode == 1)
3373 		adev->hdp.funcs->flush_hdp(adev, NULL);
3374 
3375 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
3376 		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
3377 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
3378 		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
3379 
3380 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
3381 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
3382 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
3383 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
3384 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
3385 
	/*
	 * Programming any of the CP_ME_IC_BASE registers
	 * forces an invalidation of the ME L1 I$. Wait for the
	 * invalidation to complete.
	 */
3391 	for (i = 0; i < usec_timeout; i++) {
3392 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
			INVALIDATE_CACHE_COMPLETE) == 1)
3395 			break;
3396 		udelay(1);
3397 	}
3398 
3399 	if (i >= usec_timeout) {
3400 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3401 		return -EINVAL;
3402 	}
3403 
3404 	/* Prime the instruction caches */
3405 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3406 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
3407 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
3408 
	/* Wait for the instruction cache to be primed */
3410 	for (i = 0; i < usec_timeout; i++) {
3411 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
			ICACHE_PRIMED) == 1)
3414 			break;
3415 		udelay(1);
3416 	}
3417 
3418 	if (i >= usec_timeout) {
3419 		dev_err(adev->dev, "failed to prime instruction cache\n");
3420 		return -EINVAL;
3421 	}
3422 
3423 	mutex_lock(&adev->srbm_mutex);
3424 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3425 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
			(me_hdr->ucode_start_addr_hi << 30) |
			(me_hdr->ucode_start_addr_lo >> 2));
		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
			me_hdr->ucode_start_addr_hi >> 2);
3431 
		/*
		 * Program CP_ME_CNTL to reset the given pipe so that
		 * CP_ME_PRGRM_CNTR_START takes effect.
		 */
3436 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3437 		if (pipe_id == 0)
3438 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3439 					ME_PIPE0_RESET, 1);
3440 		else
3441 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3442 					ME_PIPE1_RESET, 1);
3443 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3444 
		/* Clear the me pipe reset bit. */
3446 		if (pipe_id == 0)
3447 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3448 					ME_PIPE0_RESET, 0);
3449 		else
3450 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3451 					ME_PIPE1_RESET, 0);
3452 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3453 
3454 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
3455 			lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3456 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
3457 			upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3458 	}
3459 	soc21_grbm_select(adev, 0, 0, 0, 0);
3460 	mutex_unlock(&adev->srbm_mutex);
3461 
3462 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3463 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3464 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3465 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3466 
3467 	/* Invalidate the data caches */
3468 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3469 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3470 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3471 
3472 	for (i = 0; i < usec_timeout; i++) {
3473 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
			INVALIDATE_DCACHE_COMPLETE) == 1)
3476 			break;
3477 		udelay(1);
3478 	}
3479 
3480 	if (i >= usec_timeout) {
3481 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3482 		return -EINVAL;
3483 	}
3484 
3485 	return 0;
3486 }
3487 
3488 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3489 {
3490 	int r;
3491 
3492 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3493 		return -EINVAL;
3494 
3495 	gfx_v11_0_cp_gfx_enable(adev, false);
3496 
3497 	if (adev->gfx.rs64_enable)
3498 		r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3499 	else
3500 		r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3501 	if (r) {
3502 		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3503 		return r;
3504 	}
3505 
3506 	if (adev->gfx.rs64_enable)
3507 		r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3508 	else
3509 		r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3510 	if (r) {
3511 		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3512 		return r;
3513 	}
3514 
3515 	return 0;
3516 }
3517 
3518 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3519 {
3520 	struct amdgpu_ring *ring;
3521 	const struct cs_section_def *sect = NULL;
3522 	const struct cs_extent_def *ext = NULL;
3523 	int r, i;
3524 	int ctx_reg_offset;
3525 
3526 	/* init the CP */
3527 	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3528 		     adev->gfx.config.max_hw_contexts - 1);
3529 	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3530 
3531 	if (!amdgpu_async_gfx_ring)
3532 		gfx_v11_0_cp_gfx_enable(adev, true);
3533 
3534 	ring = &adev->gfx.gfx_ring[0];
3535 	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3536 	if (r) {
3537 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3538 		return r;
3539 	}
3540 
3541 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3542 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3543 
3544 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3545 	amdgpu_ring_write(ring, 0x80000000);
3546 	amdgpu_ring_write(ring, 0x80000000);
3547 
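	/* emit the clear-state context register defaults from gfx11_cs_data */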
3548 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3549 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3550 			if (sect->id == SECT_CONTEXT) {
3551 				amdgpu_ring_write(ring,
3552 						  PACKET3(PACKET3_SET_CONTEXT_REG,
3553 							  ext->reg_count));
3554 				amdgpu_ring_write(ring, ext->reg_index -
3555 						  PACKET3_SET_CONTEXT_REG_START);
3556 				for (i = 0; i < ext->reg_count; i++)
3557 					amdgpu_ring_write(ring, ext->extent[i]);
3558 			}
3559 		}
3560 	}
3561 
3562 	ctx_reg_offset =
3563 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3564 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3565 	amdgpu_ring_write(ring, ctx_reg_offset);
3566 	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3567 
3568 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3569 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3570 
3571 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3572 	amdgpu_ring_write(ring, 0);
3573 
3574 	amdgpu_ring_commit(ring);
3575 
3576 	/* submit cs packet to copy state 0 to next available state */
3577 	if (adev->gfx.num_gfx_rings > 1) {
3578 		/* maximum supported gfx ring is 2 */
3579 		ring = &adev->gfx.gfx_ring[1];
3580 		r = amdgpu_ring_alloc(ring, 2);
3581 		if (r) {
3582 			DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3583 			return r;
3584 		}
3585 
3586 		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3587 		amdgpu_ring_write(ring, 0);
3588 
3589 		amdgpu_ring_commit(ring);
3590 	}
3591 	return 0;
3592 }
3593 
3594 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3595 					 CP_PIPE_ID pipe)
3596 {
3597 	u32 tmp;
3598 
3599 	tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3600 	tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3601 
3602 	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3603 }
3604 
3605 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3606 					  struct amdgpu_ring *ring)
3607 {
3608 	u32 tmp;
3609 
3610 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3611 	if (ring->use_doorbell) {
3612 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3613 				    DOORBELL_OFFSET, ring->doorbell_index);
3614 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3615 				    DOORBELL_EN, 1);
3616 	} else {
3617 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3618 				    DOORBELL_EN, 0);
3619 	}
3620 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3621 
3622 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3623 			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
3624 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3625 
3626 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3627 		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3628 }
3629 
3630 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3631 {
3632 	struct amdgpu_ring *ring;
3633 	u32 tmp;
3634 	u32 rb_bufsz;
3635 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3636 
3637 	/* Set the write pointer delay */
3638 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3639 
3640 	/* set the RB to use vmid 0 */
3641 	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3642 
3643 	/* Init gfx ring 0 for pipe 0 */
3644 	mutex_lock(&adev->srbm_mutex);
3645 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3646 
3647 	/* Set ring buffer size */
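	/* RB_BUFSZ is log2 of the ring size in quadwords (2^(RB_BUFSZ+1) dwords) */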
3648 	ring = &adev->gfx.gfx_ring[0];
3649 	rb_bufsz = order_base_2(ring->ring_size / 8);
3650 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3651 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3652 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3653 
3654 	/* Initialize the ring buffer's write pointers */
3655 	ring->wptr = 0;
3656 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3657 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3658 
3659 	/* set the wb address whether it's enabled or not */
3660 	rptr_addr = ring->rptr_gpu_addr;
3661 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3662 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3663 		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3664 
3665 	wptr_gpu_addr = ring->wptr_gpu_addr;
3666 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3667 		     lower_32_bits(wptr_gpu_addr));
3668 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3669 		     upper_32_bits(wptr_gpu_addr));
3670 
3671 	mdelay(1);
3672 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3673 
3674 	rb_addr = ring->gpu_addr >> 8;
3675 	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3676 	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3677 
3678 	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3679 
3680 	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3681 	mutex_unlock(&adev->srbm_mutex);
3682 
3683 	/* Init gfx ring 1 for pipe 1 */
3684 	if (adev->gfx.num_gfx_rings > 1) {
3685 		mutex_lock(&adev->srbm_mutex);
3686 		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3687 		/* maximum supported gfx ring is 2 */
3688 		ring = &adev->gfx.gfx_ring[1];
3689 		rb_bufsz = order_base_2(ring->ring_size / 8);
3690 		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3691 		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3692 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3693 		/* Initialize the ring buffer's write pointers */
3694 		ring->wptr = 0;
3695 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3696 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3697 		/* Set the wb address whether it's enabled or not */
3698 		rptr_addr = ring->rptr_gpu_addr;
3699 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3700 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3701 			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3702 		wptr_gpu_addr = ring->wptr_gpu_addr;
3703 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3704 			     lower_32_bits(wptr_gpu_addr));
3705 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3706 			     upper_32_bits(wptr_gpu_addr));
3707 
3708 		mdelay(1);
3709 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3710 
3711 		rb_addr = ring->gpu_addr >> 8;
3712 		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3713 		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3714 		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3715 
3716 		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3717 		mutex_unlock(&adev->srbm_mutex);
3718 	}
3719 	/* Switch to pipe 0 */
3720 	mutex_lock(&adev->srbm_mutex);
3721 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3722 	mutex_unlock(&adev->srbm_mutex);
3723 
3724 	/* start the ring */
3725 	gfx_v11_0_cp_gfx_start(adev);
3726 
3727 	return 0;
3728 }
3729 
3730 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3731 {
3732 	u32 data;
3733 
3734 	if (adev->gfx.rs64_enable) {
3735 		data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3736 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3737 							 enable ? 0 : 1);
3738 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3739 							 enable ? 0 : 1);
3740 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3741 							 enable ? 0 : 1);
3742 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3743 							 enable ? 0 : 1);
3744 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3745 							 enable ? 0 : 1);
3746 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3747 							 enable ? 1 : 0);
		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
							 enable ? 1 : 0);
3750 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3751 							 enable ? 1 : 0);
3752 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3753 							 enable ? 1 : 0);
3754 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3755 							 enable ? 0 : 1);
3756 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3757 	} else {
3758 		data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3759 
3760 		if (enable) {
3761 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3762 			if (!adev->enable_mes_kiq)
3763 				data = REG_SET_FIELD(data, CP_MEC_CNTL,
3764 						     MEC_ME2_HALT, 0);
3765 		} else {
3766 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3767 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3768 		}
3769 		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3770 	}
3771 
3772 	udelay(50);
3773 }
3774 
3775 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3776 {
3777 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3778 	const __le32 *fw_data;
3779 	unsigned i, fw_size;
3780 	u32 *fw = NULL;
3781 	int r;
3782 
3783 	if (!adev->gfx.mec_fw)
3784 		return -EINVAL;
3785 
3786 	gfx_v11_0_cp_compute_enable(adev, false);
3787 
3788 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3789 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3790 
3791 	fw_data = (const __le32 *)
3792 		(adev->gfx.mec_fw->data +
3793 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3794 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3795 
3796 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3797 					  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3798 					  &adev->gfx.mec.mec_fw_obj,
3799 					  &adev->gfx.mec.mec_fw_gpu_addr,
3800 					  (void **)&fw);
3801 	if (r) {
3802 		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3803 		gfx_v11_0_mec_fini(adev);
3804 		return r;
3805 	}
3806 
3807 	memcpy(fw, fw_data, fw_size);
3808 
3809 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3810 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3811 
3812 	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3813 
3814 	/* MEC1 */
3815 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3816 
3817 	for (i = 0; i < mec_hdr->jt_size; i++)
3818 		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3819 			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3820 
3821 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3822 
3823 	return 0;
3824 }
3825 
3826 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3827 {
3828 	const struct gfx_firmware_header_v2_0 *mec_hdr;
3829 	const __le32 *fw_ucode, *fw_data;
3830 	u32 tmp, fw_ucode_size, fw_data_size;
3831 	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3832 	u32 *fw_ucode_ptr, *fw_data_ptr;
3833 	int r;
3834 
3835 	if (!adev->gfx.mec_fw)
3836 		return -EINVAL;
3837 
3838 	gfx_v11_0_cp_compute_enable(adev, false);
3839 
3840 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3841 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3842 
3843 	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3844 				le32_to_cpu(mec_hdr->ucode_offset_bytes));
3845 	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3846 
3847 	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3848 				le32_to_cpu(mec_hdr->data_offset_bytes));
3849 	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3850 
3851 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3852 				      64 * 1024,
3853 				      AMDGPU_GEM_DOMAIN_VRAM |
3854 				      AMDGPU_GEM_DOMAIN_GTT,
3855 				      &adev->gfx.mec.mec_fw_obj,
3856 				      &adev->gfx.mec.mec_fw_gpu_addr,
3857 				      (void **)&fw_ucode_ptr);
3858 	if (r) {
3859 		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3860 		gfx_v11_0_mec_fini(adev);
3861 		return r;
3862 	}
3863 
3864 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3865 				      64 * 1024,
3866 				      AMDGPU_GEM_DOMAIN_VRAM |
3867 				      AMDGPU_GEM_DOMAIN_GTT,
3868 				      &adev->gfx.mec.mec_fw_data_obj,
3869 				      &adev->gfx.mec.mec_fw_data_gpu_addr,
3870 				      (void **)&fw_data_ptr);
3871 	if (r) {
		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3873 		gfx_v11_0_mec_fini(adev);
3874 		return r;
3875 	}
3876 
3877 	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3878 	memcpy(fw_data_ptr, fw_data, fw_data_size);
3879 
3880 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3881 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3882 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3883 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3884 
3885 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3886 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3887 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3888 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3889 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
3890 
3891 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
3892 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
3893 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
3894 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
3895 
3896 	mutex_lock(&adev->srbm_mutex);
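	/* program the entry point and cache bases into each MEC pipe (me 1) */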
3897 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
3898 		soc21_grbm_select(adev, 1, i, 0, 0);
3899 
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO,
		     lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
		     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3903 
3904 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3905 					mec_hdr->ucode_start_addr_lo >> 2 |
3906 					mec_hdr->ucode_start_addr_hi << 30);
3907 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3908 					mec_hdr->ucode_start_addr_hi >> 2);
3909 
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
		     lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
		     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3913 	}
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
3916 
	/* Trigger an invalidation of the MEC data cache */
3918 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3919 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3920 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
3921 
3922 	/* Wait for invalidation complete */
3923 	for (i = 0; i < usec_timeout; i++) {
3924 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
				  INVALIDATE_DCACHE_COMPLETE) == 1)
3927 			break;
3928 		udelay(1);
3929 	}
3930 
3931 	if (i >= usec_timeout) {
		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3933 		return -EINVAL;
3934 	}
3935 
3936 	/* Trigger an invalidation of the L1 instruction caches */
3937 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3938 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
3939 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
3940 
3941 	/* Wait for invalidation complete */
3942 	for (i = 0; i < usec_timeout; i++) {
3943 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
		if (REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
				  INVALIDATE_CACHE_COMPLETE) == 1)
3946 			break;
3947 		udelay(1);
3948 	}
3949 
3950 	if (i >= usec_timeout) {
3951 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3952 		return -EINVAL;
3953 	}
3954 
3955 	return 0;
3956 }
3957 
3958 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
3959 {
3960 	uint32_t tmp;
3961 	struct amdgpu_device *adev = ring->adev;
3962 
3963 	/* tell RLC which is KIQ queue */
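	/*
	 * The low byte of RLC_CP_SCHEDULERS encodes the KIQ location:
	 * queue in bits [2:0], pipe in bits [4:3], me in bits [6:5];
	 * bit 7 (the 0x80 below) apparently marks the entry as valid.
	 */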
3964 	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3965 	tmp &= 0xffffff00;
3966 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3967 	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
3968 }
3969 
3970 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
3971 {
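	/*
	 * doorbell_index values count 64-bit doorbell slots: multiply by 2
	 * for a dword index, then shift left by 2 to form the byte offset
	 * the DOORBELL_RANGE registers expect.
	 */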
3972 	/* set graphics engine doorbell range */
3973 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
3974 		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
3975 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3976 		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
3977 
3978 	/* set compute engine doorbell range */
3979 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3980 		     (adev->doorbell_index.kiq * 2) << 2);
3981 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3982 		     (adev->doorbell_index.userqueue_end * 2) << 2);
3983 }
3984 
3985 static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
3986 					   struct v11_gfx_mqd *mqd,
3987 					   struct amdgpu_mqd_prop *prop)
3988 {
	bool priority = false;
3990 	u32 tmp;
3991 
3992 	/* set up default queue priority level
3993 	 * 0x0 = low priority, 0x1 = high priority
3994 	 */
3995 	if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
		priority = true;
3997 
3998 	tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT;
3999 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
4000 	mqd->cp_gfx_hqd_queue_priority = tmp;
4001 }
4002 
4003 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
4004 				  struct amdgpu_mqd_prop *prop)
4005 {
4006 	struct v11_gfx_mqd *mqd = m;
4007 	uint64_t hqd_gpu_addr, wb_gpu_addr;
4008 	uint32_t tmp;
4009 	uint32_t rb_bufsz;
4010 
4011 	/* set up gfx hqd wptr */
4012 	mqd->cp_gfx_hqd_wptr = 0;
4013 	mqd->cp_gfx_hqd_wptr_hi = 0;
4014 
4015 	/* set the pointer to the MQD */
4016 	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
4017 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
4018 
4019 	/* set up mqd control */
4020 	tmp = regCP_GFX_MQD_CONTROL_DEFAULT;
4021 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
4022 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
4023 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
4024 	mqd->cp_gfx_mqd_control = tmp;
4025 
	/* set up cp_gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
4027 	tmp = regCP_GFX_HQD_VMID_DEFAULT;
4028 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
4029 	mqd->cp_gfx_hqd_vmid = 0;
4030 
4031 	/* set up gfx queue priority */
4032 	gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);
4033 
4034 	/* set up time quantum */
4035 	tmp = regCP_GFX_HQD_QUANTUM_DEFAULT;
4036 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
4037 	mqd->cp_gfx_hqd_quantum = tmp;
4038 
	/* set up gfx hqd base; like CP_RB_BASE, it holds the base address >> 8 */
4040 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
4041 	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
4042 	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
4043 
	/* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
4045 	wb_gpu_addr = prop->rptr_gpu_addr;
4046 	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
4047 	mqd->cp_gfx_hqd_rptr_addr_hi =
4048 		upper_32_bits(wb_gpu_addr) & 0xffff;
4049 
4050 	/* set up rb_wptr_poll addr */
4051 	wb_gpu_addr = prop->wptr_gpu_addr;
4052 	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4053 	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4054 
	/* set up the gfx_hqd_control, similar to CP_RB0_CNTL */
4056 	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
4057 	tmp = regCP_GFX_HQD_CNTL_DEFAULT;
4058 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
4059 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
4060 #ifdef __BIG_ENDIAN
4061 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
4062 #endif
4063 	mqd->cp_gfx_hqd_cntl = tmp;
4064 
4065 	/* set up cp_doorbell_control */
4066 	tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT;
4067 	if (prop->use_doorbell) {
4068 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4069 				    DOORBELL_OFFSET, prop->doorbell_index);
4070 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4071 				    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);
	}
4075 	mqd->cp_rb_doorbell_control = tmp;
4076 
4077 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4078 	mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT;
4079 
	/* activate the queue */
4081 	mqd->cp_gfx_hqd_active = 1;
4082 
4083 	return 0;
4084 }
4085 
4086 static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
4087 {
4088 	struct amdgpu_device *adev = ring->adev;
4089 	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
4090 	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
4091 
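	/*
	 * On first init, build the MQD and snapshot it; on reset/resume,
	 * restore the snapshot instead.
	 */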
4092 	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
4093 		memset((void *)mqd, 0, sizeof(*mqd));
4094 		mutex_lock(&adev->srbm_mutex);
4095 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4096 		amdgpu_ring_init_mqd(ring);
4097 		soc21_grbm_select(adev, 0, 0, 0, 0);
4098 		mutex_unlock(&adev->srbm_mutex);
4099 		if (adev->gfx.me.mqd_backup[mqd_idx])
4100 			memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4101 	} else {
4102 		/* restore mqd with the backup copy */
4103 		if (adev->gfx.me.mqd_backup[mqd_idx])
4104 			memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
4105 		/* reset the ring */
4106 		ring->wptr = 0;
4107 		*ring->wptr_cpu_addr = 0;
4108 		amdgpu_ring_clear_ring(ring);
4109 	}
4110 
4111 	return 0;
4112 }
4113 
4114 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
4115 {
4116 	int r, i;
4117 
4118 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4119 		r = gfx_v11_0_kgq_init_queue(&adev->gfx.gfx_ring[i], false);
4120 		if (r)
4121 			return r;
4122 	}
4123 
4124 	r = amdgpu_gfx_enable_kgq(adev, 0);
4125 	if (r)
4126 		return r;
4127 
4128 	return gfx_v11_0_cp_gfx_start(adev);
4129 }
4130 
4131 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
4132 				      struct amdgpu_mqd_prop *prop)
4133 {
4134 	struct v11_compute_mqd *mqd = m;
4135 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4136 	uint32_t tmp;
4137 
4138 	mqd->header = 0xC0310800;
4139 	mqd->compute_pipelinestat_enable = 0x00000001;
4140 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4141 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4142 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4143 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4144 	mqd->compute_misc_reserved = 0x00000007;
4145 
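	/* like the other CP base registers, EOP base holds the address >> 8 */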
4146 	eop_base_addr = prop->eop_gpu_addr >> 8;
4147 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4148 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4149 
4150 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4151 	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
4152 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4153 			(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
4154 
4155 	mqd->cp_hqd_eop_control = tmp;
4156 
4157 	/* enable doorbell? */
4158 	tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
4159 
4160 	if (prop->use_doorbell) {
4161 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4162 				    DOORBELL_OFFSET, prop->doorbell_index);
4163 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4164 				    DOORBELL_EN, 1);
4165 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4166 				    DOORBELL_SOURCE, 0);
4167 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4168 				    DOORBELL_HIT, 0);
4169 	} else {
4170 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4171 				    DOORBELL_EN, 0);
4172 	}
4173 
4174 	mqd->cp_hqd_pq_doorbell_control = tmp;
4175 
4176 	/* disable the queue if it's active */
4177 	mqd->cp_hqd_dequeue_request = 0;
4178 	mqd->cp_hqd_pq_rptr = 0;
4179 	mqd->cp_hqd_pq_wptr_lo = 0;
4180 	mqd->cp_hqd_pq_wptr_hi = 0;
4181 
4182 	/* set the pointer to the MQD */
4183 	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
4184 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
4185 
4186 	/* set MQD vmid to 0 */
4187 	tmp = regCP_MQD_CONTROL_DEFAULT;
4188 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4189 	mqd->cp_mqd_control = tmp;
4190 
	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4192 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
4193 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4194 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4195 
4196 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4197 	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
4198 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4199 			    (order_base_2(prop->queue_size / 4) - 1));
4200 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4201 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4202 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
4203 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
4204 			    prop->allow_tunneling);
4205 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4206 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4207 	mqd->cp_hqd_pq_control = tmp;
4208 
4209 	/* set the wb address whether it's enabled or not */
4210 	wb_gpu_addr = prop->rptr_gpu_addr;
4211 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4212 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4213 		upper_32_bits(wb_gpu_addr) & 0xffff;
4214 
4215 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4216 	wb_gpu_addr = prop->wptr_gpu_addr;
4217 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4218 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4219 
4220 	tmp = 0;
4221 	/* enable the doorbell if requested */
4222 	if (prop->use_doorbell) {
4223 		tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
4224 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4225 				DOORBELL_OFFSET, prop->doorbell_index);
4226 
4227 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4228 				    DOORBELL_EN, 1);
4229 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4230 				    DOORBELL_SOURCE, 0);
4231 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4232 				    DOORBELL_HIT, 0);
4233 	}
4234 
4235 	mqd->cp_hqd_pq_doorbell_control = tmp;
4236 
4237 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4238 	mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
4239 
4240 	/* set the vmid for the queue */
4241 	mqd->cp_hqd_vmid = 0;
4242 
4243 	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
4244 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
4245 	mqd->cp_hqd_persistent_state = tmp;
4246 
4247 	/* set MIN_IB_AVAIL_SIZE */
4248 	tmp = regCP_HQD_IB_CONTROL_DEFAULT;
4249 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4250 	mqd->cp_hqd_ib_control = tmp;
4251 
4252 	/* set static priority for a compute queue/ring */
4253 	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
4254 	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
4255 
4256 	mqd->cp_hqd_active = prop->hqd_active;
4257 
4258 	return 0;
4259 }
4260 
4261 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
4262 {
4263 	struct amdgpu_device *adev = ring->adev;
4264 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4265 	int j;
4266 
4267 	/* inactivate the queue */
4268 	if (amdgpu_sriov_vf(adev))
4269 		WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
4270 
4271 	/* disable wptr polling */
4272 	WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4273 
4274 	/* write the EOP addr */
4275 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
4276 	       mqd->cp_hqd_eop_base_addr_lo);
4277 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
4278 	       mqd->cp_hqd_eop_base_addr_hi);
4279 
4280 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4281 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
4282 	       mqd->cp_hqd_eop_control);
4283 
4284 	/* enable doorbell? */
4285 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4286 	       mqd->cp_hqd_pq_doorbell_control);
4287 
4288 	/* disable the queue if it's active */
4289 	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
4290 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
4291 		for (j = 0; j < adev->usec_timeout; j++) {
4292 			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
4293 				break;
4294 			udelay(1);
4295 		}
4296 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
4297 		       mqd->cp_hqd_dequeue_request);
4298 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
4299 		       mqd->cp_hqd_pq_rptr);
4300 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4301 		       mqd->cp_hqd_pq_wptr_lo);
4302 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4303 		       mqd->cp_hqd_pq_wptr_hi);
4304 	}
4305 
4306 	/* set the pointer to the MQD */
4307 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
4308 	       mqd->cp_mqd_base_addr_lo);
4309 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
4310 	       mqd->cp_mqd_base_addr_hi);
4311 
4312 	/* set MQD vmid to 0 */
4313 	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
4314 	       mqd->cp_mqd_control);
4315 
	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4317 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
4318 	       mqd->cp_hqd_pq_base_lo);
4319 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
4320 	       mqd->cp_hqd_pq_base_hi);
4321 
4322 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4323 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
4324 	       mqd->cp_hqd_pq_control);
4325 
4326 	/* set the wb address whether it's enabled or not */
4327 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
4328 		mqd->cp_hqd_pq_rptr_report_addr_lo);
4329 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4330 		mqd->cp_hqd_pq_rptr_report_addr_hi);
4331 
4332 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4333 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
4334 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
4335 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4336 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4337 
4338 	/* enable the doorbell if requested */
4339 	if (ring->use_doorbell) {
4340 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
4341 			(adev->doorbell_index.kiq * 2) << 2);
4342 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
4343 			(adev->doorbell_index.userqueue_end * 2) << 2);
4344 	}
4345 
4346 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4347 	       mqd->cp_hqd_pq_doorbell_control);
4348 
4349 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4350 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4351 	       mqd->cp_hqd_pq_wptr_lo);
4352 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4353 	       mqd->cp_hqd_pq_wptr_hi);
4354 
4355 	/* set the vmid for the queue */
4356 	WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
4357 
4358 	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
4359 	       mqd->cp_hqd_persistent_state);
4360 
4361 	/* activate the queue */
4362 	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
4363 	       mqd->cp_hqd_active);
4364 
4365 	if (ring->use_doorbell)
4366 		WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4367 
4368 	return 0;
4369 }
4370 
4371 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
4372 {
4373 	struct amdgpu_device *adev = ring->adev;
4374 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4375 
4376 	gfx_v11_0_kiq_setting(ring);
4377 
4378 	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4379 		/* reset MQD to a clean status */
4380 		if (adev->gfx.kiq[0].mqd_backup)
4381 			memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
4382 
4383 		/* reset ring buffer */
4384 		ring->wptr = 0;
4385 		amdgpu_ring_clear_ring(ring);
4386 
4387 		mutex_lock(&adev->srbm_mutex);
4388 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4389 		gfx_v11_0_kiq_init_register(ring);
4390 		soc21_grbm_select(adev, 0, 0, 0, 0);
4391 		mutex_unlock(&adev->srbm_mutex);
4392 	} else {
4393 		memset((void *)mqd, 0, sizeof(*mqd));
4394 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4395 			amdgpu_ring_clear_ring(ring);
4396 		mutex_lock(&adev->srbm_mutex);
4397 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4398 		amdgpu_ring_init_mqd(ring);
4399 		gfx_v11_0_kiq_init_register(ring);
4400 		soc21_grbm_select(adev, 0, 0, 0, 0);
4401 		mutex_unlock(&adev->srbm_mutex);
4402 
4403 		if (adev->gfx.kiq[0].mqd_backup)
4404 			memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
4405 	}
4406 
4407 	return 0;
4408 }
4409 
4410 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
4411 {
4412 	struct amdgpu_device *adev = ring->adev;
4413 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4414 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4415 
4416 	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
4417 		memset((void *)mqd, 0, sizeof(*mqd));
4418 		mutex_lock(&adev->srbm_mutex);
4419 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4420 		amdgpu_ring_init_mqd(ring);
4421 		soc21_grbm_select(adev, 0, 0, 0, 0);
4422 		mutex_unlock(&adev->srbm_mutex);
4423 
4424 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4425 			memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4426 	} else {
4427 		/* restore MQD to a clean status */
4428 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4429 			memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4430 		/* reset ring buffer */
4431 		ring->wptr = 0;
4432 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4433 		amdgpu_ring_clear_ring(ring);
4434 	}
4435 
4436 	return 0;
4437 }
4438 
4439 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4440 {
4441 	gfx_v11_0_kiq_init_queue(&adev->gfx.kiq[0].ring);
4442 	return 0;
4443 }
4444 
4445 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4446 {
4447 	int i, r;
4448 
4449 	if (!amdgpu_async_gfx_ring)
4450 		gfx_v11_0_cp_compute_enable(adev, true);
4451 
4452 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4453 		r = gfx_v11_0_kcq_init_queue(&adev->gfx.compute_ring[i], false);
4454 		if (r)
4455 			return r;
4456 	}
4457 
4458 	return amdgpu_gfx_enable_kcq(adev, 0);
4459 }
4460 
4461 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4462 {
4463 	int r, i;
4464 	struct amdgpu_ring *ring;
4465 
4466 	if (!(adev->flags & AMD_IS_APU))
4467 		gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4468 
4469 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4470 		/* legacy firmware loading */
4471 		r = gfx_v11_0_cp_gfx_load_microcode(adev);
4472 		if (r)
4473 			return r;
4474 
4475 		if (adev->gfx.rs64_enable)
4476 			r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4477 		else
4478 			r = gfx_v11_0_cp_compute_load_microcode(adev);
4479 		if (r)
4480 			return r;
4481 	}
4482 
4483 	gfx_v11_0_cp_set_doorbell_range(adev);
4484 
4485 	if (amdgpu_async_gfx_ring) {
4486 		gfx_v11_0_cp_compute_enable(adev, true);
4487 		gfx_v11_0_cp_gfx_enable(adev, true);
4488 	}
4489 
4490 	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4491 		r = amdgpu_mes_kiq_hw_init(adev);
4492 	else
4493 		r = gfx_v11_0_kiq_resume(adev);
4494 	if (r)
4495 		return r;
4496 
4497 	r = gfx_v11_0_kcq_resume(adev);
4498 	if (r)
4499 		return r;
4500 
4501 	if (!amdgpu_async_gfx_ring) {
4502 		r = gfx_v11_0_cp_gfx_resume(adev);
4503 		if (r)
4504 			return r;
4505 	} else {
4506 		r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4507 		if (r)
4508 			return r;
4509 	}
4510 
4511 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4512 		ring = &adev->gfx.gfx_ring[i];
4513 		r = amdgpu_ring_test_helper(ring);
4514 		if (r)
4515 			return r;
4516 	}
4517 
4518 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4519 		ring = &adev->gfx.compute_ring[i];
4520 		r = amdgpu_ring_test_helper(ring);
4521 		if (r)
4522 			return r;
4523 	}
4524 
4525 	return 0;
4526 }
4527 
4528 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4529 {
4530 	gfx_v11_0_cp_gfx_enable(adev, enable);
4531 	gfx_v11_0_cp_compute_enable(adev, enable);
4532 }
4533 
4534 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4535 {
4536 	int r;
4537 	bool value;
4538 
4539 	r = adev->gfxhub.funcs->gart_enable(adev);
4540 	if (r)
4541 		return r;
4542 
4543 	adev->hdp.funcs->flush_hdp(adev, NULL);
4544 
	value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
4547 
4548 	adev->gfxhub.funcs->set_fault_enable_default(adev, value);
	/*
	 * TODO: investigate why this and the hdp flush above are needed;
	 * are we missing a flush somewhere else?
	 */
4551 	adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
4552 
4553 	return 0;
4554 }
4555 
4556 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4557 {
4558 	u32 tmp;
4559 
4560 	/* select RS64 */
4561 	if (adev->gfx.rs64_enable) {
4562 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4563 		tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4564 		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4565 
4566 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4567 		tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4568 		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4569 	}
4570 
4571 	if (amdgpu_emu_mode == 1)
4572 		msleep(100);
4573 }
4574 
static int get_gb_addr_config(struct amdgpu_device *adev)
4576 {
4577 	u32 gb_addr_config;
4578 
4579 	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4580 	if (gb_addr_config == 0)
4581 		return -EINVAL;
4582 
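	/* the GB_ADDR_CONFIG fields are log2-encoded; decode them into unit counts */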
4583 	adev->gfx.config.gb_addr_config_fields.num_pkrs =
4584 		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4585 
4586 	adev->gfx.config.gb_addr_config = gb_addr_config;
4587 
4588 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4589 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4590 				      GB_ADDR_CONFIG, NUM_PIPES);
4591 
4592 	adev->gfx.config.max_tile_pipes =
4593 		adev->gfx.config.gb_addr_config_fields.num_pipes;
4594 
4595 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4596 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4597 				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4598 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4599 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4600 				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
4601 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4602 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4603 				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4604 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4605 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4606 				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4607 
4608 	return 0;
4609 }
4610 
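/* Set the GPA_OVERRIDE bits in the CPC/CPG PSP debug registers to disable
 * guest physical address mode.
 */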
4611 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4612 {
4613 	uint32_t data;
4614 
4615 	data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4616 	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4617 	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4618 
4619 	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4620 	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4621 	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4622 }
4623 
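/* Bring up the GFX block: load and start the firmwares according to the
 * configured load type, enable the gfxhub, program the golden and constant
 * registers, and resume the RLC and the CP ring buffers.
 */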
4624 static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
4625 {
4626 	int r;
4627 	struct amdgpu_device *adev = ip_block->adev;
4628 
4629 	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
4630 				       adev->gfx.cleaner_shader_ptr);
4631 
4632 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4633 		if (adev->gfx.imu.funcs) {
4634 			/* RLC autoload sequence 1: Program rlc ram */
4635 			if (adev->gfx.imu.funcs->program_rlc_ram)
4636 				adev->gfx.imu.funcs->program_rlc_ram(adev);
4637 			/* rlc autoload firmware */
4638 			r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4639 			if (r)
4640 				return r;
4641 		}
4642 	} else {
4643 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4644 			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4645 				if (adev->gfx.imu.funcs->load_microcode)
4646 					adev->gfx.imu.funcs->load_microcode(adev);
4647 				if (adev->gfx.imu.funcs->setup_imu)
4648 					adev->gfx.imu.funcs->setup_imu(adev);
4649 				if (adev->gfx.imu.funcs->start_imu)
4650 					adev->gfx.imu.funcs->start_imu(adev);
4651 			}
4652 
4653 			/* disable gpa mode in backdoor loading */
4654 			gfx_v11_0_disable_gpa_mode(adev);
4655 		}
4656 	}
4657 
4658 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4659 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4660 		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4661 		if (r) {
4662 			dev_err(adev->dev, "(%d) failed to wait for rlc autoload to complete\n", r);
4663 			return r;
4664 		}
4665 	}
4666 
4667 	adev->gfx.is_poweron = true;
4668 
4669 	if (get_gb_addr_config(adev))
4670 		DRM_WARN("Invalid gb_addr_config!\n");
4671 
4672 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4673 	    adev->gfx.rs64_enable)
4674 		gfx_v11_0_config_gfx_rs64(adev);
4675 
4676 	r = gfx_v11_0_gfxhub_enable(adev);
4677 	if (r)
4678 		return r;
4679 
4680 	if (!amdgpu_emu_mode)
4681 		gfx_v11_0_init_golden_registers(adev);
4682 
4683 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4684 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4685 		/*
4686 		 * For gfx 11, RLC firmware loading relies on the SMU firmware
4687 		 * being loaded first, so for the direct loading type the SMC
4688 		 * ucode has to be loaded here before the RLC.
4689 		 */
4690 		r = amdgpu_pm_load_smu_firmware(adev, NULL);
4691 		if (r)
4692 			return r;
4693 	}
4694 
4695 	gfx_v11_0_constants_init(adev);
4696 
4697 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4698 		gfx_v11_0_select_cp_fw_arch(adev);
4699 
4700 	if (adev->nbio.funcs->gc_doorbell_init)
4701 		adev->nbio.funcs->gc_doorbell_init(adev);
4702 
4703 	r = gfx_v11_0_rlc_resume(adev);
4704 	if (r)
4705 		return r;
4706 
4707 	/*
4708 	 * golden register init and RLC resume may override some registers,
4709 	 * so reconfigure them here
4710 	 */
4711 	gfx_v11_0_tcp_harvest(adev);
4712 
4713 	r = gfx_v11_0_cp_resume(adev);
4714 	if (r)
4715 		return r;
4716 
4717 	/* get IMU version from HW if it's not set */
4718 	if (!adev->gfx.imu_fw_version)
4719 		adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);
4720 
4721 	return r;
4722 }
4723 
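/* Tear down the GFX block: disable the GFX interrupt sources, unmap the
 * kernel gfx/compute queues, then stop the CP and disable the gfxhub GART
 * (the latter two are skipped for SR-IOV VFs).
 */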
4724 static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
4725 {
4726 	struct amdgpu_device *adev = ip_block->adev;
4727 
4728 	cancel_delayed_work_sync(&adev->gfx.idle_work);
4729 
4730 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4731 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4732 	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
4733 
4734 	if (!adev->no_hw_access) {
4735 		if (amdgpu_async_gfx_ring) {
4736 			if (amdgpu_gfx_disable_kgq(adev, 0))
4737 				DRM_ERROR("KGQ disable failed\n");
4738 		}
4739 
4740 		if (amdgpu_gfx_disable_kcq(adev, 0))
4741 			DRM_ERROR("KCQ disable failed\n");
4742 
4743 		amdgpu_mes_kiq_hw_fini(adev);
4744 	}
4745 
4746 	if (amdgpu_sriov_vf(adev))
4747 		/* Remove the steps that disable CPG and clear the KIQ position,
4748 		 * so that the CP can perform IDLE-SAVE during world switch.
4749 		 * Those steps are necessary to avoid a DMAR error on gfx9, but
4750 		 * the issue is not reproducible on gfx11.
4751 		 */
4752 		return 0;
4753 
4754 	gfx_v11_0_cp_enable(adev, false);
4755 	gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4756 
4757 	adev->gfxhub.funcs->gart_disable(adev);
4758 
4759 	adev->gfx.is_poweron = false;
4760 
4761 	return 0;
4762 }
4763 
4764 static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block)
4765 {
4766 	return gfx_v11_0_hw_fini(ip_block);
4767 }
4768 
4769 static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block)
4770 {
4771 	return gfx_v11_0_hw_init(ip_block);
4772 }
4773 
4774 static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block)
4775 {
4776 	struct amdgpu_device *adev = ip_block->adev;
4777 
4778 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
4779 				GRBM_STATUS, GUI_ACTIVE))
4780 		return false;
4781 	else
4782 		return true;
4783 }
4784 
4785 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4786 {
4787 	unsigned i;
4788 	u32 tmp;
4789 	struct amdgpu_device *adev = ip_block->adev;
4790 
4791 	for (i = 0; i < adev->usec_timeout; i++) {
4792 		/* read GRBM_STATUS */
4793 		tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
4794 			GRBM_STATUS__GUI_ACTIVE_MASK;
4795 
4796 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4797 			return 0;
4798 		udelay(1);
4799 	}
4800 	return -ETIMEDOUT;
4801 }
4802 
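/* Acquire (req == true) or release (req == false) the CP_GFX_INDEX_MUTEX
 * hardware mutex, polling until the request takes effect or the usec
 * timeout expires.
 */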
4803 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
4804 				      bool req)
4805 {
4806 	u32 i, tmp, val;
4807 
4808 	for (i = 0; i < adev->usec_timeout; i++) {
4809 		/* Request with MeId=2, PipeId=0 */
4810 		tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
4811 		tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
4812 		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
4813 
4814 		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
4815 		if (req) {
4816 			if (val == tmp)
4817 				break;
4818 		} else {
4819 			tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
4820 					    REQUEST, 1);
4821 
4822 			/* unlocked or locked by firmware */
4823 			if (val != tmp)
4824 				break;
4825 		}
4826 		udelay(1);
4827 	}
4828 
4829 	if (i >= adev->usec_timeout)
4830 		return -EINVAL;
4831 
4832 	return 0;
4833 }
4834 
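/* Soft reset the CP: quiesce all gfx/compute queues, reset the VMIDs,
 * pulse the GRBM soft reset bits for the CP blocks, and bring the rings
 * back up via gfx_v11_0_cp_resume().
 */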
4835 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
4836 {
4837 	u32 grbm_soft_reset = 0;
4838 	u32 tmp;
4839 	int r, i, j, k;
4840 	struct amdgpu_device *adev = ip_block->adev;
4841 
4842 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4843 
4844 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4845 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
4846 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
4847 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
4848 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
4849 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4850 
4851 	mutex_lock(&adev->srbm_mutex);
4852 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4853 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4854 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4855 				soc21_grbm_select(adev, i, k, j, 0);
4856 
4857 				WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
4858 				WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
4859 			}
4860 		}
4861 	}
4862 	for (i = 0; i < adev->gfx.me.num_me; ++i) {
4863 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
4864 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
4865 				soc21_grbm_select(adev, i, k, j, 0);
4866 
4867 				WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
4868 			}
4869 		}
4870 	}
4871 	soc21_grbm_select(adev, 0, 0, 0, 0);
4872 	mutex_unlock(&adev->srbm_mutex);
4873 
4874 	/* Try to acquire the gfx mutex before access to CP_VMID_RESET */
4875 	mutex_lock(&adev->gfx.reset_sem_mutex);
4876 	r = gfx_v11_0_request_gfx_index_mutex(adev, true);
4877 	if (r) {
4878 		mutex_unlock(&adev->gfx.reset_sem_mutex);
4879 		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
4880 		return r;
4881 	}
4882 
4883 	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
4884 
4885 	/* Read the CP_VMID_RESET register three times to give
4886 	 * GFX_HQD_ACTIVE sufficient time to reach 0. */
4887 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4888 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4889 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4890 
4891 	/* release the gfx mutex */
4892 	r = gfx_v11_0_request_gfx_index_mutex(adev, false);
4893 	mutex_unlock(&adev->gfx.reset_sem_mutex);
4894 	if (r) {
4895 		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
4896 		return r;
4897 	}
4898 
4899 	for (i = 0; i < adev->usec_timeout; i++) {
4900 		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
4901 		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
4902 			break;
4903 		udelay(1);
4904 	}
4905 	if (i >= adev->usec_timeout) {
4906 		dev_err(adev->dev, "failed to wait for all pipes to become idle\n");
4907 		return -EINVAL;
4908 	}
4909 
4910 	/**********  trigger soft reset  ***********/
4911 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4912 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4913 					SOFT_RESET_CP, 1);
4914 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4915 					SOFT_RESET_GFX, 1);
4916 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4917 					SOFT_RESET_CPF, 1);
4918 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4919 					SOFT_RESET_CPC, 1);
4920 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4921 					SOFT_RESET_CPG, 1);
4922 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4923 	/**********  exit soft reset  ***********/
4924 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4925 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4926 					SOFT_RESET_CP, 0);
4927 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4928 					SOFT_RESET_GFX, 0);
4929 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4930 					SOFT_RESET_CPF, 0);
4931 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4932 					SOFT_RESET_CPC, 0);
4933 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4934 					SOFT_RESET_CPG, 0);
4935 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4936 
4937 	tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
4938 	tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
4939 	WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
4940 
4941 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
4942 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
4943 
4944 	for (i = 0; i < adev->usec_timeout; i++) {
4945 		if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
4946 			break;
4947 		udelay(1);
4948 	}
4949 	if (i >= adev->usec_timeout) {
4950 		dev_err(adev->dev, "failed to wait for CP_VMID_RESET to clear\n");
4951 		return -EINVAL;
4952 	}
4953 
4954 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4955 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4956 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4957 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4958 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4959 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4960 
4961 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4962 
4963 	return gfx_v11_0_cp_resume(adev);
4964 }
4965 
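/* Report whether a soft reset is needed by running an IB test on every
 * gfx and compute ring; any failure indicates a hang.
 */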
4966 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
4967 {
4968 	int i, r;
4969 	struct amdgpu_device *adev = ip_block->adev;
4970 	struct amdgpu_ring *ring;
4971 	long tmo = msecs_to_jiffies(1000);
4972 
4973 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4974 		ring = &adev->gfx.gfx_ring[i];
4975 		r = amdgpu_ring_test_ib(ring, tmo);
4976 		if (r)
4977 			return true;
4978 	}
4979 
4980 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4981 		ring = &adev->gfx.compute_ring[i];
4982 		r = amdgpu_ring_test_ib(ring, tmo);
4983 		if (r)
4984 			return true;
4985 	}
4986 
4987 	return false;
4988 }
4989 
4990 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
4991 {
4992 	struct amdgpu_device *adev = ip_block->adev;
4993 	/**
4994 	 * GFX soft reset will impact MES, need resume MES when do GFX soft reset
4995 	 */
4996 	return amdgpu_mes_resume(adev);
4997 }
4998 
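/* Sample the 64-bit GPU clock counter. The HI register is read before and
 * after LO so that a carry between the two 32-bit halves can be detected
 * and the LO value re-read. SR-IOV VFs use the MES MTIME counters; bare
 * metal uses the SMUIO golden TSC.
 */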
4999 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5000 {
5001 	uint64_t clock;
5002 	uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
5003 
5004 	if (amdgpu_sriov_vf(adev)) {
5005 		amdgpu_gfx_off_ctrl(adev, false);
5006 		mutex_lock(&adev->gfx.gpu_clock_mutex);
5007 		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
5008 		clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
5009 		clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
5010 		if (clock_counter_hi_pre != clock_counter_hi_after)
5011 			clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
5012 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
5013 		amdgpu_gfx_off_ctrl(adev, true);
5014 	} else {
5015 		preempt_disable();
5016 		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
5017 		clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
5018 		clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
5019 		if (clock_counter_hi_pre != clock_counter_hi_after)
5020 			clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
5021 		preempt_enable();
5022 	}
5023 	clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
5024 
5025 	return clock;
5026 }
5027 
5028 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5029 					   uint32_t vmid,
5030 					   uint32_t gds_base, uint32_t gds_size,
5031 					   uint32_t gws_base, uint32_t gws_size,
5032 					   uint32_t oa_base, uint32_t oa_size)
5033 {
5034 	struct amdgpu_device *adev = ring->adev;
5035 
5036 	/* GDS Base */
5037 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5038 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
5039 				    gds_base);
5040 
5041 	/* GDS Size */
5042 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5043 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
5044 				    gds_size);
5045 
5046 	/* GWS */
5047 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5048 				    SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
5049 				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5050 
5051 	/* OA */
5052 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5053 				    SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
5054 				    (1 << (oa_size + oa_base)) - (1 << oa_base));
5055 }
5056 
5057 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
5058 {
5059 	struct amdgpu_device *adev = ip_block->adev;
5060 
5061 	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
5062 
5063 	adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
5064 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5065 					  AMDGPU_MAX_COMPUTE_RINGS);
5066 
5067 	gfx_v11_0_set_kiq_pm4_funcs(adev);
5068 	gfx_v11_0_set_ring_funcs(adev);
5069 	gfx_v11_0_set_irq_funcs(adev);
5070 	gfx_v11_0_set_gds_init(adev);
5071 	gfx_v11_0_set_rlc_funcs(adev);
5072 	gfx_v11_0_set_mqd_funcs(adev);
5073 	gfx_v11_0_set_imu_funcs(adev);
5074 
5075 	gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
5076 
5077 	return gfx_v11_0_init_microcode(adev);
5078 }
5079 
5080 static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block)
5081 {
5082 	struct amdgpu_device *adev = ip_block->adev;
5083 	int r;
5084 
5085 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5086 	if (r)
5087 		return r;
5088 
5089 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5090 	if (r)
5091 		return r;
5092 
5093 	r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
5094 	if (r)
5095 		return r;
5096 	return 0;
5097 }
5098 
5099 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
5100 {
5101 	uint32_t rlc_cntl;
5102 
5103 	/* report whether the RLC F32 core is currently enabled */
5104 	rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
5105 	return REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32) ? true : false;
5106 }
5107 
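/* Request RLC safe mode and wait for the RLC to acknowledge the command. */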
5108 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
5109 {
5110 	uint32_t data;
5111 	unsigned i;
5112 
5113 	data = RLC_SAFE_MODE__CMD_MASK;
5114 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5115 
5116 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
5117 
5118 	/* wait for RLC_SAFE_MODE */
5119 	for (i = 0; i < adev->usec_timeout; i++) {
5120 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
5121 				   RLC_SAFE_MODE, CMD))
5122 			break;
5123 		udelay(1);
5124 	}
5125 }
5126 
5127 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
5128 {
5129 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
5130 }
5131 
5132 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
5133 				      bool enable)
5134 {
5135 	uint32_t def, data;
5136 
5137 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
5138 		return;
5139 
5140 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5141 
5142 	if (enable)
5143 		data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
5144 	else
5145 		data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
5146 
5147 	if (def != data)
5148 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5149 }
5150 
5151 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
5152 				       bool enable)
5153 {
5154 	uint32_t def, data;
5155 
5156 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
5157 		return;
5158 
5159 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5160 
5161 	if (enable)
5162 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
5163 	else
5164 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
5165 
5166 	if (def != data)
5167 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5168 }
5169 
5170 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
5171 					   bool enable)
5172 {
5173 	uint32_t def, data;
5174 
5175 	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
5176 		return;
5177 
5178 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5179 
5180 	if (enable)
5181 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
5182 	else
5183 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
5184 
5185 	if (def != data)
5186 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5187 }
5188 
5189 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5190 						       bool enable)
5191 {
5192 	uint32_t data, def;
5193 
5194 	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
5195 		return;
5196 
5197 	/* It is disabled by HW by default */
5198 	if (enable) {
5199 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5200 			/* 1 - RLC_CGTT_MGCG_OVERRIDE */
5201 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5202 
5203 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5204 				  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5205 				  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
5206 
5207 			if (def != data)
5208 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5209 		}
5210 	} else {
5211 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5212 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5213 
5214 			data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5215 				 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5216 				 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
5217 
5218 			if (def != data)
5219 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5220 		}
5221 	}
5222 }
5223 
5224 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5225 						       bool enable)
5226 {
5227 	uint32_t def, data;
5228 
5229 	if (!(adev->cg_flags &
5230 	      (AMD_CG_SUPPORT_GFX_CGCG |
5231 	      AMD_CG_SUPPORT_GFX_CGLS |
5232 	      AMD_CG_SUPPORT_GFX_3D_CGCG |
5233 	      AMD_CG_SUPPORT_GFX_3D_CGLS)))
5234 		return;
5235 
5236 	if (enable) {
5237 		def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5238 
5239 		/* unset CGCG override */
5240 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
5241 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5242 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5243 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5244 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
5245 		    adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5246 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5247 
5248 		/* update CGCG override bits */
5249 		if (def != data)
5250 			WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5251 
5252 		/* enable cgcg FSM(0x0000363F) */
5253 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5254 
5255 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5256 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
5257 			data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5258 				 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5259 		}
5260 
5261 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5262 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
5263 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5264 				 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5265 		}
5266 
5267 		if (def != data)
5268 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5269 
5270 		/* Program RLC_CGCG_CGLS_CTRL_3D */
5271 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5272 
5273 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5274 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
5275 			data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5276 				 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5277 		}
5278 
5279 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5280 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
5281 			data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5282 				 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5283 		}
5284 
5285 		if (def != data)
5286 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5287 
5288 		/* set IDLE_POLL_COUNT(0x00900100) */
5289 		def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
5290 
5291 		data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
5292 		data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5293 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5294 
5295 		if (def != data)
5296 			WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
5297 
5298 		data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
5299 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
5300 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
5301 		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
5302 		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
5303 		WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
5304 
5305 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5306 		data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5307 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5308 
5309 		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
5310 		if (adev->sdma.num_instances > 1) {
5311 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5312 			data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5313 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5314 		}
5315 	} else {
5316 		/* Program RLC_CGCG_CGLS_CTRL */
5317 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5318 
5319 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
5320 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5321 
5322 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5323 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5324 
5325 		if (def != data)
5326 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5327 
5328 		/* Program RLC_CGCG_CGLS_CTRL_3D */
5329 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5330 
5331 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5332 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5333 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5334 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5335 
5336 		if (def != data)
5337 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5338 
5339 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5340 		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5341 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5342 
5343 		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
5344 		if (adev->sdma.num_instances > 1) {
5345 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5346 			data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5347 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5348 		}
5349 	}
5350 }
5351 
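/* Update all GFX clockgating features (CGCG/CGLS, MGCG, FGCG, perf clock)
 * under RLC safe mode.
 */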
5352 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5353 					    bool enable)
5354 {
5355 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5356 
5357 	gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
5358 
5359 	gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
5360 
5361 	gfx_v11_0_update_repeater_fgcg(adev, enable);
5362 
5363 	gfx_v11_0_update_sram_fgcg(adev, enable);
5364 
5365 	gfx_v11_0_update_perf_clk(adev, enable);
5366 
5367 	if (adev->cg_flags &
5368 	    (AMD_CG_SUPPORT_GFX_MGCG |
5369 	     AMD_CG_SUPPORT_GFX_CGLS |
5370 	     AMD_CG_SUPPORT_GFX_CGCG |
5371 	     AMD_CG_SUPPORT_GFX_3D_CGCG |
5372 	     AMD_CG_SUPPORT_GFX_3D_CGLS))
5373 		gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
5374 
5375 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5376 
5377 	return 0;
5378 }
5379 
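/* Program the VMID used by RLC SPM, with GFXOFF disabled around the
 * register access. For pp_one_vf SR-IOV the update is also emitted on the
 * ring so that it takes effect for the VF.
 */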
5380 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
5381 {
5382 	u32 reg, pre_data, data;
5383 
5384 	amdgpu_gfx_off_ctrl(adev, false);
5385 	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
5386 	if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
5387 		pre_data = RREG32_NO_KIQ(reg);
5388 	else
5389 		pre_data = RREG32(reg);
5390 
5391 	data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
5392 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5393 
5394 	if (pre_data != data) {
5395 		if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
5396 			WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
5397 		else
5398 			WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
5399 	}
5400 	amdgpu_gfx_off_ctrl(adev, true);
5401 
5402 	if (ring && amdgpu_sriov_is_pp_one_vf(adev) && (pre_data != data) &&
5403 	    ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) ||
5404 	     (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
5405 		amdgpu_ring_emit_wreg(ring, reg, data);
5406 	}
5409 }
5410 
5411 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5412 	.is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5413 	.set_safe_mode = gfx_v11_0_set_safe_mode,
5414 	.unset_safe_mode = gfx_v11_0_unset_safe_mode,
5415 	.init = gfx_v11_0_rlc_init,
5416 	.get_csb_size = gfx_v11_0_get_csb_size,
5417 	.get_csb_buffer = gfx_v11_0_get_csb_buffer,
5418 	.resume = gfx_v11_0_rlc_resume,
5419 	.stop = gfx_v11_0_rlc_stop,
5420 	.reset = gfx_v11_0_rlc_reset,
5421 	.start = gfx_v11_0_rlc_start,
5422 	.update_spm_vmid = gfx_v11_0_update_spm_vmid,
5423 };
5424 
5425 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5426 {
5427 	u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5428 
5429 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5430 		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5431 	else
5432 		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5433 
5434 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5435 
5436 	/* Program RLC_PG_DELAY_3 for CGPG hysteresis */
5437 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5438 		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5439 		case IP_VERSION(11, 0, 1):
5440 		case IP_VERSION(11, 0, 4):
5441 		case IP_VERSION(11, 5, 0):
5442 		case IP_VERSION(11, 5, 1):
5443 		case IP_VERSION(11, 5, 2):
5444 		case IP_VERSION(11, 5, 3):
5445 			WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5446 			break;
5447 		default:
5448 			break;
5449 		}
5450 	}
5451 }
5452 
5453 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5454 {
5455 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5456 
5457 	gfx_v11_cntl_power_gating(adev, enable);
5458 
5459 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5460 }
5461 
5462 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
5463 					   enum amd_powergating_state state)
5464 {
5465 	struct amdgpu_device *adev = ip_block->adev;
5466 	bool enable = (state == AMD_PG_STATE_GATE);
5467 
5468 	if (amdgpu_sriov_vf(adev))
5469 		return 0;
5470 
5471 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5472 	case IP_VERSION(11, 0, 0):
5473 	case IP_VERSION(11, 0, 2):
5474 	case IP_VERSION(11, 0, 3):
5475 		amdgpu_gfx_off_ctrl(adev, enable);
5476 		break;
5477 	case IP_VERSION(11, 0, 1):
5478 	case IP_VERSION(11, 0, 4):
5479 	case IP_VERSION(11, 5, 0):
5480 	case IP_VERSION(11, 5, 1):
5481 	case IP_VERSION(11, 5, 2):
5482 	case IP_VERSION(11, 5, 3):
5483 		if (!enable)
5484 			amdgpu_gfx_off_ctrl(adev, false);
5485 
5486 		gfx_v11_cntl_pg(adev, enable);
5487 
5488 		if (enable)
5489 			amdgpu_gfx_off_ctrl(adev, true);
5490 
5491 		break;
5492 	default:
5493 		break;
5494 	}
5495 
5496 	return 0;
5497 }
5498 
5499 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
5500 					  enum amd_clockgating_state state)
5501 {
5502 	struct amdgpu_device *adev = ip_block->adev;
5503 
5504 	if (amdgpu_sriov_vf(adev))
5505 		return 0;
5506 
5507 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5508 	case IP_VERSION(11, 0, 0):
5509 	case IP_VERSION(11, 0, 1):
5510 	case IP_VERSION(11, 0, 2):
5511 	case IP_VERSION(11, 0, 3):
5512 	case IP_VERSION(11, 0, 4):
5513 	case IP_VERSION(11, 5, 0):
5514 	case IP_VERSION(11, 5, 1):
5515 	case IP_VERSION(11, 5, 2):
5516 	case IP_VERSION(11, 5, 3):
5517 		gfx_v11_0_update_gfx_clock_gating(adev,
5518 						  state == AMD_CG_STATE_GATE);
5519 		break;
5520 	default:
5521 		break;
5522 	}
5523 
5524 	return 0;
5525 }
5526 
5527 static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
5528 {
5529 	struct amdgpu_device *adev = ip_block->adev;
5530 	int data;
5531 
5532 	/* AMD_CG_SUPPORT_GFX_MGCG */
5533 	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5534 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5535 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5536 
5537 	/* AMD_CG_SUPPORT_REPEATER_FGCG */
5538 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5539 		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5540 
5541 	/* AMD_CG_SUPPORT_GFX_FGCG */
5542 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5543 		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
5544 
5545 	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
5546 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5547 		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5548 
5549 	/* AMD_CG_SUPPORT_GFX_CGCG */
5550 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5551 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5552 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5553 
5554 	/* AMD_CG_SUPPORT_GFX_CGLS */
5555 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5556 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5557 
5558 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5559 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5560 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5561 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5562 
5563 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5564 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5565 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5566 }
5567 
5568 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5569 {
5570 	/* gfx11 is 32bit rptr */
5571 	return *(uint32_t *)ring->rptr_cpu_addr;
5572 }
5573 
5574 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5575 {
5576 	struct amdgpu_device *adev = ring->adev;
5577 	u64 wptr;
5578 
5579 	/* XXX check if swapping is necessary on BE */
5580 	if (ring->use_doorbell) {
5581 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5582 	} else {
5583 		wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5584 		wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5585 	}
5586 
5587 	return wptr;
5588 }
5589 
5590 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5591 {
5592 	struct amdgpu_device *adev = ring->adev;
5593 
5594 	if (ring->use_doorbell) {
5595 		/* XXX check if swapping is necessary on BE */
5596 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5597 			     ring->wptr);
5598 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5599 	} else {
5600 		WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5601 			     lower_32_bits(ring->wptr));
5602 		WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5603 			     upper_32_bits(ring->wptr));
5604 	}
5605 }
5606 
5607 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5608 {
5609 	/* gfx11 hardware is 32bit rptr */
5610 	return *(uint32_t *)ring->rptr_cpu_addr;
5611 }
5612 
5613 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5614 {
5615 	u64 wptr;
5616 
5617 	/* XXX check if swapping is necessary on BE */
5618 	if (ring->use_doorbell)
5619 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5620 	else
5621 		BUG();
5622 	return wptr;
5623 }
5624 
5625 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5626 {
5627 	struct amdgpu_device *adev = ring->adev;
5628 
5629 	/* XXX check if swapping is necessary on BE */
5630 	if (ring->use_doorbell) {
5631 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5632 			     ring->wptr);
5633 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5634 	} else {
5635 		BUG(); /* only DOORBELL method supported on gfx11 now */
5636 	}
5637 }
5638 
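/* Emit a HDP flush request and wait for the matching done bit, using the
 * per-pipe ref/mask from the NBIO HDP flush registers.
 */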
5639 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5640 {
5641 	struct amdgpu_device *adev = ring->adev;
5642 	u32 ref_and_mask, reg_mem_engine;
5643 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5644 
5645 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5646 		switch (ring->me) {
5647 		case 1:
5648 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5649 			break;
5650 		case 2:
5651 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5652 			break;
5653 		default:
5654 			return;
5655 		}
5656 		reg_mem_engine = 0;
5657 	} else {
5658 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
5659 		reg_mem_engine = 1; /* pfp */
5660 	}
5661 
5662 	gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5663 			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5664 			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5665 			       ref_and_mask, ref_and_mask, 0x20);
5666 }
5667 
5668 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5669 				       struct amdgpu_job *job,
5670 				       struct amdgpu_ib *ib,
5671 				       uint32_t flags)
5672 {
5673 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5674 	u32 header, control = 0;
5675 
5676 	BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
5677 
5678 	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5679 
5680 	control |= ib->length_dw | (vmid << 24);
5681 
5682 	if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5683 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5684 
5685 		if (flags & AMDGPU_IB_PREEMPTED)
5686 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5687 
5688 		if (vmid)
5689 			gfx_v11_0_ring_emit_de_meta(ring,
5690 				    !amdgpu_sriov_vf(ring->adev) && (flags & AMDGPU_IB_PREEMPTED));
5691 	}
5692 
5693 	if (ring->is_mes_queue)
5694 		/* inherit vmid from mqd */
5695 		control |= 0x400000;
5696 
5697 	amdgpu_ring_write(ring, header);
5698 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5699 	amdgpu_ring_write(ring,
5700 #ifdef __BIG_ENDIAN
5701 		(2 << 0) |
5702 #endif
5703 		lower_32_bits(ib->gpu_addr));
5704 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5705 	amdgpu_ring_write(ring, control);
5706 }
5707 
5708 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5709 					   struct amdgpu_job *job,
5710 					   struct amdgpu_ib *ib,
5711 					   uint32_t flags)
5712 {
5713 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5714 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5715 
5716 	if (ring->is_mes_queue)
5717 		/* inherit vmid from mqd */
5718 		control |= 0x40000000;
5719 
5720 	/* Currently, there is a high possibility to get wave ID mismatch
5721 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5722 	 * different wave IDs than the GDS expects. This situation happens
5723 	 * randomly when at least 5 compute pipes use GDS ordered append.
5724 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5725 	 * Those are probably bugs somewhere else in the kernel driver.
5726 	 *
5727 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5728 	 * GDS to 0 for this ring (me/pipe).
5729 	 */
5730 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5731 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5732 		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5733 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5734 	}
5735 
5736 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5737 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5738 	amdgpu_ring_write(ring,
5739 #ifdef __BIG_ENDIAN
5740 				(2 << 0) |
5741 #endif
5742 				lower_32_bits(ib->gpu_addr));
5743 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5744 	amdgpu_ring_write(ring, control);
5745 }
5746 
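/* Emit a RELEASE_MEM fence packet that flushes the GL2/metadata caches,
 * writes the fence sequence number, and optionally raises an interrupt.
 */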
5747 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5748 				     u64 seq, unsigned flags)
5749 {
5750 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5751 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5752 
5753 	/* RELEASE_MEM - flush caches, send int */
5754 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5755 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5756 				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
5757 				 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
5758 				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
5759 				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5760 				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5761 				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
5762 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5763 				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5764 
5765 	/*
5766 	 * the address should be Qword aligned for a 64bit write, and Dword
5767 	 * aligned if we only send the low 32 bits of data (data high discarded)
5768 	 */
5769 	if (write64bit)
5770 		BUG_ON(addr & 0x7);
5771 	else
5772 		BUG_ON(addr & 0x3);
5773 	amdgpu_ring_write(ring, lower_32_bits(addr));
5774 	amdgpu_ring_write(ring, upper_32_bits(addr));
5775 	amdgpu_ring_write(ring, lower_32_bits(seq));
5776 	amdgpu_ring_write(ring, upper_32_bits(seq));
5777 	amdgpu_ring_write(ring, ring->is_mes_queue ?
5778 			 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
5779 }
5780 
5781 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5782 {
5783 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5784 	uint32_t seq = ring->fence_drv.sync_seq;
5785 	uint64_t addr = ring->fence_drv.gpu_addr;
5786 
5787 	gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5788 			       upper_32_bits(addr), seq, 0xffffffff, 4);
5789 }
5790 
5791 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5792 				   uint16_t pasid, uint32_t flush_type,
5793 				   bool all_hub, uint8_t dst_sel)
5794 {
5795 	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5796 	amdgpu_ring_write(ring,
5797 			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5798 			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5799 			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5800 			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5801 }
5802 
5803 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5804 					 unsigned vmid, uint64_t pd_addr)
5805 {
5806 	if (ring->is_mes_queue)
5807 		gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5808 	else
5809 		amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5810 
5811 	/* compute doesn't have PFP */
5812 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5813 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5814 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5815 		amdgpu_ring_write(ring, 0x0);
5816 	}
5817 
5818 	/* Make sure that we can't skip the SET_Q_MODE packets when the VM
5819 	 * changed in any way.
5820 	 */
5821 	ring->set_q_mode_offs = 0;
5822 	ring->set_q_mode_ptr = NULL;
5823 }
5824 
5825 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5826 					  u64 seq, unsigned int flags)
5827 {
5828 	struct amdgpu_device *adev = ring->adev;
5829 
5830 	/* we only allocate 32bit for each seq wb address */
5831 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5832 
5833 	/* write fence seq to the "addr" */
5834 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5835 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5836 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5837 	amdgpu_ring_write(ring, lower_32_bits(addr));
5838 	amdgpu_ring_write(ring, upper_32_bits(addr));
5839 	amdgpu_ring_write(ring, lower_32_bits(seq));
5840 
5841 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5842 		/* set register to trigger INT */
5843 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5844 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5845 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5846 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
5847 		amdgpu_ring_write(ring, 0);
5848 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5849 	}
5850 }
5851 
5852 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
5853 					 uint32_t flags)
5854 {
5855 	uint32_t dw2 = 0;
5856 
5857 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5858 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5859 		/* set load_global_config & load_global_uconfig */
5860 		dw2 |= 0x8001;
5861 		/* set load_cs_sh_regs */
5862 		dw2 |= 0x01000000;
5863 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5864 		dw2 |= 0x10002;
5865 	}
5866 
5867 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5868 	amdgpu_ring_write(ring, dw2);
5869 	amdgpu_ring_write(ring, 0);
5870 }
5871 
5872 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5873 						   uint64_t addr)
5874 {
5875 	unsigned ret;
5876 
5877 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5878 	amdgpu_ring_write(ring, lower_32_bits(addr));
5879 	amdgpu_ring_write(ring, upper_32_bits(addr));
5880 	/* discard following DWs if *cond_exec_gpu_addr==0 */
5881 	amdgpu_ring_write(ring, 0);
5882 	ret = ring->wptr & ring->buf_mask;
5883 	/* patch dummy value later */
5884 	amdgpu_ring_write(ring, 0);
5885 
5886 	return ret;
5887 }
5888 
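/* Emit the SET_Q_PREEMPTION_MODE packet that points the CP at the shadow,
 * CSA and GDS backup areas, wrapped in conditional execution so that
 * redundant state save/restore can be skipped (see the comments below).
 */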
5889 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
5890 					   u64 shadow_va, u64 csa_va,
5891 					   u64 gds_va, bool init_shadow,
5892 					   int vmid)
5893 {
5894 	struct amdgpu_device *adev = ring->adev;
5895 	unsigned int offs, end;
5896 
5897 	if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
5898 		return;
5899 
5900 	/*
5901 	 * The logic here isn't easy to understand because we need to keep state
5902 	 * across multiple executions of the function as well as between the
5903 	 * CPU and GPU. The general idea is that the newly written GPU command
5904 	 * has a condition on the previous one and is only executed if really
5905 	 * necessary.
5906 	 */
5907 
5908 	/*
5909 	 * The dw in the NOP controls whether the next SET_Q_MODE packet should
5910 	 * be executed or not. Reserve 64 bits just to be on the safe side.
5911 	 */
5912 	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
5913 	offs = ring->wptr & ring->buf_mask;
5914 
5915 	/*
5916 	 * We start by skipping the prefix SET_Q_MODE and always executing
5917 	 * the postfix SET_Q_MODE packet. This is changed below with a
5918 	 * WRITE_DATA command once the postfix has executed.
5919 	 */
5920 	amdgpu_ring_write(ring, shadow_va ? 1 : 0);
5921 	amdgpu_ring_write(ring, 0);
5922 
5923 	if (ring->set_q_mode_offs) {
5924 		uint64_t addr;
5925 
5926 		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
5927 		addr += ring->set_q_mode_offs << 2;
5928 		end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
5929 	}
5930 
5931 	/*
5932 	 * When the postfix SET_Q_MODE packet executes we need to make sure that the
5933 	 * next prefix SET_Q_MODE packet executes as well.
5934 	 */
5935 	if (!shadow_va) {
5936 		uint64_t addr;
5937 
5938 		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
5939 		addr += offs << 2;
5940 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5941 		amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5942 		amdgpu_ring_write(ring, lower_32_bits(addr));
5943 		amdgpu_ring_write(ring, upper_32_bits(addr));
5944 		amdgpu_ring_write(ring, 0x1);
5945 	}
5946 
5947 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
5948 	amdgpu_ring_write(ring, lower_32_bits(shadow_va));
5949 	amdgpu_ring_write(ring, upper_32_bits(shadow_va));
5950 	amdgpu_ring_write(ring, lower_32_bits(gds_va));
5951 	amdgpu_ring_write(ring, upper_32_bits(gds_va));
5952 	amdgpu_ring_write(ring, lower_32_bits(csa_va));
5953 	amdgpu_ring_write(ring, upper_32_bits(csa_va));
5954 	amdgpu_ring_write(ring, shadow_va ?
5955 			  PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
5956 	amdgpu_ring_write(ring, init_shadow ?
5957 			  PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
5958 
5959 	if (ring->set_q_mode_offs)
5960 		amdgpu_ring_patch_cond_exec(ring, end);
5961 
5962 	if (shadow_va) {
5963 		uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;
5964 
5965 		/*
5966 		 * If the tokens match try to skip the last postfix SET_Q_MODE
5967 		 * packet to avoid saving/restoring the state all the time.
5968 		 */
5969 		if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
5970 			*ring->set_q_mode_ptr = 0;
5971 
5972 		ring->set_q_mode_token = token;
5973 	} else {
5974 		ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
5975 	}
5976 
5977 	ring->set_q_mode_offs = offs;
5978 }
5979 
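/* Preempt the IBs currently executing on @ring: ask the KIQ to unmap the
 * queue without removing it, then poll the trailing fence to confirm the
 * preemption completed.
 */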
5980 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
5981 {
5982 	int i, r = 0;
5983 	struct amdgpu_device *adev = ring->adev;
5984 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5985 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5986 	unsigned long flags;
5987 
5988 	if (adev->enable_mes)
5989 		return -EINVAL;
5990 
5991 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5992 		return -EINVAL;
5993 
5994 	spin_lock_irqsave(&kiq->ring_lock, flags);
5995 
5996 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5997 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5998 		return -ENOMEM;
5999 	}
6000 
6001 	/* assert preemption condition */
6002 	amdgpu_ring_set_preempt_cond_exec(ring, false);
6003 
6004 	/* assert IB preemption, emit the trailing fence */
6005 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
6006 				   ring->trail_fence_gpu_addr,
6007 				   ++ring->trail_seq);
6008 	amdgpu_ring_commit(kiq_ring);
6009 
6010 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
6011 
6012 	/* poll the trailing fence */
6013 	for (i = 0; i < adev->usec_timeout; i++) {
6014 		if (ring->trail_seq ==
6015 		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
6016 			break;
6017 		udelay(1);
6018 	}
6019 
6020 	if (i >= adev->usec_timeout) {
6021 		r = -EINVAL;
6022 		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
6023 	}
6024 
6025 	/* deassert preemption condition */
6026 	amdgpu_ring_set_preempt_cond_exec(ring, true);
6027 	return r;
6028 }
6029 
6030 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
6031 {
6032 	struct amdgpu_device *adev = ring->adev;
6033 	struct v10_de_ib_state de_payload = {0};
6034 	uint64_t offset, gds_addr, de_payload_gpu_addr;
6035 	void *de_payload_cpu_addr;
6036 	int cnt;
6037 
6038 	if (ring->is_mes_queue) {
6039 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
6040 				  gfx[0].gfx_meta_data) +
6041 			offsetof(struct v10_gfx_meta_data, de_payload);
6042 		de_payload_gpu_addr =
6043 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
6044 		de_payload_cpu_addr =
6045 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
6046 
6047 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
6048 				  gfx[0].gds_backup) +
6049 			offsetof(struct v10_gfx_meta_data, de_payload);
6050 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
6051 	} else {
6052 		offset = offsetof(struct v10_gfx_meta_data, de_payload);
6053 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
6054 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
6055 
6056 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
6057 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
6058 				 PAGE_SIZE);
6059 	}
6060 
6061 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
6062 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
6063 
6064 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
6065 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
6066 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
6067 				 WRITE_DATA_DST_SEL(8) |
6068 				 WR_CONFIRM) |
6069 				 WRITE_DATA_CACHE_POLICY(0));
6070 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
6071 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
6072 
6073 	if (resume)
6074 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
6075 					   sizeof(de_payload) >> 2);
6076 	else
6077 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
6078 					   sizeof(de_payload) >> 2);
6079 }
6080 
6081 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
6082 				    bool secure)
6083 {
6084 	uint32_t v = secure ? FRAME_TMZ : 0;
6085 
6086 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
6087 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
6088 }
6089 
6090 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6091 				     uint32_t reg_val_offs)
6092 {
6093 	struct amdgpu_device *adev = ring->adev;
6094 
6095 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6096 	amdgpu_ring_write(ring, 0 |	/* src: register */
6097 				(5 << 8) |	/* dst: memory */
6098 				(1 << 20));	/* write confirm */
6099 	amdgpu_ring_write(ring, reg);
6100 	amdgpu_ring_write(ring, 0);
6101 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6102 				reg_val_offs * 4));
6103 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6104 				reg_val_offs * 4));
6105 }
6106 
6107 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6108 				   uint32_t val)
6109 {
6110 	uint32_t cmd = 0;
6111 
6112 	switch (ring->funcs->type) {
6113 	case AMDGPU_RING_TYPE_GFX:
6114 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6115 		break;
6116 	case AMDGPU_RING_TYPE_KIQ:
6117 		cmd = (1 << 16); /* no inc addr */
6118 		break;
6119 	default:
6120 		cmd = WR_CONFIRM;
6121 		break;
6122 	}
6123 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6124 	amdgpu_ring_write(ring, cmd);
6125 	amdgpu_ring_write(ring, reg);
6126 	amdgpu_ring_write(ring, 0);
6127 	amdgpu_ring_write(ring, val);
6128 }
6129 
6130 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
6131 					uint32_t val, uint32_t mask)
6132 {
6133 	gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
6134 }
6135 
6136 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
6137 						   uint32_t reg0, uint32_t reg1,
6138 						   uint32_t ref, uint32_t mask)
6139 {
6140 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6141 
6142 	gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
6143 			       ref, mask, 0x20);
6144 }
6145 
6146 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
6147 					 unsigned vmid)
6148 {
6149 	struct amdgpu_device *adev = ring->adev;
6150 	uint32_t value = 0;
6151 
6152 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6153 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6154 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6155 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6156 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
6157 	WREG32_SOC15(GC, 0, regSQ_CMD, value);
6158 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
6159 }
6160 
6161 static void
6162 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6163 				      uint32_t me, uint32_t pipe,
6164 				      enum amdgpu_interrupt_state state)
6165 {
6166 	uint32_t cp_int_cntl, cp_int_cntl_reg;
6167 
6168 	if (!me) {
6169 		switch (pipe) {
6170 		case 0:
6171 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
6172 			break;
6173 		case 1:
6174 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
6175 			break;
6176 		default:
6177 			DRM_DEBUG("invalid pipe %d\n", pipe);
6178 			return;
6179 		}
6180 	} else {
6181 		DRM_DEBUG("invalid me %d\n", me);
6182 		return;
6183 	}
6184 
6185 	switch (state) {
6186 	case AMDGPU_IRQ_STATE_DISABLE:
6187 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6188 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6189 					    TIME_STAMP_INT_ENABLE, 0);
6190 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6191 					    GENERIC0_INT_ENABLE, 0);
6192 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6193 		break;
6194 	case AMDGPU_IRQ_STATE_ENABLE:
6195 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6196 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6197 					    TIME_STAMP_INT_ENABLE, 1);
6198 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6199 					    GENERIC0_INT_ENABLE, 1);
6200 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6201 		break;
6202 	default:
6203 		break;
6204 	}
6205 }
6206 
6207 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6208 						     int me, int pipe,
6209 						     enum amdgpu_interrupt_state state)
6210 {
6211 	u32 mec_int_cntl, mec_int_cntl_reg;
6212 
6213 	/*
6214 	 * amdgpu controls only the first MEC. That's why this function only
6215 	 * handles the setting of interrupts for this specific MEC. All other
6216 	 * pipes' interrupts are set by amdkfd.
6217 	 */
6218 
6219 	if (me == 1) {
6220 		switch (pipe) {
6221 		case 0:
6222 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6223 			break;
6224 		case 1:
6225 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
6226 			break;
6227 		case 2:
6228 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
6229 			break;
6230 		case 3:
6231 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
6232 			break;
6233 		default:
6234 			DRM_DEBUG("invalid pipe %d\n", pipe);
6235 			return;
6236 		}
6237 	} else {
6238 		DRM_DEBUG("invalid me %d\n", me);
6239 		return;
6240 	}
6241 
6242 	switch (state) {
6243 	case AMDGPU_IRQ_STATE_DISABLE:
6244 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6245 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6246 					     TIME_STAMP_INT_ENABLE, 0);
6247 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6248 					     GENERIC0_INT_ENABLE, 0);
6249 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6250 		break;
6251 	case AMDGPU_IRQ_STATE_ENABLE:
6252 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6253 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6254 					     TIME_STAMP_INT_ENABLE, 1);
6255 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6256 					     GENERIC0_INT_ENABLE, 1);
6257 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6258 		break;
6259 	default:
6260 		break;
6261 	}
6262 }
6263 
6264 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6265 					    struct amdgpu_irq_src *src,
6266 					    unsigned type,
6267 					    enum amdgpu_interrupt_state state)
6268 {
6269 	switch (type) {
6270 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6271 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
6272 		break;
6273 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
6274 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
6275 		break;
6276 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6277 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6278 		break;
6279 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6280 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6281 		break;
6282 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6283 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6284 		break;
6285 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6286 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6287 		break;
6288 	default:
6289 		break;
6290 	}
6291 	return 0;
6292 }
6293 
6294 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
6295 			     struct amdgpu_irq_src *source,
6296 			     struct amdgpu_iv_entry *entry)
6297 {
6298 	int i;
6299 	u8 me_id, pipe_id, queue_id;
6300 	struct amdgpu_ring *ring;
6301 	uint32_t mes_queue_id = entry->src_data[0];
6302 
6303 	DRM_DEBUG("IH: CP EOP\n");
6304 
6305 	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
6306 		struct amdgpu_mes_queue *queue;
6307 
6308 		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
6309 
6310 		spin_lock(&adev->mes.queue_id_lock);
6311 		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
6312 		if (queue) {
6313 			DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
6314 			amdgpu_fence_process(queue->ring);
6315 		}
6316 		spin_unlock(&adev->mes.queue_id_lock);
6317 	} else {
6318 		me_id = (entry->ring_id & 0x0c) >> 2;
6319 		pipe_id = (entry->ring_id & 0x03) >> 0;
6320 		queue_id = (entry->ring_id & 0x70) >> 4;
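		/* e.g. a ring_id of 0x25 decodes to me 1, pipe 1, queue 2 */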
6321 
6322 		switch (me_id) {
6323 		case 0:
6324 			if (pipe_id == 0)
6325 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6326 			else
6327 				amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
6328 			break;
6329 		case 1:
6330 		case 2:
6331 			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6332 				ring = &adev->gfx.compute_ring[i];
6333 				/* Per-queue interrupt is supported for MEC starting from VI.
6334 				 * The interrupt can only be enabled/disabled per pipe instead
6335 				 * of per queue.
6336 				 */
6337 				if ((ring->me == me_id) &&
6338 				    (ring->pipe == pipe_id) &&
6339 				    (ring->queue == queue_id))
6340 					amdgpu_fence_process(ring);
6341 			}
6342 			break;
6343 		}
6344 	}
6345 
6346 	return 0;
6347 }
6348 
6349 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6350 					      struct amdgpu_irq_src *source,
6351 					      unsigned int type,
6352 					      enum amdgpu_interrupt_state state)
6353 {
6354 	u32 cp_int_cntl_reg, cp_int_cntl;
6355 	int i, j;
6356 
6357 	switch (state) {
6358 	case AMDGPU_IRQ_STATE_DISABLE:
6359 	case AMDGPU_IRQ_STATE_ENABLE:
6360 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6361 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6362 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6363 
6364 				if (cp_int_cntl_reg) {
6365 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6366 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6367 								    PRIV_REG_INT_ENABLE,
6368 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6369 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6370 				}
6371 			}
6372 		}
6373 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6374 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6375 				/* MECs start at 1 */
6376 				cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
6377 
6378 				if (cp_int_cntl_reg) {
6379 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6380 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6381 								    PRIV_REG_INT_ENABLE,
6382 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6383 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6384 				}
6385 			}
6386 		}
6387 		break;
6388 	default:
6389 		break;
6390 	}
6391 
6392 	return 0;
6393 }
6394 
6395 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev,
6396 					    struct amdgpu_irq_src *source,
6397 					    unsigned type,
6398 					    enum amdgpu_interrupt_state state)
6399 {
6400 	u32 cp_int_cntl_reg, cp_int_cntl;
6401 	int i, j;
6402 
6403 	switch (state) {
6404 	case AMDGPU_IRQ_STATE_DISABLE:
6405 	case AMDGPU_IRQ_STATE_ENABLE:
6406 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6407 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6408 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6409 
6410 				if (cp_int_cntl_reg) {
6411 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6412 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6413 								    OPCODE_ERROR_INT_ENABLE,
6414 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6415 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6416 				}
6417 			}
6418 		}
6419 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6420 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6421 				/* MECs start at 1 */
6422 				cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
6423 
6424 				if (cp_int_cntl_reg) {
6425 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6426 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6427 								    OPCODE_ERROR_INT_ENABLE,
6428 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6429 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6430 				}
6431 			}
6432 		}
6433 		break;
6434 	default:
6435 		break;
6436 	}
6437 	return 0;
6438 }
6439 
6440 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6441 					       struct amdgpu_irq_src *source,
6442 					       unsigned int type,
6443 					       enum amdgpu_interrupt_state state)
6444 {
6445 	u32 cp_int_cntl_reg, cp_int_cntl;
6446 	int i, j;
6447 
6448 	switch (state) {
6449 	case AMDGPU_IRQ_STATE_DISABLE:
6450 	case AMDGPU_IRQ_STATE_ENABLE:
6451 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6452 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6453 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6454 
6455 				if (cp_int_cntl_reg) {
6456 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6457 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6458 								    PRIV_INSTR_INT_ENABLE,
6459 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6460 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6461 				}
6462 			}
6463 		}
6464 		break;
6465 	default:
6466 		break;
6467 	}
6468 
6469 	return 0;
6470 }
6471 
6472 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
6473 					struct amdgpu_iv_entry *entry)
6474 {
6475 	u8 me_id, pipe_id, queue_id;
6476 	struct amdgpu_ring *ring;
6477 	int i;
6478 
6479 	me_id = (entry->ring_id & 0x0c) >> 2;
6480 	pipe_id = (entry->ring_id & 0x03) >> 0;
6481 	queue_id = (entry->ring_id & 0x70) >> 4;
6482 
6483 	switch (me_id) {
6484 	case 0:
6485 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
6486 			ring = &adev->gfx.gfx_ring[i];
6487 			if (ring->me == me_id && ring->pipe == pipe_id &&
6488 			    ring->queue == queue_id)
6489 				drm_sched_fault(&ring->sched);
6490 		}
6491 		break;
6492 	case 1:
6493 	case 2:
6494 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6495 			ring = &adev->gfx.compute_ring[i];
6496 			if (ring->me == me_id && ring->pipe == pipe_id &&
6497 			    ring->queue == queue_id)
6498 				drm_sched_fault(&ring->sched);
6499 		}
6500 		break;
6501 	default:
6502 		BUG();
6503 		break;
6504 	}
6505 }
6506 
6507 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
6508 				  struct amdgpu_irq_src *source,
6509 				  struct amdgpu_iv_entry *entry)
6510 {
6511 	DRM_ERROR("Illegal register access in command stream\n");
6512 	gfx_v11_0_handle_priv_fault(adev, entry);
6513 	return 0;
6514 }
6515 
6516 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev,
6517 				struct amdgpu_irq_src *source,
6518 				struct amdgpu_iv_entry *entry)
6519 {
6520 	DRM_ERROR("Illegal opcode in command stream \n");
6521 	gfx_v11_0_handle_priv_fault(adev, entry);
6522 	return 0;
6523 }
6524 
6525 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
6526 				   struct amdgpu_irq_src *source,
6527 				   struct amdgpu_iv_entry *entry)
6528 {
6529 	DRM_ERROR("Illegal instruction in command stream\n");
6530 	gfx_v11_0_handle_priv_fault(adev, entry);
6531 	return 0;
6532 }
6533 
6534 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
6535 				  struct amdgpu_irq_src *source,
6536 				  struct amdgpu_iv_entry *entry)
6537 {
6538 	if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq)
6539 		return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);
6540 
6541 	return 0;
6542 }
6543 
6544 #if 0
6545 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6546 					     struct amdgpu_irq_src *src,
6547 					     unsigned int type,
6548 					     enum amdgpu_interrupt_state state)
6549 {
6550 	uint32_t tmp, target;
6551 	struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
6552 
6553 	target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6554 	target += ring->pipe;
6555 
6556 	switch (type) {
6557 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6558 		if (state == AMDGPU_IRQ_STATE_DISABLE) {
6559 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6560 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6561 					    GENERIC2_INT_ENABLE, 0);
6562 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6563 
6564 			tmp = RREG32_SOC15_IP(GC, target);
6565 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6566 					    GENERIC2_INT_ENABLE, 0);
6567 			WREG32_SOC15_IP(GC, target, tmp);
6568 		} else {
6569 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6570 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6571 					    GENERIC2_INT_ENABLE, 1);
6572 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6573 
6574 			tmp = RREG32_SOC15_IP(GC, target);
6575 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6576 					    GENERIC2_INT_ENABLE, 1);
6577 			WREG32_SOC15_IP(GC, target, tmp);
6578 		}
6579 		break;
6580 	default:
6581 		BUG(); /* kiq only support GENERIC2_INT now */
6582 		break;
6583 	}
6584 	return 0;
6585 }
6586 #endif
6587 
6588 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6589 {
6590 	const unsigned int gcr_cntl =
6591 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6592 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6593 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6594 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6595 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6596 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6597 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6598 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
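	/* i.e. write back and invalidate GL2 and its metadata cache (GLM),
	 * and invalidate GL1 plus the vector/scalar/instruction L0 caches
	 * (GLV/GLK/GLI).
	 */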
6599 
6600 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6601 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6602 	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6603 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6604 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6605 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6606 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6607 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6608 	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6609 }
6610 
6611 static bool gfx_v11_pipe_reset_support(struct amdgpu_device *adev)
6612 {
6613 	/* Disable the pipe reset until the CP firmware fully supports it. */
6614 	dev_warn_once(adev->dev, "The CP firmware doesn't support pipe reset yet.\n");
6615 	return false;
6616 }
6617 
6619 static int gfx_v11_reset_gfx_pipe(struct amdgpu_ring *ring)
6620 {
6621 	struct amdgpu_device *adev = ring->adev;
6622 	uint32_t reset_pipe = 0, clean_pipe = 0;
6623 	int r;
6624 
6625 	if (!gfx_v11_pipe_reset_support(adev))
6626 		return -EOPNOTSUPP;
6627 
6628 	gfx_v11_0_set_safe_mode(adev, 0);
6629 	mutex_lock(&adev->srbm_mutex);
6630 	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6631 
6632 	switch (ring->pipe) {
6633 	case 0:
6634 		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
6635 					   PFP_PIPE0_RESET, 1);
6636 		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
6637 					   ME_PIPE0_RESET, 1);
6638 		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
6639 					   PFP_PIPE0_RESET, 0);
6640 		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
6641 					   ME_PIPE0_RESET, 0);
6642 		break;
6643 	case 1:
6644 		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
6645 					   PFP_PIPE1_RESET, 1);
6646 		reset_pipe = REG_SET_FIELD(reset_pipe, CP_ME_CNTL,
6647 					   ME_PIPE1_RESET, 1);
6648 		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
6649 					   PFP_PIPE1_RESET, 0);
6650 		clean_pipe = REG_SET_FIELD(clean_pipe, CP_ME_CNTL,
6651 					   ME_PIPE1_RESET, 0);
6652 		break;
6653 	default:
6654 		break;
6655 	}
6656 
6657 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, reset_pipe);
6658 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, clean_pipe);
6659 
6660 	r = (RREG32(SOC15_REG_OFFSET(GC, 0, regCP_GFX_RS64_INSTR_PNTR1)) << 2) -
6661 						RS64_FW_UC_START_ADDR_LO;
6662 	soc21_grbm_select(adev, 0, 0, 0, 0);
6663 	mutex_unlock(&adev->srbm_mutex);
6664 	gfx_v11_0_unset_safe_mode(adev, 0);
6665 
6666 	dev_info(adev->dev, "The ring %s pipe reset to the ME firmware start PC: %s\n", ring->name,
6667 			r == 0 ? "successfully" : "failed");
6668 	/* FIXME: Sometimes driver can't cache the ME firmware start PC correctly,
6669 	 * so the pipe reset status relies on the later gfx ring test result.
6670 	 */
6671 	return 0;
6672 }
6673 
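/*
 * KGQ (kernel gfx queue) reset flow: ask the MES to reset the queue first;
 * if that fails, fall back to a full gfx pipe reset. Either way the MQD is
 * re-initialized and the queue remapped before the ring is re-tested.
 */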
6674 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
6675 {
6676 	struct amdgpu_device *adev = ring->adev;
6677 	int r;
6678 
6679 	if (amdgpu_sriov_vf(adev))
6680 		return -EINVAL;
6681 
6682 	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
6683 	if (r) {
6685 		dev_warn(adev->dev, "reset via MES failed (%d), trying pipe reset\n", r);
6686 		r = gfx_v11_reset_gfx_pipe(ring);
6687 		if (r)
6688 			return r;
6689 	}
6690 
6691 	r = gfx_v11_0_kgq_init_queue(ring, true);
6692 	if (r) {
6693 		dev_err(adev->dev, "failed to init kgq\n");
6694 		return r;
6695 	}
6696 
6697 	r = amdgpu_mes_map_legacy_queue(adev, ring);
6698 	if (r) {
6699 		dev_err(adev->dev, "failed to remap kgq\n");
6700 		return r;
6701 	}
6702 
6703 	return amdgpu_ring_test_ring(ring);
6704 }
6705 
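/*
 * Compute pipe reset: MECs running the RS64 microcode are reset through the
 * per-pipe bits in CP_MEC_RS64_CNTL, while the legacy (non-RS64) path uses
 * the per-ME pipe reset bits in CP_MEC_CNTL.
 */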
6706 static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring)
6707 {
6709 	struct amdgpu_device *adev = ring->adev;
6710 	uint32_t reset_pipe = 0, clean_pipe = 0;
6711 	int r;
6712 
6713 	if (!gfx_v11_pipe_reset_support(adev))
6714 		return -EOPNOTSUPP;
6715 
6716 	gfx_v11_0_set_safe_mode(adev, 0);
6717 	mutex_lock(&adev->srbm_mutex);
6718 	soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6719 
6720 	reset_pipe = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
6721 	clean_pipe = reset_pipe;
6722 
6723 	if (adev->gfx.rs64_enable) {
6725 		switch (ring->pipe) {
6726 		case 0:
6727 			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
6728 						   MEC_PIPE0_RESET, 1);
6729 			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
6730 						   MEC_PIPE0_RESET, 0);
6731 			break;
6732 		case 1:
6733 			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
6734 						   MEC_PIPE1_RESET, 1);
6735 			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
6736 						   MEC_PIPE1_RESET, 0);
6737 			break;
6738 		case 2:
6739 			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
6740 						   MEC_PIPE2_RESET, 1);
6741 			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
6742 						   MEC_PIPE2_RESET, 0);
6743 			break;
6744 		case 3:
6745 			reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_RS64_CNTL,
6746 						   MEC_PIPE3_RESET, 1);
6747 			clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_RS64_CNTL,
6748 						   MEC_PIPE3_RESET, 0);
6749 			break;
6750 		default:
6751 			break;
6752 		}
6753 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, reset_pipe);
6754 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, clean_pipe);
6755 		r = (RREG32_SOC15(GC, 0, regCP_MEC_RS64_INSTR_PNTR) << 2) -
6756 					RS64_FW_UC_START_ADDR_LO;
6757 	} else {
6758 		if (ring->me == 1) {
6759 			switch (ring->pipe) {
6760 			case 0:
6761 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6762 							   MEC_ME1_PIPE0_RESET, 1);
6763 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6764 							   MEC_ME1_PIPE0_RESET, 0);
6765 				break;
6766 			case 1:
6767 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6768 							   MEC_ME1_PIPE1_RESET, 1);
6769 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6770 							   MEC_ME1_PIPE1_RESET, 0);
6771 				break;
6772 			case 2:
6773 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6774 							   MEC_ME1_PIPE2_RESET, 1);
6775 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6776 							   MEC_ME1_PIPE2_RESET, 0);
6777 				break;
6778 			case 3:
6779 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6780 							   MEC_ME1_PIPE3_RESET, 1);
6781 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6782 							   MEC_ME1_PIPE3_RESET, 0);
6783 				break;
6784 			default:
6785 				break;
6786 			}
6787 			/* mec1 fw pc: CP_MEC1_INSTR_PNTR */
6788 		} else {
6789 			switch (ring->pipe) {
6790 			case 0:
6791 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6792 							   MEC_ME2_PIPE0_RESET, 1);
6793 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6794 							   MEC_ME2_PIPE0_RESET, 0);
6795 				break;
6796 			case 1:
6797 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6798 							   MEC_ME2_PIPE1_RESET, 1);
6799 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6800 							   MEC_ME2_PIPE1_RESET, 0);
6801 				break;
6802 			case 2:
6803 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6804 							   MEC_ME2_PIPE2_RESET, 1);
6805 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6806 							   MEC_ME2_PIPE2_RESET, 0);
6807 				break;
6808 			case 3:
6809 				reset_pipe = REG_SET_FIELD(reset_pipe, CP_MEC_CNTL,
6810 							   MEC_ME2_PIPE3_RESET, 1);
6811 				clean_pipe = REG_SET_FIELD(clean_pipe, CP_MEC_CNTL,
6812 							   MEC_ME2_PIPE3_RESET, 0);
6813 				break;
6814 			default:
6815 				break;
6816 			}
6817 			/* mec2 fw pc: CP:CP_MEC2_INSTR_PNTR */
6818 		}
6819 		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, reset_pipe);
6820 		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, clean_pipe);
6821 		r = RREG32(SOC15_REG_OFFSET(GC, 0, regCP_MEC1_INSTR_PNTR));
6822 	}
6823 
6824 	soc21_grbm_select(adev, 0, 0, 0, 0);
6825 	mutex_unlock(&adev->srbm_mutex);
6826 	gfx_v11_0_unset_safe_mode(adev, 0);
6827 
6828 	dev_info(adev->dev, "The ring %s pipe resets to MEC FW start PC: %s\n", ring->name,
6829 			r == 0 ? "successfully" : "failed");
6830 	/*FIXME:Sometimes driver can't cache the MEC firmware start PC correctly, so the pipe
6831 	 * reset status relies on the compute ring test result.
6832 	 */
6833 	return 0;
6834 }
6835 
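/* KCQ reset mirrors the KGQ flow: MES queue reset first, then a compute
 * pipe reset as the fallback.
 */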
6836 static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
6837 {
6838 	struct amdgpu_device *adev = ring->adev;
6839 	int r = 0;
6840 
6841 	if (amdgpu_sriov_vf(adev))
6842 		return -EINVAL;
6843 
6844 	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
6845 	if (r) {
6846 		dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
6847 		r = gfx_v11_0_reset_compute_pipe(ring);
6848 		if (r)
6849 			return r;
6850 	}
6851 
6852 	r = gfx_v11_0_kcq_init_queue(ring, true);
6853 	if (r) {
6854 		dev_err(adev->dev, "fail to init kcq\n");
6855 		return r;
6856 	}
6857 	r = amdgpu_mes_map_legacy_queue(adev, ring);
6858 	if (r) {
6859 		dev_err(adev->dev, "failed to remap kcq\n");
6860 		return r;
6861 	}
6862 
6863 	return amdgpu_ring_test_ring(ring);
6864 }
6865 
6866 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
6867 {
6868 	struct amdgpu_device *adev = ip_block->adev;
6869 	uint32_t i, j, k, reg, index = 0;
6870 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
6871 
6872 	if (!adev->gfx.ip_dump_core)
6873 		return;
6874 
6875 	for (i = 0; i < reg_count; i++)
6876 		drm_printf(p, "%-50s \t 0x%08x\n",
6877 			   gc_reg_list_11_0[i].reg_name,
6878 			   adev->gfx.ip_dump_core[i]);
6879 
6880 	/* print compute queue registers for all instances */
6881 	if (!adev->gfx.ip_dump_compute_queues)
6882 		return;
6883 
6884 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
6885 	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
6886 		   adev->gfx.mec.num_mec,
6887 		   adev->gfx.mec.num_pipe_per_mec,
6888 		   adev->gfx.mec.num_queue_per_pipe);
6889 
6890 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6891 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6892 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
6893 				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
6894 				for (reg = 0; reg < reg_count; reg++) {
6895 					drm_printf(p, "%-50s \t 0x%08x\n",
6896 						   gc_cp_reg_list_11[reg].reg_name,
6897 						   adev->gfx.ip_dump_compute_queues[index + reg]);
6898 				}
6899 				index += reg_count;
6900 			}
6901 		}
6902 	}
6903 
6904 	/* print gfx queue registers for all instances */
6905 	if (!adev->gfx.ip_dump_gfx_queues)
6906 		return;
6907 
6908 	index = 0;
6909 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
6910 	drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
6911 		   adev->gfx.me.num_me,
6912 		   adev->gfx.me.num_pipe_per_me,
6913 		   adev->gfx.me.num_queue_per_pipe);
6914 
6915 	for (i = 0; i < adev->gfx.me.num_me; i++) {
6916 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6917 			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
6918 				drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
6919 				for (reg = 0; reg < reg_count; reg++) {
6920 					drm_printf(p, "%-50s \t 0x%08x\n",
6921 						   gc_gfx_queue_reg_list_11[reg].reg_name,
6922 						   adev->gfx.ip_dump_gfx_queues[index + reg]);
6923 				}
6924 				index += reg_count;
6925 			}
6926 		}
6927 	}
6928 }
6929 
6930 static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
6931 {
6932 	struct amdgpu_device *adev = ip_block->adev;
6933 	uint32_t i, j, k, reg, index = 0;
6934 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
6935 
6936 	if (!adev->gfx.ip_dump_core)
6937 		return;
6938 
6939 	amdgpu_gfx_off_ctrl(adev, false);
6940 	for (i = 0; i < reg_count; i++)
6941 		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
6942 	amdgpu_gfx_off_ctrl(adev, true);
6943 
6944 	/* dump compute queue registers for all instances */
6945 	if (!adev->gfx.ip_dump_compute_queues)
6946 		return;
6947 
6948 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
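	/* ip_dump_compute_queues is a flat array: the block for (mec i,
	 * pipe j, queue k) starts at
	 * ((i * num_pipe_per_mec + j) * num_queue_per_pipe + k) * reg_count.
	 */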
6949 	amdgpu_gfx_off_ctrl(adev, false);
6950 	mutex_lock(&adev->srbm_mutex);
6951 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6952 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6953 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
6954 				/* ME0 is for GFX, so compute (MEC) selection starts after the gfx MEs */
6955 				soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
6956 				for (reg = 0; reg < reg_count; reg++) {
6957 					adev->gfx.ip_dump_compute_queues[index + reg] =
6958 						RREG32(SOC15_REG_ENTRY_OFFSET(
6959 							gc_cp_reg_list_11[reg]));
6960 				}
6961 				index += reg_count;
6962 			}
6963 		}
6964 	}
6965 	soc21_grbm_select(adev, 0, 0, 0, 0);
6966 	mutex_unlock(&adev->srbm_mutex);
6967 	amdgpu_gfx_off_ctrl(adev, true);
6968 
6969 	/* dump gfx queue registers for all instances */
6970 	if (!adev->gfx.ip_dump_gfx_queues)
6971 		return;
6972 
6973 	index = 0;
6974 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
6975 	amdgpu_gfx_off_ctrl(adev, false);
6976 	mutex_lock(&adev->srbm_mutex);
6977 	for (i = 0; i < adev->gfx.me.num_me; i++) {
6978 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6979 			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
6980 				soc21_grbm_select(adev, i, j, k, 0);
6981 
6982 				for (reg = 0; reg < reg_count; reg++) {
6983 					adev->gfx.ip_dump_gfx_queues[index + reg] =
6984 						RREG32(SOC15_REG_ENTRY_OFFSET(
6985 							gc_gfx_queue_reg_list_11[reg]));
6986 				}
6987 				index += reg_count;
6988 			}
6989 		}
6990 	}
6991 	soc21_grbm_select(adev, 0, 0, 0, 0);
6992 	mutex_unlock(&adev->srbm_mutex);
6993 	amdgpu_gfx_off_ctrl(adev, true);
6994 }
6995 
6996 static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
6997 {
6998 	/* Emit the cleaner shader */
6999 	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
7000 	amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
7001 }
7002 
7003 static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
7004 {
7005 	amdgpu_gfx_profile_ring_begin_use(ring);
7006 
7007 	amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
7008 }
7009 
7010 static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
7011 {
7012 	amdgpu_gfx_profile_ring_end_use(ring);
7013 
7014 	amdgpu_gfx_enforce_isolation_ring_end_use(ring);
7015 }
7016 
7017 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
7018 	.name = "gfx_v11_0",
7019 	.early_init = gfx_v11_0_early_init,
7020 	.late_init = gfx_v11_0_late_init,
7021 	.sw_init = gfx_v11_0_sw_init,
7022 	.sw_fini = gfx_v11_0_sw_fini,
7023 	.hw_init = gfx_v11_0_hw_init,
7024 	.hw_fini = gfx_v11_0_hw_fini,
7025 	.suspend = gfx_v11_0_suspend,
7026 	.resume = gfx_v11_0_resume,
7027 	.is_idle = gfx_v11_0_is_idle,
7028 	.wait_for_idle = gfx_v11_0_wait_for_idle,
7029 	.soft_reset = gfx_v11_0_soft_reset,
7030 	.check_soft_reset = gfx_v11_0_check_soft_reset,
7031 	.post_soft_reset = gfx_v11_0_post_soft_reset,
7032 	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
7033 	.set_powergating_state = gfx_v11_0_set_powergating_state,
7034 	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
7035 	.dump_ip_state = gfx_v11_ip_dump,
7036 	.print_ip_state = gfx_v11_ip_print,
7037 };
7038 
7039 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
7040 	.type = AMDGPU_RING_TYPE_GFX,
7041 	.align_mask = 0xff,
7042 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7043 	.support_64bit_ptrs = true,
7044 	.secure_submission_supported = true,
7045 	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
7046 	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
7047 	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
7048 	.emit_frame_size = /* up to 247 dwords in total, assuming 16 IBs */
7049 		5 + /* update_spm_vmid */
7050 		5 + /* COND_EXEC */
7051 		22 + /* SET_Q_PREEMPTION_MODE */
7052 		7 + /* PIPELINE_SYNC */
7053 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7054 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7055 		4 + /* VM_FLUSH */
7056 		8 + /* FENCE for VM_FLUSH */
7057 		20 + /* GDS switch */
7058 		5 + /* COND_EXEC */
7059 		7 + /* HDP_flush */
7060 		4 + /* VGT_flush */
7061 		31 + /*	DE_META */
7062 		3 + /* CNTX_CTRL */
7063 		5 + /* HDP_INVL */
7064 		22 + /* SET_Q_PREEMPTION_MODE */
7065 		8 + 8 + /* FENCE x2 */
7066 		8 + /* gfx_v11_0_emit_mem_sync */
7067 		2, /* gfx_v11_0_ring_emit_cleaner_shader */
7068 	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
7069 	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
7070 	.emit_fence = gfx_v11_0_ring_emit_fence,
7071 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
7072 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
7073 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
7074 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
7075 	.test_ring = gfx_v11_0_ring_test_ring,
7076 	.test_ib = gfx_v11_0_ring_test_ib,
7077 	.insert_nop = gfx_v11_ring_insert_nop,
7078 	.pad_ib = amdgpu_ring_generic_pad_ib,
7079 	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
7080 	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
7081 	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
7082 	.preempt_ib = gfx_v11_0_ring_preempt_ib,
7083 	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
7084 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
7085 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
7086 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
7087 	.soft_recovery = gfx_v11_0_ring_soft_recovery,
7088 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
7089 	.reset = gfx_v11_0_reset_kgq,
7090 	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
7091 	.begin_use = gfx_v11_0_ring_begin_use,
7092 	.end_use = gfx_v11_0_ring_end_use,
7093 };
7094 
7095 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
7096 	.type = AMDGPU_RING_TYPE_COMPUTE,
7097 	.align_mask = 0xff,
7098 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7099 	.support_64bit_ptrs = true,
7100 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
7101 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
7102 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
7103 	.emit_frame_size =
7104 		5 + /* update_spm_vmid */
7105 		20 + /* gfx_v11_0_ring_emit_gds_switch */
7106 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
7107 		5 + /* hdp invalidate */
7108 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
7109 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7110 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7111 		2 + /* gfx_v11_0_ring_emit_vm_flush */
7112 		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
7113 		8 + /* gfx_v11_0_emit_mem_sync */
7114 		2, /* gfx_v11_0_ring_emit_cleaner_shader */
7115 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
7116 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
7117 	.emit_fence = gfx_v11_0_ring_emit_fence,
7118 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
7119 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
7120 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
7121 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
7122 	.test_ring = gfx_v11_0_ring_test_ring,
7123 	.test_ib = gfx_v11_0_ring_test_ib,
7124 	.insert_nop = gfx_v11_ring_insert_nop,
7125 	.pad_ib = amdgpu_ring_generic_pad_ib,
7126 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
7127 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
7128 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
7129 	.soft_recovery = gfx_v11_0_ring_soft_recovery,
7130 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
7131 	.reset = gfx_v11_0_reset_kcq,
7132 	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
7133 	.begin_use = gfx_v11_0_ring_begin_use,
7134 	.end_use = gfx_v11_0_ring_end_use,
7135 };
7136 
7137 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
7138 	.type = AMDGPU_RING_TYPE_KIQ,
7139 	.align_mask = 0xff,
7140 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
7141 	.support_64bit_ptrs = true,
7142 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
7143 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
7144 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
7145 	.emit_frame_size =
7146 		20 + /* gfx_v11_0_ring_emit_gds_switch */
7147 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
7148 		5 + /* hdp invalidate */
7149 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
7150 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7151 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7152 		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7153 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
7154 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
7155 	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
7156 	.test_ring = gfx_v11_0_ring_test_ring,
7157 	.test_ib = gfx_v11_0_ring_test_ib,
7158 	.insert_nop = amdgpu_ring_insert_nop,
7159 	.pad_ib = amdgpu_ring_generic_pad_ib,
7160 	.emit_rreg = gfx_v11_0_ring_emit_rreg,
7161 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
7162 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
7163 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
7164 };
7165 
7166 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
7167 {
7168 	int i;
7169 
7170 	adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;
7171 
7172 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7173 		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
7174 
7175 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7176 		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
7177 }
7178 
7179 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
7180 	.set = gfx_v11_0_set_eop_interrupt_state,
7181 	.process = gfx_v11_0_eop_irq,
7182 };
7183 
7184 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
7185 	.set = gfx_v11_0_set_priv_reg_fault_state,
7186 	.process = gfx_v11_0_priv_reg_irq,
7187 };
7188 
7189 static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
7190 	.set = gfx_v11_0_set_bad_op_fault_state,
7191 	.process = gfx_v11_0_bad_op_irq,
7192 };
7193 
7194 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
7195 	.set = gfx_v11_0_set_priv_inst_fault_state,
7196 	.process = gfx_v11_0_priv_inst_irq,
7197 };
7198 
7199 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
7200 	.process = gfx_v11_0_rlc_gc_fed_irq,
7201 };
7202 
7203 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
7204 {
7205 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7206 	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
7207 
7208 	adev->gfx.priv_reg_irq.num_types = 1;
7209 	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
7210 
7211 	adev->gfx.bad_op_irq.num_types = 1;
7212 	adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;
7213 
7214 	adev->gfx.priv_inst_irq.num_types = 1;
7215 	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
7216 
7217 	adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
7218 	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
7220 }
7221 
7222 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
7223 {
7224 	if (adev->flags & AMD_IS_APU)
7225 		adev->gfx.imu.mode = MISSION_MODE;
7226 	else
7227 		adev->gfx.imu.mode = DEBUG_MODE;
7228 
7229 	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
7230 }
7231 
7232 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
7233 {
7234 	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
7235 }
7236 
7237 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
7238 {
7239 	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
7240 			    adev->gfx.config.max_sh_per_se *
7241 			    adev->gfx.config.max_shader_engines;
7242 
7243 	adev->gds.gds_size = 0x1000;
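	/* one GDS wave ID per potential wave, assuming up to 32 waves per CU */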
7244 	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
7245 	adev->gds.gws_size = 64;
7246 	adev->gds.oa_size = 16;
7247 }
7248 
7249 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
7250 {
7251 	/* set gfx eng mqd */
7252 	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
7253 		sizeof(struct v11_gfx_mqd);
7254 	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
7255 		gfx_v11_0_gfx_mqd_init;
7256 	/* set compute eng mqd */
7257 	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
7258 		sizeof(struct v11_compute_mqd);
7259 	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
7260 		gfx_v11_0_compute_mqd_init;
7261 }
7262 
7263 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
7264 							  u32 bitmap)
7265 {
7266 	u32 data;
7267 
7268 	if (!bitmap)
7269 		return;
7270 
7271 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
7272 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
7273 
7274 	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
7275 }
7276 
7277 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
7278 {
7279 	u32 data, wgp_bitmask;
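	/* Inactive WGPs are the union of the fuse-disabled (CC) and
	 * driver-disabled (GC_USER) masks; the active mask is its inverse,
	 * with one WGP covering two CUs (hence max_cu_per_sh >> 1 below).
	 */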
7280 	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
7281 	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
7282 
7283 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
7284 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
7285 
7286 	wgp_bitmask =
7287 		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
7288 
7289 	return (~data) & wgp_bitmask;
7290 }
7291 
7292 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
7293 {
7294 	u32 wgp_idx, wgp_active_bitmap;
7295 	u32 cu_bitmap_per_wgp, cu_active_bitmap;
7296 
7297 	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
7298 	cu_active_bitmap = 0;
7299 
7300 	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
7301 		/* if there is one WGP enabled, it means 2 CUs will be enabled */
7302 		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
7303 		if (wgp_active_bitmap & (1 << wgp_idx))
7304 			cu_active_bitmap |= cu_bitmap_per_wgp;
7305 	}
7306 
7307 	return cu_active_bitmap;
7308 }
7309 
7310 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
7311 				 struct amdgpu_cu_info *cu_info)
7312 {
7313 	int i, j, k, counter, active_cu_number = 0;
7314 	u32 mask, bitmap;
7315 	unsigned disable_masks[8 * 2];
7316 
7317 	if (!adev || !cu_info)
7318 		return -EINVAL;
7319 
7320 	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
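	/* disable_masks holds the user-requested (module parameter) CU
	 * disable masks for up to 8 SEs x 2 SHs.
	 */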
7321 
7322 	mutex_lock(&adev->grbm_idx_mutex);
7323 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7324 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7325 			bitmap = i * adev->gfx.config.max_sh_per_se + j;
7326 			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
7327 				continue;
7328 			mask = 1;
7329 			counter = 0;
7330 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
7331 			if (i < 8 && j < 2)
7332 				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
7333 					adev, disable_masks[i * 2 + j]);
7334 			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
7335 
7336 			/**
7337 			 * GFX11 could support more than 4 SEs, while the bitmap
7338 			 * in cu_info struct is 4x4 and ioctl interface struct
7339 			 * drm_amdgpu_info_device should keep stable.
7340 			 * So we use last two columns of bitmap to store cu mask for
7341 			 * SEs 4 to 7, the layout of the bitmap is as below:
7342 			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
7343 			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
7344 			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
7345 			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
7346 			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
7347 			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
7348 			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
7349 			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
7350 			 */
7351 			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
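			/* e.g. i = 5 (SE5), j = 1 (SH1) lands in bitmap[0][1][3],
			 * matching the table above
			 */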
7352 
7353 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7354 				if (bitmap & mask)
7355 					counter++;
7356 
7357 				mask <<= 1;
7358 			}
7359 			active_cu_number += counter;
7360 		}
7361 	}
7362 	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7363 	mutex_unlock(&adev->grbm_idx_mutex);
7364 
7365 	cu_info->number = active_cu_number;
7366 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7367 
7368 	return 0;
7369 }
7370 
7371 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
7372 {
7373 	.type = AMD_IP_BLOCK_TYPE_GFX,
7374 	.major = 11,
7375 	.minor = 0,
7376 	.rev = 0,
7377 	.funcs = &gfx_v11_0_ip_funcs,
7378 };
7379