xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c (revision 76e3b62db9bf2dbedc5f41070684fdec64cd71a6)
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "imu_v11_0.h"
33 #include "soc21.h"
34 #include "nvd.h"
35 
36 #include "gc/gc_11_0_0_offset.h"
37 #include "gc/gc_11_0_0_sh_mask.h"
38 #include "smuio/smuio_13_0_6_offset.h"
39 #include "smuio/smuio_13_0_6_sh_mask.h"
40 #include "navi10_enum.h"
41 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
42 
43 #include "soc15.h"
44 #include "clearstate_gfx11.h"
45 #include "v11_structs.h"
46 #include "gfx_v11_0.h"
47 #include "gfx_v11_0_cleaner_shader.h"
48 #include "gfx_v11_0_3.h"
49 #include "nbio_v4_3.h"
50 #include "mes_v11_0.h"
51 
52 #define GFX11_NUM_GFX_RINGS		1
53 #define GFX11_MEC_HPD_SIZE	2048
54 
55 #define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
56 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388
57 
58 #define regCGTT_WD_CLK_CTRL		0x5086
59 #define regCGTT_WD_CLK_CTRL_BASE_IDX	1
60 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1	0x4e7e
61 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
62 #define regPC_CONFIG_CNTL_1		0x194d
63 #define regPC_CONFIG_CNTL_1_BASE_IDX	1
64 
65 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
66 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
67 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
68 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
69 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
70 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
71 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
73 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
74 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
75 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
76 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
77 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
78 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
79 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
80 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
81 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
82 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
83 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
84 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
85 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
86 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
87 MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
88 MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
89 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
90 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
91 MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
92 MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
93 MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
94 MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
95 MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
97 MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
98 MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
99 MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin");
100 MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin");
101 MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin");
102 MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin");
103 
104 static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
105 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
106 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
107 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
108 	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
109 	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
110 	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
111 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
112 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
113 	SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
114 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
115 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
116 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
117 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
118 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
119 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
120 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
121 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
122 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
123 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
124 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
125 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
126 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
127 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
128 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
129 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
130 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
131 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
132 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
133 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
134 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
135 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
136 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
137 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
138 	SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
139 	SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
140 	SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
141 	SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
142 	SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
143 	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
144 	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
145 	SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
146 	SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
147 	SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
148 	SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
149 	SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
150 	SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
151 	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
152 	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
153 	SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
154 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
155 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
156 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
157 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
158 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
159 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
160 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
161 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
162 	/* cp header registers */
163 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
164 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
165 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
166 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
167 	/* SE status registers */
168 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
169 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
170 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
171 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
172 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
173 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
174 };
175 
176 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
177 	/* compute registers */
178 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
179 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
180 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
181 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
182 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
183 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
184 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
185 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
186 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
187 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
188 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
189 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
190 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
191 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
192 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
193 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
194 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
195 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
196 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
197 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
198 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
199 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
200 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
201 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
202 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
203 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
204 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
205 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
206 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
207 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
208 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
209 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
210 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
211 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
212 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
213 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
214 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
215 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
216 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
217 };
218 
219 static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
220 	/* gfx queue registers */
221 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
222 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
223 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
224 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
225 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
226 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
227 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
228 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
229 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
230 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
231 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
232 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
233 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
234 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
235 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
236 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
237 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
238 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
239 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
240 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
241 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
242 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
243 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
244 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
245 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
246 };
247 
248 static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
249 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
250 };
251 
252 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
253 {
254 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
255 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
256 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
257 	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
258 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
259 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
260 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
261 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
262 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
263 };
264 
265 #define DEFAULT_SH_MEM_CONFIG \
266 	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
267 	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
268 	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
269 
270 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
271 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
272 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
273 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
274 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
275 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
276 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
277 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
278                                  struct amdgpu_cu_info *cu_info);
279 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
280 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
281 				   u32 sh_num, u32 instance, int xcc_id);
282 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
283 
284 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
285 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
286 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
287 				     uint32_t val);
288 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
289 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
290 					   uint16_t pasid, uint32_t flush_type,
291 					   bool all_hub, uint8_t dst_sel);
292 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
293 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
294 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
295 				      bool enable);
296 
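/* Emit a SET_RESOURCES packet on the KIQ ring: VMID mask, unmap latency,
 * queue mask, cleaner shader address, and OAC mask / GDS heap (both 0).
 */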
297 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
298 {
299 	struct amdgpu_device *adev = kiq_ring->adev;
300 	u64 shader_mc_addr;
301 
302 	/* Cleaner shader MC address */
303 	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
304 
305 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
306 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
307 			  PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
308 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
309 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
310 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
311 	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
312 	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
313 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
314 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
315 }
316 
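/* Emit a MAP_QUEUES packet on the KIQ ring to map a gfx, compute or MES
 * ring's MQD and wptr address to the hardware queue selected by
 * me/pipe/queue and the ring's doorbell.
 */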
317 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
318 				 struct amdgpu_ring *ring)
319 {
320 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
321 	uint64_t wptr_addr = ring->wptr_gpu_addr;
322 	uint32_t me = 0, eng_sel = 0;
323 
324 	switch (ring->funcs->type) {
325 	case AMDGPU_RING_TYPE_COMPUTE:
326 		me = 1;
327 		eng_sel = 0;
328 		break;
329 	case AMDGPU_RING_TYPE_GFX:
330 		me = 0;
331 		eng_sel = 4;
332 		break;
333 	case AMDGPU_RING_TYPE_MES:
334 		me = 2;
335 		eng_sel = 5;
336 		break;
337 	default:
338 		WARN_ON(1);
339 	}
340 
341 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
342 	/* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
343 	amdgpu_ring_write(kiq_ring,
344 			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
345 			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
346 			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
347 			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
348 			  PACKET3_MAP_QUEUES_ME((me)) |
349 			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
350 			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
351 			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
352 			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
353 	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
354 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
355 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
356 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
357 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
358 }
359 
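/* Emit an UNMAP_QUEUES packet on the KIQ ring, or route the request through
 * MES when MES is enabled and the KIQ ring is not ready. For
 * PREEMPT_QUEUES_NO_UNMAP a fence address and sequence value are appended.
 */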
360 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
361 				   struct amdgpu_ring *ring,
362 				   enum amdgpu_unmap_queues_action action,
363 				   u64 gpu_addr, u64 seq)
364 {
365 	struct amdgpu_device *adev = kiq_ring->adev;
366 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
367 
368 	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
369 		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
370 		return;
371 	}
372 
373 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
374 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
375 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
376 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
377 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
378 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
379 	amdgpu_ring_write(kiq_ring,
380 		  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
381 
382 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
383 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
384 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
385 		amdgpu_ring_write(kiq_ring, seq);
386 	} else {
387 		amdgpu_ring_write(kiq_ring, 0);
388 		amdgpu_ring_write(kiq_ring, 0);
389 		amdgpu_ring_write(kiq_ring, 0);
390 	}
391 }
392 
393 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
394 				   struct amdgpu_ring *ring,
395 				   u64 addr,
396 				   u64 seq)
397 {
398 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
399 
400 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
401 	amdgpu_ring_write(kiq_ring,
402 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
403 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
404 			  PACKET3_QUERY_STATUS_COMMAND(2));
405 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
406 			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
407 			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
408 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
409 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
410 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
411 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
412 }
413 
414 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
415 				uint16_t pasid, uint32_t flush_type,
416 				bool all_hub)
417 {
418 	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
419 }
420 
421 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
422 	.kiq_set_resources = gfx11_kiq_set_resources,
423 	.kiq_map_queues = gfx11_kiq_map_queues,
424 	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
425 	.kiq_query_status = gfx11_kiq_query_status,
426 	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
427 	.set_resources_size = 8,
428 	.map_queues_size = 7,
429 	.unmap_queues_size = 6,
430 	.query_status_size = 7,
431 	.invalidate_tlbs_size = 2,
432 };
433 
434 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
435 {
436 	adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
437 }
438 
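/* Apply golden register settings: per-IP overrides first, then the common
 * GC 11.0 list. Skipped entirely under SR-IOV.
 */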
439 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
440 {
441 	if (amdgpu_sriov_vf(adev))
442 		return;
443 
444 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
445 	case IP_VERSION(11, 0, 1):
446 	case IP_VERSION(11, 0, 4):
447 		soc15_program_register_sequence(adev,
448 						golden_settings_gc_11_0_1,
449 						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
450 		break;
451 	default:
452 		break;
453 	}
454 	soc15_program_register_sequence(adev,
455 					golden_settings_gc_11_0,
456 					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
457 
458 }
459 
460 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
461 				       bool wc, uint32_t reg, uint32_t val)
462 {
463 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
464 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
465 			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
466 	amdgpu_ring_write(ring, reg);
467 	amdgpu_ring_write(ring, 0);
468 	amdgpu_ring_write(ring, val);
469 }
470 
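/* Emit a WAIT_REG_MEM packet that polls a register or memory location until
 * the masked value equals @ref, using @inv as the poll interval.
 */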
471 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
472 				  int mem_space, int opt, uint32_t addr0,
473 				  uint32_t addr1, uint32_t ref, uint32_t mask,
474 				  uint32_t inv)
475 {
476 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
477 	amdgpu_ring_write(ring,
478 			  /* memory (1) or register (0) */
479 			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
480 			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
481 			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
482 			   WAIT_REG_MEM_ENGINE(eng_sel)));
483 
484 	if (mem_space)
485 		BUG_ON(addr0 & 0x3); /* Dword align */
486 	amdgpu_ring_write(ring, addr0);
487 	amdgpu_ring_write(ring, addr1);
488 	amdgpu_ring_write(ring, ref);
489 	amdgpu_ring_write(ring, mask);
490 	amdgpu_ring_write(ring, inv); /* poll interval */
491 }
492 
493 static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
494 {
495 	/* Header itself is a NOP packet */
496 	if (num_nop == 1) {
497 		amdgpu_ring_write(ring, ring->funcs->nop);
498 		return;
499 	}
500 
501 	/* Max HW optimization up to 0x3ffe dwords, then emit the remaining NOPs one at a time */
502 	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
503 
504 	/* Header is at index 0, followed by num_nop - 1 NOP packets */
505 	amdgpu_ring_insert_nop(ring, num_nop - 1);
506 }
507 
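/* Basic ring test: write 0xDEADBEEF to SCRATCH_REG0 through the ring and
 * poll the register until the value reads back or the timeout expires.
 */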
508 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
509 {
510 	struct amdgpu_device *adev = ring->adev;
511 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
512 	uint32_t tmp = 0;
513 	unsigned i;
514 	int r;
515 
516 	WREG32(scratch, 0xCAFEDEAD);
517 	r = amdgpu_ring_alloc(ring, 5);
518 	if (r) {
519 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
520 			  ring->idx, r);
521 		return r;
522 	}
523 
524 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
525 		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
526 	} else {
527 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
528 		amdgpu_ring_write(ring, scratch -
529 				  PACKET3_SET_UCONFIG_REG_START);
530 		amdgpu_ring_write(ring, 0xDEADBEEF);
531 	}
532 	amdgpu_ring_commit(ring);
533 
534 	for (i = 0; i < adev->usec_timeout; i++) {
535 		tmp = RREG32(scratch);
536 		if (tmp == 0xDEADBEEF)
537 			break;
538 		if (amdgpu_emu_mode == 1)
539 			msleep(1);
540 		else
541 			udelay(1);
542 	}
543 
544 	if (i >= adev->usec_timeout)
545 		r = -ETIMEDOUT;
546 	return r;
547 }
548 
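/* Indirect buffer test: submit a small IB that writes 0xDEADBEEF to a
 * writeback slot and verify the value once the fence signals.
 */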
549 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
550 {
551 	struct amdgpu_device *adev = ring->adev;
552 	struct amdgpu_ib ib;
553 	struct dma_fence *f = NULL;
554 	unsigned index;
555 	uint64_t gpu_addr;
556 	volatile uint32_t *cpu_ptr;
557 	long r;
558 
559 	/* MES KIQ fw doesn't have indirect buffer support for now */
560 	if (adev->enable_mes_kiq &&
561 	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
562 		return 0;
563 
564 	memset(&ib, 0, sizeof(ib));
565 
566 	if (ring->is_mes_queue) {
567 		uint32_t padding, offset;
568 
569 		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
570 		padding = amdgpu_mes_ctx_get_offs(ring,
571 						  AMDGPU_MES_CTX_PADDING_OFFS);
572 
573 		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
574 		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
575 
576 		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
577 		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
578 		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
579 	} else {
580 		r = amdgpu_device_wb_get(adev, &index);
581 		if (r)
582 			return r;
583 
584 		gpu_addr = adev->wb.gpu_addr + (index * 4);
585 		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
586 		cpu_ptr = &adev->wb.wb[index];
587 
588 		r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
589 		if (r) {
590 			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
591 			goto err1;
592 		}
593 	}
594 
595 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
596 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
597 	ib.ptr[2] = lower_32_bits(gpu_addr);
598 	ib.ptr[3] = upper_32_bits(gpu_addr);
599 	ib.ptr[4] = 0xDEADBEEF;
600 	ib.length_dw = 5;
601 
602 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
603 	if (r)
604 		goto err2;
605 
606 	r = dma_fence_wait_timeout(f, false, timeout);
607 	if (r == 0) {
608 		r = -ETIMEDOUT;
609 		goto err2;
610 	} else if (r < 0) {
611 		goto err2;
612 	}
613 
614 	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
615 		r = 0;
616 	else
617 		r = -EINVAL;
618 err2:
619 	if (!ring->is_mes_queue)
620 		amdgpu_ib_free(&ib, NULL);
621 	dma_fence_put(f);
622 err1:
623 	if (!ring->is_mes_queue)
624 		amdgpu_device_wb_free(adev, index);
625 	return r;
626 }
627 
628 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
629 {
630 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
631 	amdgpu_ucode_release(&adev->gfx.me_fw);
632 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
633 	amdgpu_ucode_release(&adev->gfx.mec_fw);
634 
635 	kfree(adev->gfx.rlc.register_list_format);
636 }
637 
638 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
639 {
640 	const struct psp_firmware_header_v1_0 *toc_hdr;
641 	int err = 0;
642 
643 	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
644 				   AMDGPU_UCODE_REQUIRED,
645 				   "amdgpu/%s_toc.bin", ucode_prefix);
646 	if (err)
647 		goto out;
648 
649 	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
650 	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
651 	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
652 	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
653 	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
654 				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
655 	return 0;
656 out:
657 	amdgpu_ucode_release(&adev->psp.toc_fw);
658 	return err;
659 }
660 
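/* CP gfx shadow support is only enabled on GC 11.0.0/11.0.2/11.0.3 SR-IOV
 * VFs with new enough ME/PFP/MEC firmware.
 */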
661 static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
662 {
663 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
664 	case IP_VERSION(11, 0, 0):
665 	case IP_VERSION(11, 0, 2):
666 	case IP_VERSION(11, 0, 3):
667 		if ((adev->gfx.me_fw_version >= 1505) &&
668 		    (adev->gfx.pfp_fw_version >= 1600) &&
669 		    (adev->gfx.mec_fw_version >= 512)) {
670 			if (amdgpu_sriov_vf(adev))
671 				adev->gfx.cp_gfx_shadow = true;
672 			else
673 				adev->gfx.cp_gfx_shadow = false;
674 		}
675 		break;
676 	default:
677 		adev->gfx.cp_gfx_shadow = false;
678 		break;
679 	}
680 }
681 
682 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
683 {
684 	char ucode_prefix[25];
685 	int err;
686 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
687 	uint16_t version_major;
688 	uint16_t version_minor;
689 
690 	DRM_DEBUG("\n");
691 
692 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
693 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
694 				   AMDGPU_UCODE_REQUIRED,
695 				   "amdgpu/%s_pfp.bin", ucode_prefix);
696 	if (err)
697 		goto out;
698 	/* check pfp fw hdr version to decide whether to enable rs64 for gfx11 */
699 	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
700 				(union amdgpu_firmware_header *)
701 				adev->gfx.pfp_fw->data, 2, 0);
702 	if (adev->gfx.rs64_enable) {
703 		dev_info(adev->dev, "CP RS64 enable\n");
704 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
705 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
706 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
707 	} else {
708 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
709 	}
710 
711 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
712 				   AMDGPU_UCODE_REQUIRED,
713 				   "amdgpu/%s_me.bin", ucode_prefix);
714 	if (err)
715 		goto out;
716 	if (adev->gfx.rs64_enable) {
717 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
718 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
719 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
720 	} else {
721 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
722 	}
723 
724 	if (!amdgpu_sriov_vf(adev)) {
725 		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
726 		    adev->pdev->revision == 0xCE)
727 			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
728 						   AMDGPU_UCODE_REQUIRED,
729 						   "amdgpu/gc_11_0_0_rlc_1.bin");
730 		else
731 			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
732 						   AMDGPU_UCODE_REQUIRED,
733 						   "amdgpu/%s_rlc.bin", ucode_prefix);
734 		if (err)
735 			goto out;
736 		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
737 		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
738 		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
739 		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
740 		if (err)
741 			goto out;
742 	}
743 
744 	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
745 				   AMDGPU_UCODE_REQUIRED,
746 				   "amdgpu/%s_mec.bin", ucode_prefix);
747 	if (err)
748 		goto out;
749 	if (adev->gfx.rs64_enable) {
750 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
751 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
752 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
753 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
754 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
755 	} else {
756 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
757 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
758 	}
759 
760 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
761 		err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
762 
763 	/* only one MEC for gfx 11.0.0. */
764 	adev->gfx.mec2_fw = NULL;
765 
766 	gfx_v11_0_check_fw_cp_gfx_shadow(adev);
767 
768 	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
769 		err = adev->gfx.imu.funcs->init_microcode(adev);
770 		if (err)
771 			DRM_ERROR("Failed to init imu firmware!\n");
772 		return err;
773 	}
774 
775 out:
776 	if (err) {
777 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
778 		amdgpu_ucode_release(&adev->gfx.me_fw);
779 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
780 		amdgpu_ucode_release(&adev->gfx.mec_fw);
781 	}
782 
783 	return err;
784 }
785 
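/* Return the clear state buffer size in dwords: the SECT_CONTEXT register
 * extents plus the fixed preamble, context control, tile steering override
 * and clear state packets.
 */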
786 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
787 {
788 	u32 count = 0;
789 	const struct cs_section_def *sect = NULL;
790 	const struct cs_extent_def *ext = NULL;
791 
792 	/* begin clear state */
793 	count += 2;
794 	/* context control state */
795 	count += 3;
796 
797 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
798 		for (ext = sect->section; ext->extent != NULL; ++ext) {
799 			if (sect->id == SECT_CONTEXT)
800 				count += 2 + ext->reg_count;
801 			else
802 				return 0;
803 		}
804 	}
805 
806 	/* set PA_SC_TILE_STEERING_OVERRIDE */
807 	count += 3;
808 	/* end clear state */
809 	count += 2;
810 	/* clear state */
811 	count += 2;
812 
813 	return count;
814 }
815 
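/* Fill the clear state buffer: preamble begin, context control, the
 * SECT_CONTEXT register extents, PA_SC_TILE_STEERING_OVERRIDE, preamble end
 * and the final CLEAR_STATE packet.
 */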
816 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
817 				    volatile u32 *buffer)
818 {
819 	u32 count = 0, i;
820 	const struct cs_section_def *sect = NULL;
821 	const struct cs_extent_def *ext = NULL;
822 	int ctx_reg_offset;
823 
824 	if (adev->gfx.rlc.cs_data == NULL)
825 		return;
826 	if (buffer == NULL)
827 		return;
828 
829 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
830 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
831 
832 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
833 	buffer[count++] = cpu_to_le32(0x80000000);
834 	buffer[count++] = cpu_to_le32(0x80000000);
835 
836 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
837 		for (ext = sect->section; ext->extent != NULL; ++ext) {
838 			if (sect->id == SECT_CONTEXT) {
839 				buffer[count++] =
840 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
841 				buffer[count++] = cpu_to_le32(ext->reg_index -
842 						PACKET3_SET_CONTEXT_REG_START);
843 				for (i = 0; i < ext->reg_count; i++)
844 					buffer[count++] = cpu_to_le32(ext->extent[i]);
845 			} else {
846 				return;
847 			}
848 		}
849 	}
850 
851 	ctx_reg_offset =
852 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
853 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
854 	buffer[count++] = cpu_to_le32(ctx_reg_offset);
855 	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
856 
857 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
858 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
859 
860 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
861 	buffer[count++] = cpu_to_le32(0);
862 }
863 
864 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
865 {
866 	/* clear state block */
867 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
868 			&adev->gfx.rlc.clear_state_gpu_addr,
869 			(void **)&adev->gfx.rlc.cs_ptr);
870 
871 	/* jump table block */
872 	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
873 			&adev->gfx.rlc.cp_table_gpu_addr,
874 			(void **)&adev->gfx.rlc.cp_table_ptr);
875 }
876 
877 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
878 {
879 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
880 
881 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
882 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
883 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
884 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
885 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
886 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
887 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
888 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
889 	adev->gfx.rlc.rlcg_reg_access_supported = true;
890 }
891 
892 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
893 {
894 	const struct cs_section_def *cs_data;
895 	int r;
896 
897 	adev->gfx.rlc.cs_data = gfx11_cs_data;
898 
899 	cs_data = adev->gfx.rlc.cs_data;
900 
901 	if (cs_data) {
902 		/* init clear state block */
903 		r = amdgpu_gfx_rlc_init_csb(adev);
904 		if (r)
905 			return r;
906 	}
907 
908 	/* init spm vmid with 0xf */
909 	if (adev->gfx.rlc.funcs->update_spm_vmid)
910 		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
911 
912 	return 0;
913 }
914 
915 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
916 {
917 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
918 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
919 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
920 }
921 
922 static void gfx_v11_0_me_init(struct amdgpu_device *adev)
923 {
924 	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
925 
926 	amdgpu_gfx_graphics_queue_acquire(adev);
927 }
928 
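/* Acquire the compute queues and allocate the HPD EOP buffer shared by all
 * compute rings.
 */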
929 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
930 {
931 	int r;
932 	u32 *hpd;
933 	size_t mec_hpd_size;
934 
935 	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
936 
937 	/* take ownership of the relevant compute queues */
938 	amdgpu_gfx_compute_queue_acquire(adev);
939 	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
940 
941 	if (mec_hpd_size) {
942 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
943 					      AMDGPU_GEM_DOMAIN_GTT,
944 					      &adev->gfx.mec.hpd_eop_obj,
945 					      &adev->gfx.mec.hpd_eop_gpu_addr,
946 					      (void **)&hpd);
947 		if (r) {
948 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
949 			gfx_v11_0_mec_fini(adev);
950 			return r;
951 		}
952 
953 		memset(hpd, 0, mec_hpd_size);
954 
955 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
956 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
957 	}
958 
959 	return 0;
960 }
961 
962 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
963 {
964 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
965 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
966 		(address << SQ_IND_INDEX__INDEX__SHIFT));
967 	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
968 }
969 
970 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
971 			   uint32_t thread, uint32_t regno,
972 			   uint32_t num, uint32_t *out)
973 {
974 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
975 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
976 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
977 		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
978 		(SQ_IND_INDEX__AUTO_INCR_MASK));
979 	while (num--)
980 		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
981 }
982 
983 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
984 {
985 	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
986 	 * field when performing a select_se_sh so it should be
987 	 * zero here */
988 	WARN_ON(simd != 0);
989 
990 	/* type 3 wave data */
991 	dst[(*no_fields)++] = 3;
992 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
993 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
994 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
995 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
996 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
997 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
998 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
999 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
1000 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
1001 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
1002 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
1003 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
1004 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
1005 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
1006 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
1007 }
1008 
1009 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1010 				     uint32_t wave, uint32_t start,
1011 				     uint32_t size, uint32_t *dst)
1012 {
1013 	WARN_ON(simd != 0);
1014 
1015 	wave_read_regs(
1016 		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
1017 		dst);
1018 }
1019 
1020 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1021 				      uint32_t wave, uint32_t thread,
1022 				      uint32_t start, uint32_t size,
1023 				      uint32_t *dst)
1024 {
1025 	wave_read_regs(
1026 		adev, wave, thread,
1027 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1028 }
1029 
1030 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
1031 					u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1032 {
1033 	soc21_grbm_select(adev, me, pipe, q, vm);
1034 }
1035 
1036 /* all sizes are in bytes */
1037 #define MQD_SHADOW_BASE_SIZE      73728
1038 #define MQD_SHADOW_BASE_ALIGNMENT 256
1039 #define MQD_FWWORKAREA_SIZE       484
1040 #define MQD_FWWORKAREA_ALIGNMENT  256
1041 
1042 static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
1043 					 struct amdgpu_gfx_shadow_info *shadow_info)
1044 {
1045 	if (adev->gfx.cp_gfx_shadow) {
1046 		shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
1047 		shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
1048 		shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
1049 		shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
1050 		return 0;
1051 	} else {
1052 		memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
1053 		return -ENOTSUPP;
1054 	}
1055 }
1056 
1057 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
1058 	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
1059 	.select_se_sh = &gfx_v11_0_select_se_sh,
1060 	.read_wave_data = &gfx_v11_0_read_wave_data,
1061 	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
1062 	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
1063 	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
1064 	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
1065 	.get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
1066 };
1067 
1068 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
1069 {
1070 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1071 	case IP_VERSION(11, 0, 0):
1072 	case IP_VERSION(11, 0, 2):
1073 		adev->gfx.config.max_hw_contexts = 8;
1074 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1075 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1076 		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
1077 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1078 		break;
1079 	case IP_VERSION(11, 0, 3):
1080 		adev->gfx.ras = &gfx_v11_0_3_ras;
1081 		adev->gfx.config.max_hw_contexts = 8;
1082 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1083 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1084 		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
1085 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1086 		break;
1087 	case IP_VERSION(11, 0, 1):
1088 	case IP_VERSION(11, 0, 4):
1089 	case IP_VERSION(11, 5, 0):
1090 	case IP_VERSION(11, 5, 1):
1091 	case IP_VERSION(11, 5, 2):
1092 	case IP_VERSION(11, 5, 3):
1093 		adev->gfx.config.max_hw_contexts = 8;
1094 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1095 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1096 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1097 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
1098 		break;
1099 	default:
1100 		BUG();
1101 		break;
1102 	}
1103 
1104 	return 0;
1105 }
1106 
1107 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
1108 				   int me, int pipe, int queue)
1109 {
1110 	struct amdgpu_ring *ring;
1111 	unsigned int irq_type;
1112 	unsigned int hw_prio;
1113 
1114 	ring = &adev->gfx.gfx_ring[ring_id];
1115 
1116 	ring->me = me;
1117 	ring->pipe = pipe;
1118 	ring->queue = queue;
1119 
1120 	ring->ring_obj = NULL;
1121 	ring->use_doorbell = true;
1122 
1123 	if (!ring_id)
1124 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1125 	else
1126 		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
1127 	ring->vm_hub = AMDGPU_GFXHUB(0);
1128 	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1129 
1130 	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
1131 	hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
1132 		AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
1133 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1134 				hw_prio, NULL);
1135 }
1136 
1137 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1138 				       int mec, int pipe, int queue)
1139 {
1140 	int r;
1141 	unsigned irq_type;
1142 	struct amdgpu_ring *ring;
1143 	unsigned int hw_prio;
1144 
1145 	ring = &adev->gfx.compute_ring[ring_id];
1146 
1147 	/* mec0 is me1 */
1148 	ring->me = mec + 1;
1149 	ring->pipe = pipe;
1150 	ring->queue = queue;
1151 
1152 	ring->ring_obj = NULL;
1153 	ring->use_doorbell = true;
1154 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1155 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1156 				+ (ring_id * GFX11_MEC_HPD_SIZE);
1157 	ring->vm_hub = AMDGPU_GFXHUB(0);
1158 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1159 
1160 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1161 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1162 		+ ring->pipe;
1163 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1164 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
1165 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1166 	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1167 			     hw_prio, NULL);
1168 	if (r)
1169 		return r;
1170 
1171 	return 0;
1172 }
1173 
1174 static struct {
1175 	SOC21_FIRMWARE_ID	id;
1176 	unsigned int		offset;
1177 	unsigned int		size;
1178 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
1179 
1180 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
1181 {
1182 	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
1183 
1184 	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
1185 			(ucode->id < SOC21_FIRMWARE_ID_MAX)) {
1186 		rlc_autoload_info[ucode->id].id = ucode->id;
1187 		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
1188 		rlc_autoload_info[ucode->id].size = ucode->size * 4;
1189 
1190 		ucode++;
1191 	}
1192 }
1193 
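/* Walk the RLC TOC and return the total autoload buffer size, sizing up to
 * the last TOC entry's offset + size when the offsets are padded.
 */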
1194 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
1195 {
1196 	uint32_t total_size = 0;
1197 	SOC21_FIRMWARE_ID id;
1198 
1199 	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
1200 
1201 	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
1202 		total_size += rlc_autoload_info[id].size;
1203 
1204 	/* In case the offsets in the rlc toc are padded for alignment, size up to the last entry */
1205 	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
1206 		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
1207 			rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
1208 
1209 	return total_size;
1210 }
1211 
1212 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
1213 {
1214 	int r;
1215 	uint32_t total_size;
1216 
1217 	total_size = gfx_v11_0_calc_toc_total_size(adev);
1218 
1219 	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
1220 				      AMDGPU_GEM_DOMAIN_VRAM |
1221 				      AMDGPU_GEM_DOMAIN_GTT,
1222 				      &adev->gfx.rlc.rlc_autoload_bo,
1223 				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
1224 				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1225 
1226 	if (r) {
1227 		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
1228 		return r;
1229 	}
1230 
1231 	return 0;
1232 }
1233 
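/* Copy one firmware image into its slot in the RLC autoload buffer, zero the
 * remainder of the slot and, except for the RS64 PFP/ME images, set its bit
 * in the autoload mask.
 */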
1234 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1235 					      SOC21_FIRMWARE_ID id,
1236 					      const void *fw_data,
1237 					      uint32_t fw_size,
1238 					      uint32_t *fw_autoload_mask)
1239 {
1240 	uint32_t toc_offset;
1241 	uint32_t toc_fw_size;
1242 	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1243 
1244 	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
1245 		return;
1246 
1247 	toc_offset = rlc_autoload_info[id].offset;
1248 	toc_fw_size = rlc_autoload_info[id].size;
1249 
1250 	if (fw_size == 0)
1251 		fw_size = toc_fw_size;
1252 
1253 	if (fw_size > toc_fw_size)
1254 		fw_size = toc_fw_size;
1255 
1256 	memcpy(ptr + toc_offset, fw_data, fw_size);
1257 
1258 	if (fw_size < toc_fw_size)
1259 		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1260 
1261 	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
1262 		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
1263 }
1264 
1265 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
1266 							uint32_t *fw_autoload_mask)
1267 {
1268 	void *data;
1269 	uint32_t size;
1270 	uint64_t *toc_ptr;
1271 
1272 	*(uint64_t *)fw_autoload_mask |= 0x1;
1273 
1274 	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
1275 
1276 	data = adev->psp.toc.start_addr;
1277 	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
1278 
1279 	toc_ptr = (uint64_t *)data + size / 8 - 1;
1280 	*toc_ptr = *(uint64_t *)fw_autoload_mask;
1281 
1282 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
1283 					data, size, fw_autoload_mask);
1284 }
1285 
1286 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1287 							uint32_t *fw_autoload_mask)
1288 {
1289 	const __le32 *fw_data;
1290 	uint32_t fw_size;
1291 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1292 	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1293 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1294 	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1295 	uint16_t version_major, version_minor;
1296 
1297 	if (adev->gfx.rs64_enable) {
1298 		/* pfp ucode */
1299 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1300 			adev->gfx.pfp_fw->data;
1301 		/* instruction */
1302 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1303 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1304 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1305 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1306 						fw_data, fw_size, fw_autoload_mask);
1307 		/* data */
1308 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1309 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1310 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1311 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1312 						fw_data, fw_size, fw_autoload_mask);
1313 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1314 						fw_data, fw_size, fw_autoload_mask);
1315 		/* me ucode */
1316 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1317 			adev->gfx.me_fw->data;
1318 		/* instruction */
1319 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1320 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1321 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1322 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1323 						fw_data, fw_size, fw_autoload_mask);
1324 		/* data */
1325 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1326 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1327 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1328 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1329 						fw_data, fw_size, fw_autoload_mask);
1330 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1331 						fw_data, fw_size, fw_autoload_mask);
1332 		/* mec ucode */
1333 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1334 			adev->gfx.mec_fw->data;
1335 		/* instruction */
1336 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1337 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1338 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1339 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1340 						fw_data, fw_size, fw_autoload_mask);
1341 		/* data */
1342 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1343 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1344 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1345 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1346 						fw_data, fw_size, fw_autoload_mask);
1347 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1348 						fw_data, fw_size, fw_autoload_mask);
1349 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1350 						fw_data, fw_size, fw_autoload_mask);
1351 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1352 						fw_data, fw_size, fw_autoload_mask);
1353 	} else {
1354 		/* pfp ucode */
1355 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1356 			adev->gfx.pfp_fw->data;
1357 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1358 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1359 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1360 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1361 						fw_data, fw_size, fw_autoload_mask);
1362 
1363 		/* me ucode */
1364 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1365 			adev->gfx.me_fw->data;
1366 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1367 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1368 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1369 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1370 						fw_data, fw_size, fw_autoload_mask);
1371 
1372 		/* mec ucode */
1373 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1374 			adev->gfx.mec_fw->data;
1375 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1376 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1377 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1378 			cp_hdr->jt_size * 4;
1379 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1380 						fw_data, fw_size, fw_autoload_mask);
1381 	}
1382 
1383 	/* rlc ucode */
1384 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1385 		adev->gfx.rlc_fw->data;
1386 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1387 			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1388 	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1389 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1390 					fw_data, fw_size, fw_autoload_mask);
1391 
1392 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1393 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1394 	if (version_major == 2) {
1395 		if (version_minor >= 2) {
1396 			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1397 
1398 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1399 					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1400 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1401 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1402 					fw_data, fw_size, fw_autoload_mask);
1403 
1404 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1405 					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1406 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1407 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1408 					fw_data, fw_size, fw_autoload_mask);
1409 		}
1410 	}
1411 }
1412 
1413 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1414 							uint32_t *fw_autoload_mask)
1415 {
1416 	const __le32 *fw_data;
1417 	uint32_t fw_size;
1418 	const struct sdma_firmware_header_v2_0 *sdma_hdr;
1419 
1420 	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1421 		adev->sdma.instance[0].fw->data;
1422 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1423 			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1424 	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1425 
1426 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1427 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1428 
1429 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1430 			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1431 	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1432 
1433 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1434 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1435 }
1436 
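/*
 * Stage the RS64 MES ucode and data (stack) images for both MES pipes in the
 * RLC autoload buffer.
 */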
1437 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1438 							uint32_t *fw_autoload_mask)
1439 {
1440 	const __le32 *fw_data;
1441 	unsigned fw_size;
1442 	const struct mes_firmware_header_v1_0 *mes_hdr;
1443 	int pipe, ucode_id, data_id;
1444 
1445 	for (pipe = 0; pipe < 2; pipe++) {
1446 		if (pipe == 0) {
1447 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1448 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1449 		} else {
1450 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1451 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1452 		}
1453 
1454 		mes_hdr = (const struct mes_firmware_header_v1_0 *)
1455 			adev->mes.fw[pipe]->data;
1456 
1457 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1458 				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1459 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1460 
1461 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1462 				ucode_id, fw_data, fw_size, fw_autoload_mask);
1463 
1464 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1465 				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1466 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1467 
1468 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1469 				data_id, fw_data, fw_size, fw_autoload_mask);
1470 	}
1471 }
1472 
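/*
 * RLC backdoor autoload: stage the SDMA, GFX, MES and TOC firmwares in the
 * autoload buffer, point the GFX IMU bootloader registers at the RLC_G image,
 * then load and start the IMU so it can bring up the RLC.
 */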
1473 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1474 {
1475 	uint32_t rlc_g_offset, rlc_g_size;
1476 	uint64_t gpu_addr;
1477 	uint32_t autoload_fw_id[2];
1478 
1479 	memset(autoload_fw_id, 0, sizeof(autoload_fw_id));
1480 
1481 	/* RLC autoload sequence 2: copy ucode */
1482 	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1483 	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1484 	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1485 	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1486 
1487 	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1488 	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1489 	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1490 
1491 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1492 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1493 
1494 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1495 
1496 	/* RLC autoload sequence 3: load IMU fw */
1497 	if (adev->gfx.imu.funcs->load_microcode)
1498 		adev->gfx.imu.funcs->load_microcode(adev);
1499 	/* RLC autoload sequence 4: init IMU fw */
1500 	if (adev->gfx.imu.funcs->setup_imu)
1501 		adev->gfx.imu.funcs->setup_imu(adev);
1502 	if (adev->gfx.imu.funcs->start_imu)
1503 		adev->gfx.imu.funcs->start_imu(adev);
1504 
1505 	/* RLC autoload sequence 5: disable gpa mode */
1506 	gfx_v11_0_disable_gpa_mode(adev);
1507 
1508 	return 0;
1509 }
1510 
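/*
 * Allocate the register snapshot buffers used for IP dumps: one for the core
 * GC registers and one per compute/gfx queue instance. On allocation failure
 * the corresponding pointer is left NULL.
 */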
1511 static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
1512 {
1513 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
1514 	uint32_t *ptr;
1515 	uint32_t inst;
1516 
1517 	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
1518 	if (!ptr) {
1519 		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
1520 		adev->gfx.ip_dump_core = NULL;
1521 	} else {
1522 		adev->gfx.ip_dump_core = ptr;
1523 	}
1524 
1525 	/* Allocate memory for compute queue registers for all the instances */
1526 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
1527 	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
1528 		adev->gfx.mec.num_queue_per_pipe;
1529 
1530 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1531 	if (!ptr) {
1532 		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
1533 		adev->gfx.ip_dump_compute_queues = NULL;
1534 	} else {
1535 		adev->gfx.ip_dump_compute_queues = ptr;
1536 	}
1537 
1538 	/* Allocate memory for gfx queue registers for all the instances */
1539 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
1540 	inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
1541 		adev->gfx.me.num_queue_per_pipe;
1542 
1543 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1544 	if (!ptr) {
1545 		DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
1546 		adev->gfx.ip_dump_gfx_queues = NULL;
1547 	} else {
1548 		adev->gfx.ip_dump_gfx_queues = ptr;
1549 	}
1550 }
1551 
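/*
 * Software init: set the ME/MEC pipe and queue topology for the GC IP
 * version, register the CP/RLC interrupt sources, create the gfx and compute
 * rings, and allocate the KIQ, MQD, RLC autoload and IP dump resources.
 */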
1552 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
1553 {
1554 	int i, j, k, r, ring_id = 0;
1555 	int xcc_id = 0;
1556 	struct amdgpu_device *adev = ip_block->adev;
1557 
1558 	INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler);
1559 
1560 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1561 	case IP_VERSION(11, 0, 0):
1562 	case IP_VERSION(11, 0, 2):
1563 	case IP_VERSION(11, 0, 3):
1564 		adev->gfx.me.num_me = 1;
1565 		adev->gfx.me.num_pipe_per_me = 1;
1566 		adev->gfx.me.num_queue_per_pipe = 1;
1567 		adev->gfx.mec.num_mec = 2;
1568 		adev->gfx.mec.num_pipe_per_mec = 4;
1569 		adev->gfx.mec.num_queue_per_pipe = 4;
1570 		break;
1571 	case IP_VERSION(11, 0, 1):
1572 	case IP_VERSION(11, 0, 4):
1573 	case IP_VERSION(11, 5, 0):
1574 	case IP_VERSION(11, 5, 1):
1575 	case IP_VERSION(11, 5, 2):
1576 	case IP_VERSION(11, 5, 3):
1577 		adev->gfx.me.num_me = 1;
1578 		adev->gfx.me.num_pipe_per_me = 1;
1579 		adev->gfx.me.num_queue_per_pipe = 1;
1580 		adev->gfx.mec.num_mec = 1;
1581 		adev->gfx.mec.num_pipe_per_mec = 4;
1582 		adev->gfx.mec.num_queue_per_pipe = 4;
1583 		break;
1584 	default:
1585 		adev->gfx.me.num_me = 1;
1586 		adev->gfx.me.num_pipe_per_me = 1;
1587 		adev->gfx.me.num_queue_per_pipe = 1;
1588 		adev->gfx.mec.num_mec = 1;
1589 		adev->gfx.mec.num_pipe_per_mec = 4;
1590 		adev->gfx.mec.num_queue_per_pipe = 8;
1591 		break;
1592 	}
1593 
1594 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1595 	case IP_VERSION(11, 0, 0):
1596 	case IP_VERSION(11, 0, 2):
1597 	case IP_VERSION(11, 0, 3):
1598 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1599 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1600 		if (adev->gfx.me_fw_version >= 2280 &&
1601 		    adev->gfx.pfp_fw_version >= 2370 &&
1602 		    adev->gfx.mec_fw_version >= 2450 &&
1603 		    adev->mes.fw_version[0] >= 99) {
1604 			adev->gfx.enable_cleaner_shader = true;
1605 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1606 			if (r) {
1607 				adev->gfx.enable_cleaner_shader = false;
1608 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1609 			}
1610 		}
1611 		break;
1612 	default:
1613 		adev->gfx.enable_cleaner_shader = false;
1614 		break;
1615 	}
1616 
1617 	/* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */
1618 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) &&
1619 	    amdgpu_sriov_is_pp_one_vf(adev))
1620 		adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
1621 
1622 	/* EOP Event */
1623 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1624 			      GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1625 			      &adev->gfx.eop_irq);
1626 	if (r)
1627 		return r;
1628 
1629 	/* Bad opcode Event */
1630 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1631 			      GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR,
1632 			      &adev->gfx.bad_op_irq);
1633 	if (r)
1634 		return r;
1635 
1636 	/* Privileged reg */
1637 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1638 			      GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1639 			      &adev->gfx.priv_reg_irq);
1640 	if (r)
1641 		return r;
1642 
1643 	/* Privileged inst */
1644 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1645 			      GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1646 			      &adev->gfx.priv_inst_irq);
1647 	if (r)
1648 		return r;
1649 
1650 	/* FED error */
1651 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
1652 				  GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
1653 				  &adev->gfx.rlc_gc_fed_irq);
1654 	if (r)
1655 		return r;
1656 
1657 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1658 
1659 	gfx_v11_0_me_init(adev);
1660 
1661 	r = gfx_v11_0_rlc_init(adev);
1662 	if (r) {
1663 		DRM_ERROR("Failed to init rlc BOs!\n");
1664 		return r;
1665 	}
1666 
1667 	r = gfx_v11_0_mec_init(adev);
1668 	if (r) {
1669 		DRM_ERROR("Failed to init MEC BOs!\n");
1670 		return r;
1671 	}
1672 
1673 	/* set up the gfx ring */
1674 	for (i = 0; i < adev->gfx.me.num_me; i++) {
1675 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1676 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1677 				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1678 					continue;
1679 
1680 				r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1681 							    i, k, j);
1682 				if (r)
1683 					return r;
1684 				ring_id++;
1685 			}
1686 		}
1687 	}
1688 
1689 	ring_id = 0;
1690 	/* set up the compute queues - allocate horizontally across pipes */
1691 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1692 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1693 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1694 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
1695 								     k, j))
1696 					continue;
1697 
1698 				r = gfx_v11_0_compute_ring_init(adev, ring_id,
1699 								i, k, j);
1700 				if (r)
1701 					return r;
1702 
1703 				ring_id++;
1704 			}
1705 		}
1706 	}
1707 
1708 	adev->gfx.gfx_supported_reset =
1709 		amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
1710 	adev->gfx.compute_supported_reset =
1711 		amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
1712 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1713 	case IP_VERSION(11, 0, 0):
1714 	case IP_VERSION(11, 0, 2):
1715 	case IP_VERSION(11, 0, 3):
1716 		if ((adev->gfx.me_fw_version >= 2280) &&
1717 		    (adev->gfx.mec_fw_version >= 2410)) {
1718 			adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1719 			adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1720 		}
1721 		break;
1722 	default:
1723 		break;
1724 	}
1725 
1726 	if (!adev->enable_mes_kiq) {
1727 		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
1728 		if (r) {
1729 			DRM_ERROR("Failed to init KIQ BOs!\n");
1730 			return r;
1731 		}
1732 
1733 		r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
1734 		if (r)
1735 			return r;
1736 	}
1737 
1738 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
1739 	if (r)
1740 		return r;
1741 
1742 	/* allocate visible FB for rlc auto-loading fw */
1743 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1744 		r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1745 		if (r)
1746 			return r;
1747 	}
1748 
1749 	r = gfx_v11_0_gpu_early_init(adev);
1750 	if (r)
1751 		return r;
1752 
1753 	if (amdgpu_gfx_ras_sw_init(adev)) {
1754 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
1755 		return -EINVAL;
1756 	}
1757 
1758 	gfx_v11_0_alloc_ip_dump(adev);
1759 
1760 	r = amdgpu_gfx_sysfs_init(adev);
1761 	if (r)
1762 		return r;
1763 
1764 	return 0;
1765 }
1766 
1767 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1768 {
1769 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1770 			      &adev->gfx.pfp.pfp_fw_gpu_addr,
1771 			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
1772 
1773 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1774 			      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1775 			      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1776 }
1777 
1778 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1779 {
1780 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1781 			      &adev->gfx.me.me_fw_gpu_addr,
1782 			      (void **)&adev->gfx.me.me_fw_ptr);
1783 
1784 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1785 			       &adev->gfx.me.me_fw_data_gpu_addr,
1786 			       (void **)&adev->gfx.me.me_fw_data_ptr);
1787 }
1788 
1789 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1790 {
1791 	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1792 			&adev->gfx.rlc.rlc_autoload_gpu_addr,
1793 			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
1794 }
1795 
1796 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
1797 {
1798 	int i;
1799 	struct amdgpu_device *adev = ip_block->adev;
1800 
1801 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1802 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1803 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1804 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1805 
1806 	amdgpu_gfx_mqd_sw_fini(adev, 0);
1807 
1808 	if (!adev->enable_mes_kiq) {
1809 		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
1810 		amdgpu_gfx_kiq_fini(adev, 0);
1811 	}
1812 
1813 	amdgpu_gfx_cleaner_shader_sw_fini(adev);
1814 
1815 	gfx_v11_0_pfp_fini(adev);
1816 	gfx_v11_0_me_fini(adev);
1817 	gfx_v11_0_rlc_fini(adev);
1818 	gfx_v11_0_mec_fini(adev);
1819 
1820 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1821 		gfx_v11_0_rlc_autoload_buffer_fini(adev);
1822 
1823 	gfx_v11_0_free_microcode(adev);
1824 
1825 	amdgpu_gfx_sysfs_fini(adev);
1826 
1827 	kfree(adev->gfx.ip_dump_core);
1828 	kfree(adev->gfx.ip_dump_compute_queues);
1829 	kfree(adev->gfx.ip_dump_gfx_queues);
1830 
1831 	return 0;
1832 }
1833 
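/*
 * Program GRBM_GFX_INDEX to target a specific shader engine, shader array and
 * instance; a field value of 0xffffffff selects broadcast for that field.
 */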
1834 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1835 				   u32 sh_num, u32 instance, int xcc_id)
1836 {
1837 	u32 data;
1838 
1839 	if (instance == 0xffffffff)
1840 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1841 				     INSTANCE_BROADCAST_WRITES, 1);
1842 	else
1843 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1844 				     instance);
1845 
1846 	if (se_num == 0xffffffff)
1847 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1848 				     1);
1849 	else
1850 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1851 
1852 	if (sh_num == 0xffffffff)
1853 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1854 				     1);
1855 	else
1856 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1857 
1858 	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1859 }
1860 
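/* Active shader array mask: all SAs minus the hardware- and user-disabled ones. */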
1861 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
1862 {
1863 	u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
1864 
1865 	gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
1866 	gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
1867 					   CC_GC_SA_UNIT_DISABLE,
1868 					   SA_DISABLE);
1869 	gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE);
1870 	gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
1871 						 GC_USER_SA_UNIT_DISABLE,
1872 						 SA_DISABLE);
1873 	sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
1874 					    adev->gfx.config.max_shader_engines);
1875 
1876 	return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
1877 }
1878 
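/* Active render backend mask: all RBs minus the hardware- and user-disabled ones. */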
1879 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1880 {
1881 	u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
1882 	u32 rb_mask;
1883 
1884 	gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1885 	gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
1886 					    CC_RB_BACKEND_DISABLE,
1887 					    BACKEND_DISABLE);
1888 	gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1889 	gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
1890 						 GC_USER_RB_BACKEND_DISABLE,
1891 						 BACKEND_DISABLE);
1892 	rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
1893 					    adev->gfx.config.max_shader_engines);
1894 
1895 	return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
1896 }
1897 
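/* Derive the enabled RB bitmap and RB count from the active SA and RB masks. */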
1898 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
1899 {
1900 	u32 rb_bitmap_per_sa;
1901 	u32 rb_bitmap_width_per_sa;
1902 	u32 max_sa;
1903 	u32 active_sa_bitmap;
1904 	u32 global_active_rb_bitmap;
1905 	u32 active_rb_bitmap = 0;
1906 	u32 i;
1907 
1908 	/* query sa bitmap from SA_UNIT_DISABLE registers */
1909 	active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
1910 	/* query rb bitmap from RB_BACKEND_DISABLE registers */
1911 	global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
1912 
1913 	/* generate active rb bitmap according to active sa bitmap */
1914 	max_sa = adev->gfx.config.max_shader_engines *
1915 		 adev->gfx.config.max_sh_per_se;
1916 	rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
1917 				 adev->gfx.config.max_sh_per_se;
1918 	rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
1919 
1920 	for (i = 0; i < max_sa; i++) {
1921 		if (active_sa_bitmap & (1 << i))
1922 			active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
1923 	}
1924 
1925 	active_rb_bitmap &= global_active_rb_bitmap;
1926 	adev->gfx.config.backend_enable_mask = active_rb_bitmap;
1927 	adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
1928 }
1929 
1930 #define DEFAULT_SH_MEM_BASES	(0x6000)
1931 #define LDS_APP_BASE           0x1
1932 #define SCRATCH_APP_BASE       0x2
1933 
1934 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
1935 {
1936 	int i;
1937 	uint32_t sh_mem_bases;
1938 	uint32_t data;
1939 
1940 	/*
1941 	 * Configure apertures:
1942 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1943 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1944 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1945 	 */
1946 	sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1947 			SCRATCH_APP_BASE;
1948 
1949 	mutex_lock(&adev->srbm_mutex);
1950 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1951 		soc21_grbm_select(adev, 0, 0, 0, i);
1952 		/* CP and shaders */
1953 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1954 		WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1955 
1956 		/* Enable trap for each kfd vmid. */
1957 		data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1958 		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
1959 		WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
1960 	}
1961 	soc21_grbm_select(adev, 0, 0, 0, 0);
1962 	mutex_unlock(&adev->srbm_mutex);
1963 
1964 	/*
1965 	 * Initialize all compute VMIDs to have no GDS, GWS, or OA
1966 	 * access. These should be enabled by FW for target VMIDs.
1967 	 */
1968 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1969 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
1970 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
1971 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
1972 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
1973 	}
1974 }
1975 
1976 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
1977 {
1978 	int vmid;
1979 
1980 	/*
1981 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
1982 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
1983 	 * the driver can enable them for graphics. VMID0 should maintain
1984 	 * access so that HWS firmware can save/restore entries.
1985 	 */
1986 	for (vmid = 1; vmid < 16; vmid++) {
1987 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
1988 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
1989 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
1990 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
1991 	}
1992 }
1993 
1994 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
1995 {
1996 	/* TODO: harvest feature to be added later. */
1997 }
1998 
1999 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
2000 {
2001 	/* TCCs are global (not instanced). */
2002 	uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
2003 			       RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
2004 
2005 	adev->gfx.config.tcc_disabled_mask =
2006 		REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
2007 		(REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
2008 }
2009 
2010 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
2011 {
2012 	u32 tmp;
2013 	int i;
2014 
2015 	if (!amdgpu_sriov_vf(adev))
2016 		WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2017 
2018 	gfx_v11_0_setup_rb(adev);
2019 	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
2020 	gfx_v11_0_get_tcc_info(adev);
2021 	adev->gfx.config.pa_sc_tile_steering_override = 0;
2022 
2023 	/* Set whether texture coordinate truncation is conformant. */
2024 	/* Record whether texture coordinate truncation is conformant. */
2025 	adev->gfx.config.ta_cntl2_truncate_coord_mode =
2026 		REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE);
2027 
2028 	/* XXX SH_MEM regs */
2029 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2030 	mutex_lock(&adev->srbm_mutex);
2031 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2032 		soc21_grbm_select(adev, 0, 0, 0, i);
2033 		/* CP and shaders */
2034 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
2035 		if (i != 0) {
2036 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2037 				(adev->gmc.private_aperture_start >> 48));
2038 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2039 				(adev->gmc.shared_aperture_start >> 48));
2040 			WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
2041 		}
2042 	}
2043 	soc21_grbm_select(adev, 0, 0, 0, 0);
2044 
2045 	mutex_unlock(&adev->srbm_mutex);
2046 
2047 	gfx_v11_0_init_compute_vmid(adev);
2048 	gfx_v11_0_init_gds_vmid(adev);
2049 }
2050 
2051 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev,
2052 				      int me, int pipe)
2053 {
2054 	if (me != 0)
2055 		return 0;
2056 
2057 	switch (pipe) {
2058 	case 0:
2059 		return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
2060 	case 1:
2061 		return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
2062 	default:
2063 		return 0;
2064 	}
2065 }
2066 
2067 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev,
2068 				      int me, int pipe)
2069 {
2070 	/*
2071 	 * amdgpu controls only the first MEC. That's why this function only
2072 	 * handles the setting of interrupts for this specific MEC. All other
2073 	 * pipes' interrupts are set by amdkfd.
2074 	 */
2075 	if (me != 1)
2076 		return 0;
2077 
2078 	switch (pipe) {
2079 	case 0:
2080 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
2081 	case 1:
2082 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
2083 	case 2:
2084 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
2085 	case 3:
2086 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
2087 	default:
2088 		return 0;
2089 	}
2090 }
2091 
2092 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2093 					       bool enable)
2094 {
2095 	u32 tmp, cp_int_cntl_reg;
2096 	int i, j;
2097 
2098 	if (amdgpu_sriov_vf(adev))
2099 		return;
2100 
2101 	for (i = 0; i < adev->gfx.me.num_me; i++) {
2102 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
2103 			cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
2104 
2105 			if (cp_int_cntl_reg) {
2106 				tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
2107 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
2108 						    enable ? 1 : 0);
2109 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
2110 						    enable ? 1 : 0);
2111 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
2112 						    enable ? 1 : 0);
2113 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
2114 						    enable ? 1 : 0);
2115 				WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
2116 			}
2117 		}
2118 	}
2119 }
2120 
2121 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
2122 {
2123 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2124 
2125 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
2126 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2127 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
2128 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2129 	WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
2130 
2131 	return 0;
2132 }
2133 
2134 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
2135 {
2136 	u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
2137 
2138 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2139 	WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
2140 }
2141 
2142 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
2143 {
2144 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2145 	udelay(50);
2146 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2147 	udelay(50);
2148 }
2149 
2150 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
2151 					     bool enable)
2152 {
2153 	uint32_t rlc_pg_cntl;
2154 
2155 	rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
2156 
2157 	if (!enable) {
2158 		/* RLC_PG_CNTL[23] = 0 (default)
2159 		 * RLC will wait for handshake acks with SMU
2160 		 * GFXOFF will be enabled
2161 		 * RLC_PG_CNTL[23] = 1
2162 		 * RLC will not issue any message to SMU
2163 		 * hence no handshake between SMU & RLC
2164 		 * GFXOFF will be disabled
2165 		 */
2166 		rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
2167 	} else
2168 		rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
2169 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
2170 }
2171 
2172 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
2173 {
2174 	/* TODO: re-enable the rlc & smu handshake once the smu
2175 	 * and gfxoff features work as expected */
2176 	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
2177 		gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
2178 
2179 	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2180 	udelay(50);
2181 }
2182 
2183 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
2184 {
2185 	uint32_t tmp;
2186 
2187 	/* enable Save Restore Machine */
2188 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
2189 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2190 	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
2191 	WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
2192 }
2193 
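/* Legacy (direct) load of the RLC_G ucode through the RLC_GPM_UCODE registers. */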
2194 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
2195 {
2196 	const struct rlc_firmware_header_v2_0 *hdr;
2197 	const __le32 *fw_data;
2198 	unsigned i, fw_size;
2199 
2200 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2201 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2202 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2203 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2204 
2205 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
2206 		     RLCG_UCODE_LOADING_START_ADDRESS);
2207 
2208 	for (i = 0; i < fw_size; i++)
2209 		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
2210 			     le32_to_cpup(fw_data++));
2211 
2212 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2213 }
2214 
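/*
 * Direct load of the RLC v2.2 LX6 IRAM and DRAM boot images, then release the
 * LX6 core from reset with PDEBUG enabled.
 */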
2215 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
2216 {
2217 	const struct rlc_firmware_header_v2_2 *hdr;
2218 	const __le32 *fw_data;
2219 	unsigned i, fw_size;
2220 	u32 tmp;
2221 
2222 	hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
2223 
2224 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2225 			le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
2226 	fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
2227 
2228 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
2229 
2230 	for (i = 0; i < fw_size; i++) {
2231 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2232 			msleep(1);
2233 		WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
2234 				le32_to_cpup(fw_data++));
2235 	}
2236 
2237 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
2238 
2239 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2240 			le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
2241 	fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
2242 
2243 	WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
2244 	for (i = 0; i < fw_size; i++) {
2245 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2246 			msleep(1);
2247 		WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
2248 				le32_to_cpup(fw_data++));
2249 	}
2250 
2251 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
2252 
2253 	tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
2254 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
2255 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
2256 	WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
2257 }
2258 
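/*
 * Direct load of the RLC v2.3 RLCP (pace) and RLCV (GPU IOV) ucode images and
 * enable their execution.
 */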
2259 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
2260 {
2261 	const struct rlc_firmware_header_v2_3 *hdr;
2262 	const __le32 *fw_data;
2263 	unsigned i, fw_size;
2264 	u32 tmp;
2265 
2266 	hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
2267 
2268 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2269 			le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
2270 	fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
2271 
2272 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
2273 
2274 	for (i = 0; i < fw_size; i++) {
2275 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2276 			msleep(1);
2277 		WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
2278 				le32_to_cpup(fw_data++));
2279 	}
2280 
2281 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
2282 
2283 	tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
2284 	tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
2285 	WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
2286 
2287 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2288 			le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
2289 	fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
2290 
2291 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
2292 
2293 	for (i = 0; i < fw_size; i++) {
2294 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2295 			msleep(1);
2296 		WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
2297 				le32_to_cpup(fw_data++));
2298 	}
2299 
2300 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
2301 
2302 	tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
2303 	tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
2304 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
2305 }
2306 
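/* Direct RLC firmware load: dispatch to the per-header-version loaders above. */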
2307 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
2308 {
2309 	const struct rlc_firmware_header_v2_0 *hdr;
2310 	uint16_t version_major;
2311 	uint16_t version_minor;
2312 
2313 	if (!adev->gfx.rlc_fw)
2314 		return -EINVAL;
2315 
2316 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2317 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2318 
2319 	version_major = le16_to_cpu(hdr->header.header_version_major);
2320 	version_minor = le16_to_cpu(hdr->header.header_version_minor);
2321 
2322 	if (version_major == 2) {
2323 		gfx_v11_0_load_rlcg_microcode(adev);
2324 		if (amdgpu_dpm == 1) {
2325 			if (version_minor >= 2)
2326 				gfx_v11_0_load_rlc_iram_dram_microcode(adev);
2327 			if (version_minor == 3)
2328 				gfx_v11_0_load_rlcp_rlcv_microcode(adev);
2329 		}
2330 
2331 		return 0;
2332 	}
2333 
2334 	return -EINVAL;
2335 }
2336 
2337 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
2338 {
2339 	int r;
2340 
2341 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
2342 		gfx_v11_0_init_csb(adev);
2343 
2344 		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
2345 			gfx_v11_0_rlc_enable_srm(adev);
2346 	} else {
2347 		if (amdgpu_sriov_vf(adev)) {
2348 			gfx_v11_0_init_csb(adev);
2349 			return 0;
2350 		}
2351 
2352 		adev->gfx.rlc.funcs->stop(adev);
2353 
2354 		/* disable CG */
2355 		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
2356 
2357 		/* disable PG */
2358 		WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
2359 
2360 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2361 			/* legacy rlc firmware loading */
2362 			r = gfx_v11_0_rlc_load_microcode(adev);
2363 			if (r)
2364 				return r;
2365 		}
2366 
2367 		gfx_v11_0_init_csb(adev);
2368 
2369 		adev->gfx.rlc.funcs->start(adev);
2370 	}
2371 	return 0;
2372 }
2373 
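/*
 * Invalidate the ME L1 instruction cache and point it at the ME ucode staged
 * in the autoload buffer (used when the RS64 CP is not enabled).
 */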
2374 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
2375 {
2376 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2377 	uint32_t tmp;
2378 	int i;
2379 
2380 	/* Trigger an invalidation of the L1 instruction caches */
2381 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2382 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2383 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2384 
2385 	/* Wait for invalidation complete */
2386 	for (i = 0; i < usec_timeout; i++) {
2387 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2388 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2389 					INVALIDATE_CACHE_COMPLETE))
2390 			break;
2391 		udelay(1);
2392 	}
2393 
2394 	if (i >= usec_timeout) {
2395 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2396 		return -EINVAL;
2397 	}
2398 
2399 	if (amdgpu_emu_mode == 1)
2400 		adev->hdp.funcs->flush_hdp(adev, NULL);
2401 
2402 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2403 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2404 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2405 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2406 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2407 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2408 
2409 	/* Program me ucode address into instruction cache address register */
2410 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2411 			lower_32_bits(addr) & 0xFFFFF000);
2412 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2413 			upper_32_bits(addr));
2414 
2415 	return 0;
2416 }
2417 
2418 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
2419 {
2420 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2421 	uint32_t tmp;
2422 	int i;
2423 
2424 	/* Trigger an invalidation of the L1 instruction caches */
2425 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2426 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2427 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2428 
2429 	/* Wait for invalidation complete */
2430 	for (i = 0; i < usec_timeout; i++) {
2431 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2432 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2433 					INVALIDATE_CACHE_COMPLETE))
2434 			break;
2435 		udelay(1);
2436 	}
2437 
2438 	if (i >= usec_timeout) {
2439 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2440 		return -EINVAL;
2441 	}
2442 
2443 	if (amdgpu_emu_mode == 1)
2444 		adev->hdp.funcs->flush_hdp(adev, NULL);
2445 
2446 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2447 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2448 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2449 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2450 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2451 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2452 
2453 	/* Program pfp ucode address into instruction cache address register */
2454 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2455 			lower_32_bits(addr) & 0xFFFFF000);
2456 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2457 			upper_32_bits(addr));
2458 
2459 	return 0;
2460 }
2461 
2462 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2463 {
2464 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2465 	uint32_t tmp;
2466 	int i;
2467 
2468 	/* Trigger an invalidation of the L1 instruction caches */
2469 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2470 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2471 
2472 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2473 
2474 	/* Wait for invalidation complete */
2475 	for (i = 0; i < usec_timeout; i++) {
2476 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2477 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2478 					INVALIDATE_CACHE_COMPLETE))
2479 			break;
2480 		udelay(1);
2481 	}
2482 
2483 	if (i >= usec_timeout) {
2484 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2485 		return -EINVAL;
2486 	}
2487 
2488 	if (amdgpu_emu_mode == 1)
2489 		adev->hdp.funcs->flush_hdp(adev, NULL);
2490 
2491 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2492 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2493 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2494 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2495 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2496 
2497 	/* Program mec1 ucode address into instruction cache address register */
2498 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2499 			lower_32_bits(addr) & 0xFFFFF000);
2500 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2501 			upper_32_bits(addr));
2502 
2503 	return 0;
2504 }
2505 
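/*
 * RS64 PFP setup: program the instruction cache base, wait for the implicit
 * invalidation, prime the cache, set the per-pipe program counter start and
 * data cache base while pulsing the pipe resets, then invalidate the data
 * cache.
 */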
2506 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2507 {
2508 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2509 	uint32_t tmp;
2510 	unsigned i, pipe_id;
2511 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2512 
2513 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2514 		adev->gfx.pfp_fw->data;
2515 
2516 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2517 		lower_32_bits(addr));
2518 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2519 		upper_32_bits(addr));
2520 
2521 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2522 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2523 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2524 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2525 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2526 
2527 	/*
2528 	 * Programming any of the CP_PFP_IC_BASE registers
2529 	 * forces invalidation of the PFP L1 I$. Wait for the
2530 	 * invalidation to complete.
2531 	 */
2532 	for (i = 0; i < usec_timeout; i++) {
2533 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2534 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2535 			INVALIDATE_CACHE_COMPLETE))
2536 			break;
2537 		udelay(1);
2538 	}
2539 
2540 	if (i >= usec_timeout) {
2541 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2542 		return -EINVAL;
2543 	}
2544 
2545 	/* Prime the L1 instruction caches */
2546 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2547 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2548 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2549 	/* Wait for the cache to be primed */
2550 	for (i = 0; i < usec_timeout; i++) {
2551 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2552 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2553 			ICACHE_PRIMED))
2554 			break;
2555 		udelay(1);
2556 	}
2557 
2558 	if (i >= usec_timeout) {
2559 		dev_err(adev->dev, "failed to prime instruction cache\n");
2560 		return -EINVAL;
2561 	}
2562 
2563 	mutex_lock(&adev->srbm_mutex);
2564 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2565 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2566 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2567 			(pfp_hdr->ucode_start_addr_hi << 30) |
2568 			(pfp_hdr->ucode_start_addr_lo >> 2));
2569 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2570 			pfp_hdr->ucode_start_addr_hi >> 2);
2571 
2572 		/*
2573 		 * Program CP_ME_CNTL to reset given PIPE to take
2574 		 * effect of CP_PFP_PRGRM_CNTR_START.
2575 		 */
2576 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2577 		if (pipe_id == 0)
2578 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2579 					PFP_PIPE0_RESET, 1);
2580 		else
2581 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2582 					PFP_PIPE1_RESET, 1);
2583 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2584 
2585 		/* Clear the pfp pipe reset bit. */
2586 		if (pipe_id == 0)
2587 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2588 					PFP_PIPE0_RESET, 0);
2589 		else
2590 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2591 					PFP_PIPE1_RESET, 0);
2592 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2593 
2594 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2595 			lower_32_bits(addr2));
2596 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2597 			upper_32_bits(addr2));
2598 	}
2599 	soc21_grbm_select(adev, 0, 0, 0, 0);
2600 	mutex_unlock(&adev->srbm_mutex);
2601 
2602 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2603 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2604 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2605 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2606 
2607 	/* Invalidate the data caches */
2608 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2609 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2610 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2611 
2612 	for (i = 0; i < usec_timeout; i++) {
2613 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2614 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2615 			INVALIDATE_DCACHE_COMPLETE))
2616 			break;
2617 		udelay(1);
2618 	}
2619 
2620 	if (i >= usec_timeout) {
2621 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2622 		return -EINVAL;
2623 	}
2624 
2625 	return 0;
2626 }
2627 
2628 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2629 {
2630 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2631 	uint32_t tmp;
2632 	unsigned i, pipe_id;
2633 	const struct gfx_firmware_header_v2_0 *me_hdr;
2634 
2635 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2636 		adev->gfx.me_fw->data;
2637 
2638 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2639 		lower_32_bits(addr));
2640 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2641 		upper_32_bits(addr));
2642 
2643 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2644 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2645 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2646 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2647 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2648 
2649 	/*
2650 	 * Programming any of the CP_ME_IC_BASE registers
2651 	 * forces invalidation of the ME L1 I$. Wait for the
2652 	 * invalidation to complete.
2653 	 */
2654 	for (i = 0; i < usec_timeout; i++) {
2655 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2656 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2657 			INVALIDATE_CACHE_COMPLETE))
2658 			break;
2659 		udelay(1);
2660 	}
2661 
2662 	if (i >= usec_timeout) {
2663 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2664 		return -EINVAL;
2665 	}
2666 
2667 	/* Prime the instruction caches */
2668 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2669 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2670 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2671 
2672 	/* Wait for the instruction cache to be primed */
2673 	for (i = 0; i < usec_timeout; i++) {
2674 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2675 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2676 			ICACHE_PRIMED))
2677 			break;
2678 		udelay(1);
2679 	}
2680 
2681 	if (i >= usec_timeout) {
2682 		dev_err(adev->dev, "failed to prime instruction cache\n");
2683 		return -EINVAL;
2684 	}
2685 
2686 	mutex_lock(&adev->srbm_mutex);
2687 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2688 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2689 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2690 			(me_hdr->ucode_start_addr_hi << 30) |
2691 			(me_hdr->ucode_start_addr_lo >> 2));
2692 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2693 			me_hdr->ucode_start_addr_hi >> 2);
2694 
2695 		/*
2696 		 * Program CP_ME_CNTL to reset given PIPE to take
2697 		 * effect of CP_ME_PRGRM_CNTR_START.
2698 		 */
2699 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2700 		if (pipe_id == 0)
2701 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2702 					ME_PIPE0_RESET, 1);
2703 		else
2704 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2705 					ME_PIPE1_RESET, 1);
2706 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2707 
2708 		/* Clear the me pipe reset bit. */
2709 		if (pipe_id == 0)
2710 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2711 					ME_PIPE0_RESET, 0);
2712 		else
2713 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2714 					ME_PIPE1_RESET, 0);
2715 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2716 
2717 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2718 			lower_32_bits(addr2));
2719 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2720 			upper_32_bits(addr2));
2721 	}
2722 	soc21_grbm_select(adev, 0, 0, 0, 0);
2723 	mutex_unlock(&adev->srbm_mutex);
2724 
2725 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2726 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2727 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2728 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2729 
2730 	/* Invalidate the data caches */
2731 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2732 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2733 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2734 
2735 	for (i = 0; i < usec_timeout; i++) {
2736 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2737 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2738 			INVALIDATE_DCACHE_COMPLETE))
2739 			break;
2740 		udelay(1);
2741 	}
2742 
2743 	if (i >= usec_timeout) {
2744 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2745 		return -EINVAL;
2746 	}
2747 
2748 	return 0;
2749 }
2750 
2751 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2752 {
2753 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2754 	uint32_t tmp;
2755 	unsigned i;
2756 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2757 
2758 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2759 		adev->gfx.mec_fw->data;
2760 
2761 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2762 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2763 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2764 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2765 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2766 
2767 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2768 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2769 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2770 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2771 
2772 	mutex_lock(&adev->srbm_mutex);
2773 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2774 		soc21_grbm_select(adev, 1, i, 0, 0);
2775 
2776 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, lower_32_bits(addr2));
2777 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2778 		     upper_32_bits(addr2));
2779 
2780 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2781 					mec_hdr->ucode_start_addr_lo >> 2 |
2782 					mec_hdr->ucode_start_addr_hi << 30);
2783 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2784 					mec_hdr->ucode_start_addr_hi >> 2);
2785 
2786 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, lower_32_bits(addr));
2787 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2788 		     upper_32_bits(addr));
2789 	}
2790 	soc21_grbm_select(adev, 0, 0, 0, 0);
2791 	mutex_unlock(&adev->srbm_mutex);
2792 
2793 	/* Trigger an invalidation of the MEC data cache */
2794 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2795 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2796 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2797 
2798 	/* Wait for invalidation complete */
2799 	for (i = 0; i < usec_timeout; i++) {
2800 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2801 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2802 				       INVALIDATE_DCACHE_COMPLETE))
2803 			break;
2804 		udelay(1);
2805 	}
2806 
2807 	if (i >= usec_timeout) {
2808 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2809 		return -EINVAL;
2810 	}
2811 
2812 	/* Trigger an invalidation of the L1 instruction caches */
2813 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2814 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2815 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2816 
2817 	/* Wait for invalidation complete */
2818 	for (i = 0; i < usec_timeout; i++) {
2819 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2820 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2821 				       INVALIDATE_CACHE_COMPLETE))
2822 			break;
2823 		udelay(1);
2824 	}
2825 
2826 	if (i >= usec_timeout) {
2827 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2828 		return -EINVAL;
2829 	}
2830 
2831 	return 0;
2832 }
2833 
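/*
 * Program the RS64 PFP/ME/MEC program counter start addresses for every pipe
 * and pulse the corresponding pipe resets so the new start addresses take
 * effect.
 */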
2834 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2835 {
2836 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2837 	const struct gfx_firmware_header_v2_0 *me_hdr;
2838 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2839 	uint32_t pipe_id, tmp;
2840 
2841 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2842 		adev->gfx.mec_fw->data;
2843 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2844 		adev->gfx.me_fw->data;
2845 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2846 		adev->gfx.pfp_fw->data;
2847 
2848 	/* config pfp program start addr */
2849 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2850 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2851 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2852 			(pfp_hdr->ucode_start_addr_hi << 30) |
2853 			(pfp_hdr->ucode_start_addr_lo >> 2));
2854 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2855 			pfp_hdr->ucode_start_addr_hi >> 2);
2856 	}
2857 	soc21_grbm_select(adev, 0, 0, 0, 0);
2858 
2859 	/* reset pfp pipe */
2860 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2861 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
2862 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
2863 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2864 
2865 	/* clear pfp pipe reset */
2866 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2867 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2868 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2869 
2870 	/* config me program start addr */
2871 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2872 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2873 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2874 			(me_hdr->ucode_start_addr_hi << 30) |
2875 			(me_hdr->ucode_start_addr_lo >> 2));
2876 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2877 			me_hdr->ucode_start_addr_hi >> 2);
2878 	}
2879 	soc21_grbm_select(adev, 0, 0, 0, 0);
2880 
2881 	/* reset me pipe */
2882 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2883 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2884 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2885 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2886 
2887 	/* clear me pipe reset */
2888 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2889 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2890 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2891 
2892 	/* config mec program start addr */
2893 	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2894 		soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2895 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2896 					mec_hdr->ucode_start_addr_lo >> 2 |
2897 					mec_hdr->ucode_start_addr_hi << 30);
2898 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2899 					mec_hdr->ucode_start_addr_hi >> 2);
2900 	}
2901 	soc21_grbm_select(adev, 0, 0, 0, 0);
2902 
2903 	/* reset mec pipe */
2904 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2905 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
2906 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
2907 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
2908 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
2909 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2910 
2911 	/* clear mec pipe reset */
2912 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
2913 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
2914 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
2915 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
2916 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2917 }
2918 
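/*
 * Poll CP_STAT and the RLC bootload status until the RLC-driven autoload has
 * finished, then point the CP instruction/data caches at the firmware images
 * staged in the autoload buffer.
 */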
2919 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2920 {
2921 	uint32_t cp_status;
2922 	uint32_t bootload_status;
2923 	int i, r;
2924 	uint64_t addr, addr2;
2925 
2926 	for (i = 0; i < adev->usec_timeout; i++) {
2927 		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2928 
2929 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
2930 			    IP_VERSION(11, 0, 1) ||
2931 		    amdgpu_ip_version(adev, GC_HWIP, 0) ==
2932 			    IP_VERSION(11, 0, 4) ||
2933 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
2934 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) ||
2935 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) ||
2936 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3))
2937 			bootload_status = RREG32_SOC15(GC, 0,
2938 					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
2939 		else
2940 			bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2941 
2942 		if ((cp_status == 0) &&
2943 		    (REG_GET_FIELD(bootload_status,
2944 			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2945 			break;
2946 		}
2947 		udelay(1);
2948 	}
2949 
2950 	if (i >= adev->usec_timeout) {
2951 		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2952 		return -ETIMEDOUT;
2953 	}
2954 
2955 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2956 		if (adev->gfx.rs64_enable) {
2957 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2958 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
2959 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2960 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
2961 			r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
2962 			if (r)
2963 				return r;
2964 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2965 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
2966 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2967 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
2968 			r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
2969 			if (r)
2970 				return r;
2971 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2972 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
2973 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2974 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
2975 			r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
2976 			if (r)
2977 				return r;
2978 		} else {
2979 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2980 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
2981 			r = gfx_v11_0_config_me_cache(adev, addr);
2982 			if (r)
2983 				return r;
2984 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2985 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
2986 			r = gfx_v11_0_config_pfp_cache(adev, addr);
2987 			if (r)
2988 				return r;
2989 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2990 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
2991 			r = gfx_v11_0_config_mec_cache(adev, addr);
2992 			if (r)
2993 				return r;
2994 		}
2995 	}
2996 
2997 	return 0;
2998 }
2999 
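/* Halt or un-halt the gfx CP (PFP and ME) and wait for CP_STAT to clear. */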
3000 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3001 {
3002 	int i;
3003 	u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3004 
3005 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3006 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3007 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3008 
3009 	for (i = 0; i < adev->usec_timeout; i++) {
3010 		if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
3011 			break;
3012 		udelay(1);
3013 	}
3014 
3015 	if (i >= adev->usec_timeout)
3016 		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
3017 
3018 	return 0;
3019 }
3020 
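/*
 * Legacy PFP load: copy the ucode into a GTT buffer, point the PFP
 * instruction cache at it and write the jump table through the
 * CP_HYP_PFP_UCODE registers.
 */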
3021 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
3022 {
3023 	int r;
3024 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3025 	const __le32 *fw_data;
3026 	unsigned i, fw_size;
3027 
3028 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3029 		adev->gfx.pfp_fw->data;
3030 
3031 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3032 
3033 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
3034 		le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3035 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
3036 
3037 	r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
3038 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3039 				      &adev->gfx.pfp.pfp_fw_obj,
3040 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
3041 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
3042 	if (r) {
3043 		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
3044 		gfx_v11_0_pfp_fini(adev);
3045 		return r;
3046 	}
3047 
3048 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
3049 
3050 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
3051 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
3052 
3053 	gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
3054 
3055 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
3056 
3057 	for (i = 0; i < pfp_hdr->jt_size; i++)
3058 		WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
3059 			     le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
3060 
3061 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3062 
3063 	return 0;
3064 }
3065 
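/*
 * RS64 PFP microcode load. The v2 firmware image carries separate
 * instruction and data sections, each copied into its own 64KB aligned
 * BO. The PFP instruction cache is invalidated and primed, every PFP
 * pipe is pointed at the ucode start address and pulsed through reset
 * so the new program counter takes effect, and the RS64 data cache is
 * invalidated at the end.
 */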
3066 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
3067 {
3068 	int r;
3069 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
3070 	const __le32 *fw_ucode, *fw_data;
3071 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
3072 	uint32_t tmp;
3073 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
3074 
3075 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
3076 		adev->gfx.pfp_fw->data;
3077 
3078 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3079 
3080 	/* instruction */
3081 	fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
3082 		le32_to_cpu(pfp_hdr->ucode_offset_bytes));
3083 	fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
3084 	/* data */
3085 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
3086 		le32_to_cpu(pfp_hdr->data_offset_bytes));
3087 	fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
3088 
3089 	/* 64kb align */
3090 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3091 				      64 * 1024,
3092 				      AMDGPU_GEM_DOMAIN_VRAM |
3093 				      AMDGPU_GEM_DOMAIN_GTT,
3094 				      &adev->gfx.pfp.pfp_fw_obj,
3095 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
3096 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
3097 	if (r) {
3098 		dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
3099 		gfx_v11_0_pfp_fini(adev);
3100 		return r;
3101 	}
3102 
3103 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3104 				      64 * 1024,
3105 				      AMDGPU_GEM_DOMAIN_VRAM |
3106 				      AMDGPU_GEM_DOMAIN_GTT,
3107 				      &adev->gfx.pfp.pfp_fw_data_obj,
3108 				      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
3109 				      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
3110 	if (r) {
3111 		dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
3112 		gfx_v11_0_pfp_fini(adev);
3113 		return r;
3114 	}
3115 
3116 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
3117 	memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
3118 
3119 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
3120 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
3121 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
3122 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
3123 
3124 	if (amdgpu_emu_mode == 1)
3125 		adev->hdp.funcs->flush_hdp(adev, NULL);
3126 
3127 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
3128 		lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
3129 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
3130 		upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
3131 
3132 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
3133 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
3134 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
3135 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
3136 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
3137 
3138 	/*
3139 	 * Programming any of the CP_PFP_IC_BASE registers
3140 	 * forces an invalidation of the PFP L1 I$. Wait for
3141 	 * the invalidation to complete.
3142 	 */
3143 	for (i = 0; i < usec_timeout; i++) {
3144 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3145 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
3146 			INVALIDATE_CACHE_COMPLETE))
3147 			break;
3148 		udelay(1);
3149 	}
3150 
3151 	if (i >= usec_timeout) {
3152 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3153 		return -EINVAL;
3154 	}
3155 
3156 	/* Prime the L1 instruction caches */
3157 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3158 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
3159 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
3160 	/* Wait for the instruction cache to be primed */
3161 	for (i = 0; i < usec_timeout; i++) {
3162 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3163 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
3164 			ICACHE_PRIMED))
3165 			break;
3166 		udelay(1);
3167 	}
3168 
3169 	if (i >= usec_timeout) {
3170 		dev_err(adev->dev, "failed to prime instruction cache\n");
3171 		return -EINVAL;
3172 	}
3173 
3174 	mutex_lock(&adev->srbm_mutex);
3175 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3176 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3177 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
3178 			(pfp_hdr->ucode_start_addr_hi << 30) |
3179 			(pfp_hdr->ucode_start_addr_lo >> 2) );
3180 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
3181 			pfp_hdr->ucode_start_addr_hi>>2);
3182 
3183 		/*
3184 		 * Reset the given pipe via CP_ME_CNTL so that
3185 		 * CP_PFP_PRGRM_CNTR_START takes effect.
3186 		 */
3187 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3188 		if (pipe_id == 0)
3189 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3190 					PFP_PIPE0_RESET, 1);
3191 		else
3192 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3193 					PFP_PIPE1_RESET, 1);
3194 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3195 
3196 		/* Clear the pfp pipe reset bit. */
3197 		if (pipe_id == 0)
3198 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3199 					PFP_PIPE0_RESET, 0);
3200 		else
3201 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3202 					PFP_PIPE1_RESET, 0);
3203 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3204 
3205 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
3206 			lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
3207 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
3208 			upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
3209 	}
3210 	soc21_grbm_select(adev, 0, 0, 0, 0);
3211 	mutex_unlock(&adev->srbm_mutex);
3212 
3213 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3214 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3215 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3216 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3217 
3218 	/* Invalidate the data caches */
3219 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3220 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3221 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3222 
3223 	for (i = 0; i < usec_timeout; i++) {
3224 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3225 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3226 			INVALIDATE_DCACHE_COMPLETE))
3227 			break;
3228 		udelay(1);
3229 	}
3230 
3231 	if (i >= usec_timeout) {
3232 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3233 		return -EINVAL;
3234 	}
3235 
3236 	return 0;
3237 }
3238 
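/*
 * Legacy (non-RS64) ME microcode load, mirroring the PFP path above
 * but using the CP_HYP_ME_UCODE_ADDR/DATA registers.
 */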
3239 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
3240 {
3241 	int r;
3242 	const struct gfx_firmware_header_v1_0 *me_hdr;
3243 	const __le32 *fw_data;
3244 	unsigned i, fw_size;
3245 
3246 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3247 		adev->gfx.me_fw->data;
3248 
3249 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3250 
3251 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
3252 		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3253 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
3254 
3255 	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
3256 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3257 				      &adev->gfx.me.me_fw_obj,
3258 				      &adev->gfx.me.me_fw_gpu_addr,
3259 				      (void **)&adev->gfx.me.me_fw_ptr);
3260 	if (r) {
3261 		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
3262 		gfx_v11_0_me_fini(adev);
3263 		return r;
3264 	}
3265 
3266 	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
3267 
3268 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3269 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3270 
3271 	gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
3272 
3273 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
3274 
3275 	for (i = 0; i < me_hdr->jt_size; i++)
3276 		WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
3277 			     le32_to_cpup(fw_data + me_hdr->jt_offset + i));
3278 
3279 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
3280 
3281 	return 0;
3282 }
3283 
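/*
 * RS64 ME microcode load: same sequence as the RS64 PFP loader, but
 * against the CP_ME_IC_* registers and the ME pipe reset bits, with
 * the data section advertised through CP_GFX_RS64_DC_BASE1_LO/HI.
 */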
3284 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
3285 {
3286 	int r;
3287 	const struct gfx_firmware_header_v2_0 *me_hdr;
3288 	const __le32 *fw_ucode, *fw_data;
3289 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
3290 	uint32_t tmp;
3291 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
3292 
3293 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
3294 		adev->gfx.me_fw->data;
3295 
3296 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3297 
3298 	/* instruction */
3299 	fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
3300 		le32_to_cpu(me_hdr->ucode_offset_bytes));
3301 	fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
3302 	/* data */
3303 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
3304 		le32_to_cpu(me_hdr->data_offset_bytes));
3305 	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
3306 
3307 	/* 64kb align */
3308 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3309 				      64 * 1024,
3310 				      AMDGPU_GEM_DOMAIN_VRAM |
3311 				      AMDGPU_GEM_DOMAIN_GTT,
3312 				      &adev->gfx.me.me_fw_obj,
3313 				      &adev->gfx.me.me_fw_gpu_addr,
3314 				      (void **)&adev->gfx.me.me_fw_ptr);
3315 	if (r) {
3316 		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
3317 		gfx_v11_0_me_fini(adev);
3318 		return r;
3319 	}
3320 
3321 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3322 				      64 * 1024,
3323 				      AMDGPU_GEM_DOMAIN_VRAM |
3324 				      AMDGPU_GEM_DOMAIN_GTT,
3325 				      &adev->gfx.me.me_fw_data_obj,
3326 				      &adev->gfx.me.me_fw_data_gpu_addr,
3327 				      (void **)&adev->gfx.me.me_fw_data_ptr);
3328 	if (r) {
3329 		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
3330 		gfx_v11_0_me_fini(adev);
3331 		return r;
3332 	}
3333 
3334 	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
3335 	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
3336 
3337 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3338 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
3339 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3340 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
3341 
3342 	if (amdgpu_emu_mode == 1)
3343 		adev->hdp.funcs->flush_hdp(adev, NULL);
3344 
3345 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
3346 		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
3347 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
3348 		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
3349 
3350 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
3351 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
3352 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
3353 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
3354 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
3355 
3356 	/*
3357 	 * Programming any of the CP_ME_IC_BASE registers
3358 	 * forces an invalidation of the ME L1 I$. Wait for
3359 	 * the invalidation to complete.
3360 	 */
3361 	for (i = 0; i < usec_timeout; i++) {
3362 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3363 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3364 			INVALIDATE_CACHE_COMPLETE))
3365 			break;
3366 		udelay(1);
3367 	}
3368 
3369 	if (i >= usec_timeout) {
3370 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3371 		return -EINVAL;
3372 	}
3373 
3374 	/* Prime the instruction caches */
3375 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3376 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
3377 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
3378 
3379 	/* Wait for the instruction cache to be primed */
3380 	for (i = 0; i < usec_timeout; i++) {
3381 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3382 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3383 			ICACHE_PRIMED))
3384 			break;
3385 		udelay(1);
3386 	}
3387 
3388 	if (i >= usec_timeout) {
3389 		dev_err(adev->dev, "failed to prime instruction cache\n");
3390 		return -EINVAL;
3391 	}
3392 
3393 	mutex_lock(&adev->srbm_mutex);
3394 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3395 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3396 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
3397 			(me_hdr->ucode_start_addr_hi << 30) |
3398 			(me_hdr->ucode_start_addr_lo >> 2) );
3399 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
3400 			me_hdr->ucode_start_addr_hi>>2);
3401 
3402 		/*
3403 		 * Reset the given pipe via CP_ME_CNTL so that
3404 		 * CP_ME_PRGRM_CNTR_START takes effect.
3405 		 */
3406 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3407 		if (pipe_id == 0)
3408 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3409 					ME_PIPE0_RESET, 1);
3410 		else
3411 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3412 					ME_PIPE1_RESET, 1);
3413 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3414 
3415 		/* Clear the me pipe reset bit. */
3416 		if (pipe_id == 0)
3417 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3418 					ME_PIPE0_RESET, 0);
3419 		else
3420 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3421 					ME_PIPE1_RESET, 0);
3422 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3423 
3424 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
3425 			lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3426 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
3427 			upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3428 	}
3429 	soc21_grbm_select(adev, 0, 0, 0, 0);
3430 	mutex_unlock(&adev->srbm_mutex);
3431 
3432 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3433 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3434 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3435 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3436 
3437 	/* Invalidate the data caches */
3438 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3439 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3440 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3441 
3442 	for (i = 0; i < usec_timeout; i++) {
3443 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3444 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3445 			INVALIDATE_DCACHE_COMPLETE))
3446 			break;
3447 		udelay(1);
3448 	}
3449 
3450 	if (i >= usec_timeout) {
3451 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3452 		return -EINVAL;
3453 	}
3454 
3455 	return 0;
3456 }
3457 
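/*
 * Direct (non-PSP) gfx CP microcode loading: halt the gfx CP, then
 * load PFP and ME with either the RS64 or the legacy loader depending
 * on adev->gfx.rs64_enable.
 */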
3458 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3459 {
3460 	int r;
3461 
3462 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3463 		return -EINVAL;
3464 
3465 	gfx_v11_0_cp_gfx_enable(adev, false);
3466 
3467 	if (adev->gfx.rs64_enable)
3468 		r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3469 	else
3470 		r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3471 	if (r) {
3472 		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3473 		return r;
3474 	}
3475 
3476 	if (adev->gfx.rs64_enable)
3477 		r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3478 	else
3479 		r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3480 	if (r) {
3481 		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3482 		return r;
3483 	}
3484 
3485 	return 0;
3486 }
3487 
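/*
 * Emit the clear-state preamble on gfx ring 0: the golden context
 * register values from gfx11_cs_data plus PA_SC_TILE_STEERING_OVERRIDE,
 * followed by a CLEAR_STATE packet. If a second gfx ring is present,
 * submit a CLEAR_STATE there as well so state 0 is copied to the next
 * available state.
 */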
3488 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3489 {
3490 	struct amdgpu_ring *ring;
3491 	const struct cs_section_def *sect = NULL;
3492 	const struct cs_extent_def *ext = NULL;
3493 	int r, i;
3494 	int ctx_reg_offset;
3495 
3496 	/* init the CP */
3497 	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3498 		     adev->gfx.config.max_hw_contexts - 1);
3499 	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3500 
3501 	if (!amdgpu_async_gfx_ring)
3502 		gfx_v11_0_cp_gfx_enable(adev, true);
3503 
3504 	ring = &adev->gfx.gfx_ring[0];
3505 	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3506 	if (r) {
3507 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3508 		return r;
3509 	}
3510 
3511 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3512 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3513 
3514 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3515 	amdgpu_ring_write(ring, 0x80000000);
3516 	amdgpu_ring_write(ring, 0x80000000);
3517 
3518 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3519 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3520 			if (sect->id == SECT_CONTEXT) {
3521 				amdgpu_ring_write(ring,
3522 						  PACKET3(PACKET3_SET_CONTEXT_REG,
3523 							  ext->reg_count));
3524 				amdgpu_ring_write(ring, ext->reg_index -
3525 						  PACKET3_SET_CONTEXT_REG_START);
3526 				for (i = 0; i < ext->reg_count; i++)
3527 					amdgpu_ring_write(ring, ext->extent[i]);
3528 			}
3529 		}
3530 	}
3531 
3532 	ctx_reg_offset =
3533 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3534 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3535 	amdgpu_ring_write(ring, ctx_reg_offset);
3536 	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3537 
3538 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3539 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3540 
3541 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3542 	amdgpu_ring_write(ring, 0);
3543 
3544 	amdgpu_ring_commit(ring);
3545 
3546 	/* submit cs packet to copy state 0 to next available state */
3547 	if (adev->gfx.num_gfx_rings > 1) {
3548 		/* maximum supported gfx ring is 2 */
3549 		ring = &adev->gfx.gfx_ring[1];
3550 		r = amdgpu_ring_alloc(ring, 2);
3551 		if (r) {
3552 			DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3553 			return r;
3554 		}
3555 
3556 		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3557 		amdgpu_ring_write(ring, 0);
3558 
3559 		amdgpu_ring_commit(ring);
3560 	}
3561 	return 0;
3562 }
3563 
3564 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3565 					 CP_PIPE_ID pipe)
3566 {
3567 	u32 tmp;
3568 
3569 	tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3570 	tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3571 
3572 	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3573 }
3574 
3575 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3576 					  struct amdgpu_ring *ring)
3577 {
3578 	u32 tmp;
3579 
3580 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3581 	if (ring->use_doorbell) {
3582 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3583 				    DOORBELL_OFFSET, ring->doorbell_index);
3584 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3585 				    DOORBELL_EN, 1);
3586 	} else {
3587 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3588 				    DOORBELL_EN, 0);
3589 	}
3590 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3591 
3592 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3593 			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
3594 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3595 
3596 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3597 		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3598 }
3599 
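/*
 * Program the gfx ring buffer registers directly: ring buffer size,
 * write pointers, rptr/wptr writeback addresses, base address and
 * doorbell for pipe 0 (and pipe 1 when two gfx rings are enabled),
 * then kick off the clear-state submission via gfx_v11_0_cp_gfx_start().
 */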
3600 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3601 {
3602 	struct amdgpu_ring *ring;
3603 	u32 tmp;
3604 	u32 rb_bufsz;
3605 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3606 
3607 	/* Set the write pointer delay */
3608 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3609 
3610 	/* set the RB to use vmid 0 */
3611 	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3612 
3613 	/* Init gfx ring 0 for pipe 0 */
3614 	mutex_lock(&adev->srbm_mutex);
3615 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3616 
3617 	/* Set ring buffer size */
3618 	ring = &adev->gfx.gfx_ring[0];
3619 	rb_bufsz = order_base_2(ring->ring_size / 8);
3620 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3621 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3622 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3623 
3624 	/* Initialize the ring buffer's write pointers */
3625 	ring->wptr = 0;
3626 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3627 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3628 
3629 	/* set the wb address whether it's enabled or not */
3630 	rptr_addr = ring->rptr_gpu_addr;
3631 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3632 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3633 		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3634 
3635 	wptr_gpu_addr = ring->wptr_gpu_addr;
3636 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3637 		     lower_32_bits(wptr_gpu_addr));
3638 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3639 		     upper_32_bits(wptr_gpu_addr));
3640 
3641 	mdelay(1);
3642 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3643 
3644 	rb_addr = ring->gpu_addr >> 8;
3645 	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3646 	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3647 
3648 	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3649 
3650 	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3651 	mutex_unlock(&adev->srbm_mutex);
3652 
3653 	/* Init gfx ring 1 for pipe 1 */
3654 	if (adev->gfx.num_gfx_rings > 1) {
3655 		mutex_lock(&adev->srbm_mutex);
3656 		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3657 		/* maximum supported gfx ring is 2 */
3658 		ring = &adev->gfx.gfx_ring[1];
3659 		rb_bufsz = order_base_2(ring->ring_size / 8);
3660 		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3661 		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3662 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3663 		/* Initialize the ring buffer's write pointers */
3664 		ring->wptr = 0;
3665 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3666 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3667 		/* Set the wb address whether it's enabled or not */
3668 		rptr_addr = ring->rptr_gpu_addr;
3669 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3670 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3671 			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3672 		wptr_gpu_addr = ring->wptr_gpu_addr;
3673 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3674 			     lower_32_bits(wptr_gpu_addr));
3675 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3676 			     upper_32_bits(wptr_gpu_addr));
3677 
3678 		mdelay(1);
3679 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3680 
3681 		rb_addr = ring->gpu_addr >> 8;
3682 		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3683 		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3684 		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3685 
3686 		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3687 		mutex_unlock(&adev->srbm_mutex);
3688 	}
3689 	/* Switch to pipe 0 */
3690 	mutex_lock(&adev->srbm_mutex);
3691 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3692 	mutex_unlock(&adev->srbm_mutex);
3693 
3694 	/* start the ring */
3695 	gfx_v11_0_cp_gfx_start(adev);
3696 
3697 	return 0;
3698 }
3699 
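/*
 * Enable or halt the compute MEC. For RS64 this toggles the per-pipe
 * reset/active bits and MEC_HALT in CP_MEC_RS64_CNTL; otherwise the
 * MEC_ME1/ME2 halt bits in CP_MEC_CNTL are used, and ME2 is only
 * un-halted when the MES KIQ is not in use.
 */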
3700 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3701 {
3702 	u32 data;
3703 
3704 	if (adev->gfx.rs64_enable) {
3705 		data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3706 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3707 							 enable ? 0 : 1);
3708 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3709 							 enable ? 0 : 1);
3710 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3711 							 enable ? 0 : 1);
3712 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3713 							 enable ? 0 : 1);
3714 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3715 							 enable ? 0 : 1);
3716 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3717 							 enable ? 1 : 0);
3718 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3719 				                         enable ? 1 : 0);
3720 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3721 							 enable ? 1 : 0);
3722 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3723 							 enable ? 1 : 0);
3724 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3725 							 enable ? 0 : 1);
3726 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3727 	} else {
3728 		data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3729 
3730 		if (enable) {
3731 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3732 			if (!adev->enable_mes_kiq)
3733 				data = REG_SET_FIELD(data, CP_MEC_CNTL,
3734 						     MEC_ME2_HALT, 0);
3735 		} else {
3736 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3737 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3738 		}
3739 		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3740 	}
3741 
3742 	udelay(50);
3743 }
3744 
3745 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3746 {
3747 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3748 	const __le32 *fw_data;
3749 	unsigned i, fw_size;
3750 	u32 *fw = NULL;
3751 	int r;
3752 
3753 	if (!adev->gfx.mec_fw)
3754 		return -EINVAL;
3755 
3756 	gfx_v11_0_cp_compute_enable(adev, false);
3757 
3758 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3759 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3760 
3761 	fw_data = (const __le32 *)
3762 		(adev->gfx.mec_fw->data +
3763 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3764 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3765 
3766 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3767 					  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3768 					  &adev->gfx.mec.mec_fw_obj,
3769 					  &adev->gfx.mec.mec_fw_gpu_addr,
3770 					  (void **)&fw);
3771 	if (r) {
3772 		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3773 		gfx_v11_0_mec_fini(adev);
3774 		return r;
3775 	}
3776 
3777 	memcpy(fw, fw_data, fw_size);
3778 
3779 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3780 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3781 
3782 	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3783 
3784 	/* MEC1 */
3785 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3786 
3787 	for (i = 0; i < mec_hdr->jt_size; i++)
3788 		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3789 			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3790 
3791 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3792 
3793 	return 0;
3794 }
3795 
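/*
 * RS64 MEC microcode load: copy the v2 instruction and data sections
 * into 64KB aligned BOs, program the per-pipe data base, ucode start
 * address and instruction cache base under srbm_mutex, then invalidate
 * the MEC data cache and the CPC instruction cache and wait for both
 * invalidations to complete.
 */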
3796 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3797 {
3798 	const struct gfx_firmware_header_v2_0 *mec_hdr;
3799 	const __le32 *fw_ucode, *fw_data;
3800 	u32 tmp, fw_ucode_size, fw_data_size;
3801 	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3802 	u32 *fw_ucode_ptr, *fw_data_ptr;
3803 	int r;
3804 
3805 	if (!adev->gfx.mec_fw)
3806 		return -EINVAL;
3807 
3808 	gfx_v11_0_cp_compute_enable(adev, false);
3809 
3810 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3811 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3812 
3813 	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3814 				le32_to_cpu(mec_hdr->ucode_offset_bytes));
3815 	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3816 
3817 	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3818 				le32_to_cpu(mec_hdr->data_offset_bytes));
3819 	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3820 
3821 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3822 				      64 * 1024,
3823 				      AMDGPU_GEM_DOMAIN_VRAM |
3824 				      AMDGPU_GEM_DOMAIN_GTT,
3825 				      &adev->gfx.mec.mec_fw_obj,
3826 				      &adev->gfx.mec.mec_fw_gpu_addr,
3827 				      (void **)&fw_ucode_ptr);
3828 	if (r) {
3829 		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3830 		gfx_v11_0_mec_fini(adev);
3831 		return r;
3832 	}
3833 
3834 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3835 				      64 * 1024,
3836 				      AMDGPU_GEM_DOMAIN_VRAM |
3837 				      AMDGPU_GEM_DOMAIN_GTT,
3838 				      &adev->gfx.mec.mec_fw_data_obj,
3839 				      &adev->gfx.mec.mec_fw_data_gpu_addr,
3840 				      (void **)&fw_data_ptr);
3841 	if (r) {
3842 		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3843 		gfx_v11_0_mec_fini(adev);
3844 		return r;
3845 	}
3846 
3847 	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3848 	memcpy(fw_data_ptr, fw_data, fw_data_size);
3849 
3850 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3851 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3852 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3853 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3854 
3855 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3856 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3857 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3858 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3859 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
3860 
3861 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
3862 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
3863 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
3864 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
3865 
3866 	mutex_lock(&adev->srbm_mutex);
3867 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
3868 		soc21_grbm_select(adev, 1, i, 0, 0);
3869 
3870 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
3871 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
3872 		     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3873 
3874 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3875 					mec_hdr->ucode_start_addr_lo >> 2 |
3876 					mec_hdr->ucode_start_addr_hi << 30);
3877 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3878 					mec_hdr->ucode_start_addr_hi >> 2);
3879 
3880 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
3881 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
3882 		     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3883 	}
3884 	mutex_unlock(&adev->srbm_mutex);
3885 	soc21_grbm_select(adev, 0, 0, 0, 0);
3886 
3887 	/* Trigger an invalidation of the MEC data cache */
3888 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3889 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3890 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
3891 
3892 	/* Wait for invalidation complete */
3893 	for (i = 0; i < usec_timeout; i++) {
3894 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3895 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
3896 				       INVALIDATE_DCACHE_COMPLETE))
3897 			break;
3898 		udelay(1);
3899 	}
3900 
3901 	if (i >= usec_timeout) {
3902 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3903 		return -EINVAL;
3904 	}
3905 
3906 	/* Trigger an invalidation of the L1 instruction caches */
3907 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3908 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
3909 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
3910 
3911 	/* Wait for invalidation complete */
3912 	for (i = 0; i < usec_timeout; i++) {
3913 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3914 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
3915 				       INVALIDATE_CACHE_COMPLETE))
3916 			break;
3917 		udelay(1);
3918 	}
3919 
3920 	if (i >= usec_timeout) {
3921 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3922 		return -EINVAL;
3923 	}
3924 
3925 	return 0;
3926 }
3927 
3928 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
3929 {
3930 	uint32_t tmp;
3931 	struct amdgpu_device *adev = ring->adev;
3932 
3933 	/* tell RLC which queue is the KIQ */
3934 	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3935 	tmp &= 0xffffff00;
3936 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3937 	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
3938 }
3939 
3940 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
3941 {
3942 	/* set graphics engine doorbell range */
3943 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
3944 		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
3945 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3946 		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
3947 
3948 	/* set compute engine doorbell range */
3949 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3950 		     (adev->doorbell_index.kiq * 2) << 2);
3951 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3952 		     (adev->doorbell_index.userqueue_end * 2) << 2);
3953 }
3954 
3955 static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
3956 					   struct v11_gfx_mqd *mqd,
3957 					   struct amdgpu_mqd_prop *prop)
3958 {
3959 	bool priority = 0;
3960 	u32 tmp;
3961 
3962 	/* set up default queue priority level
3963 	 * 0x0 = low priority, 0x1 = high priority
3964 	 */
3965 	if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
3966 		priority = 1;
3967 
3968 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
3969 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
3970 	mqd->cp_gfx_hqd_queue_priority = tmp;
3971 }
3972 
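/*
 * Fill a v11 gfx MQD from the generic queue properties: MQD base
 * address, HQD base/rptr/wptr addresses, queue priority and quantum,
 * ring buffer sizing and doorbell control, mirroring the direct
 * CP_RB0_* programming done in gfx_v11_0_cp_gfx_resume().
 */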
3973 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
3974 				  struct amdgpu_mqd_prop *prop)
3975 {
3976 	struct v11_gfx_mqd *mqd = m;
3977 	uint64_t hqd_gpu_addr, wb_gpu_addr;
3978 	uint32_t tmp;
3979 	uint32_t rb_bufsz;
3980 
3981 	/* set up gfx hqd wptr */
3982 	mqd->cp_gfx_hqd_wptr = 0;
3983 	mqd->cp_gfx_hqd_wptr_hi = 0;
3984 
3985 	/* set the pointer to the MQD */
3986 	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
3987 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3988 
3989 	/* set up mqd control */
3990 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
3991 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
3992 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
3993 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
3994 	mqd->cp_gfx_mqd_control = tmp;
3995 
3996 	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
3997 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
3998 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
3999 	mqd->cp_gfx_hqd_vmid = 0;
4000 
4001 	/* set up gfx queue priority */
4002 	gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);
4003 
4004 	/* set up time quantum */
4005 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
4006 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
4007 	mqd->cp_gfx_hqd_quantum = tmp;
4008 
4009 	/* set up gfx hqd base. this is similar to CP_RB_BASE */
4010 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
4011 	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
4012 	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
4013 
4014 	/* set up hqd_rptr_addr/_hi, similar to CP_RB_RPTR */
4015 	wb_gpu_addr = prop->rptr_gpu_addr;
4016 	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
4017 	mqd->cp_gfx_hqd_rptr_addr_hi =
4018 		upper_32_bits(wb_gpu_addr) & 0xffff;
4019 
4020 	/* set up rb_wptr_poll addr */
4021 	wb_gpu_addr = prop->wptr_gpu_addr;
4022 	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4023 	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4024 
4025 	/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
4026 	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
4027 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
4028 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
4029 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
4030 #ifdef __BIG_ENDIAN
4031 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
4032 #endif
4033 	mqd->cp_gfx_hqd_cntl = tmp;
4034 
4035 	/* set up cp_doorbell_control */
4036 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
4037 	if (prop->use_doorbell) {
4038 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4039 				    DOORBELL_OFFSET, prop->doorbell_index);
4040 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4041 				    DOORBELL_EN, 1);
4042 	} else
4043 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4044 				    DOORBELL_EN, 0);
4045 	mqd->cp_rb_doorbell_control = tmp;
4046 
4047 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4048 	mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
4049 
4050 	/* activate the queue */
4051 	mqd->cp_gfx_hqd_active = 1;
4052 
4053 	return 0;
4054 }
4055 
4056 static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
4057 {
4058 	struct amdgpu_device *adev = ring->adev;
4059 	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
4060 	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
4061 
4062 	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
4063 		memset((void *)mqd, 0, sizeof(*mqd));
4064 		mutex_lock(&adev->srbm_mutex);
4065 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4066 		amdgpu_ring_init_mqd(ring);
4067 		soc21_grbm_select(adev, 0, 0, 0, 0);
4068 		mutex_unlock(&adev->srbm_mutex);
4069 		if (adev->gfx.me.mqd_backup[mqd_idx])
4070 			memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4071 	} else {
4072 		/* restore mqd with the backup copy */
4073 		if (adev->gfx.me.mqd_backup[mqd_idx])
4074 			memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
4075 		/* reset the ring */
4076 		ring->wptr = 0;
4077 		*ring->wptr_cpu_addr = 0;
4078 		amdgpu_ring_clear_ring(ring);
4079 	}
4080 
4081 	return 0;
4082 }
4083 
4084 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
4085 {
4086 	int r, i;
4087 	struct amdgpu_ring *ring;
4088 
4089 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4090 		ring = &adev->gfx.gfx_ring[i];
4091 
4092 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4093 		if (unlikely(r != 0))
4094 			return r;
4095 
4096 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4097 		if (!r) {
4098 			r = gfx_v11_0_kgq_init_queue(ring, false);
4099 			amdgpu_bo_kunmap(ring->mqd_obj);
4100 			ring->mqd_ptr = NULL;
4101 		}
4102 		amdgpu_bo_unreserve(ring->mqd_obj);
4103 		if (r)
4104 			return r;
4105 	}
4106 
4107 	r = amdgpu_gfx_enable_kgq(adev, 0);
4108 	if (r)
4109 		return r;
4110 
4111 	return gfx_v11_0_cp_gfx_start(adev);
4112 }
4113 
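/*
 * Fill a v11 compute MQD: EOP buffer address and size, doorbell
 * control, MQD and HQD base addresses, PQ control (queue size,
 * unordered/tunnel dispatch) and the static pipe/queue priorities
 * taken from the queue properties.
 */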
4114 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
4115 				      struct amdgpu_mqd_prop *prop)
4116 {
4117 	struct v11_compute_mqd *mqd = m;
4118 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4119 	uint32_t tmp;
4120 
4121 	mqd->header = 0xC0310800;
4122 	mqd->compute_pipelinestat_enable = 0x00000001;
4123 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4124 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4125 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4126 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4127 	mqd->compute_misc_reserved = 0x00000007;
4128 
4129 	eop_base_addr = prop->eop_gpu_addr >> 8;
4130 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4131 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4132 
4133 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4134 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
4135 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4136 			(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
4137 
4138 	mqd->cp_hqd_eop_control = tmp;
4139 
4140 	/* enable doorbell? */
4141 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
4142 
4143 	if (prop->use_doorbell) {
4144 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4145 				    DOORBELL_OFFSET, prop->doorbell_index);
4146 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4147 				    DOORBELL_EN, 1);
4148 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4149 				    DOORBELL_SOURCE, 0);
4150 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4151 				    DOORBELL_HIT, 0);
4152 	} else {
4153 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4154 				    DOORBELL_EN, 0);
4155 	}
4156 
4157 	mqd->cp_hqd_pq_doorbell_control = tmp;
4158 
4159 	/* disable the queue if it's active */
4160 	mqd->cp_hqd_dequeue_request = 0;
4161 	mqd->cp_hqd_pq_rptr = 0;
4162 	mqd->cp_hqd_pq_wptr_lo = 0;
4163 	mqd->cp_hqd_pq_wptr_hi = 0;
4164 
4165 	/* set the pointer to the MQD */
4166 	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
4167 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
4168 
4169 	/* set MQD vmid to 0 */
4170 	tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
4171 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4172 	mqd->cp_mqd_control = tmp;
4173 
4174 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4175 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
4176 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4177 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4178 
4179 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4180 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
4181 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4182 			    (order_base_2(prop->queue_size / 4) - 1));
4183 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4184 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4185 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
4186 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
4187 			    prop->allow_tunneling);
4188 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4189 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4190 	mqd->cp_hqd_pq_control = tmp;
4191 
4192 	/* set the wb address whether it's enabled or not */
4193 	wb_gpu_addr = prop->rptr_gpu_addr;
4194 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4195 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4196 		upper_32_bits(wb_gpu_addr) & 0xffff;
4197 
4198 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4199 	wb_gpu_addr = prop->wptr_gpu_addr;
4200 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4201 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4202 
4203 	tmp = 0;
4204 	/* enable the doorbell if requested */
4205 	if (prop->use_doorbell) {
4206 		tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
4207 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4208 				DOORBELL_OFFSET, prop->doorbell_index);
4209 
4210 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4211 				    DOORBELL_EN, 1);
4212 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4213 				    DOORBELL_SOURCE, 0);
4214 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4215 				    DOORBELL_HIT, 0);
4216 	}
4217 
4218 	mqd->cp_hqd_pq_doorbell_control = tmp;
4219 
4220 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4221 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
4222 
4223 	/* set the vmid for the queue */
4224 	mqd->cp_hqd_vmid = 0;
4225 
4226 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
4227 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
4228 	mqd->cp_hqd_persistent_state = tmp;
4229 
4230 	/* set MIN_IB_AVAIL_SIZE */
4231 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
4232 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4233 	mqd->cp_hqd_ib_control = tmp;
4234 
4235 	/* set static priority for a compute queue/ring */
4236 	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
4237 	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
4238 
4239 	mqd->cp_hqd_active = prop->hqd_active;
4240 
4241 	return 0;
4242 }
4243 
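/*
 * Program the KIQ HQD registers directly from its MQD. Expects the
 * caller to hold srbm_mutex with the queue selected via
 * soc21_grbm_select().
 */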
4244 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
4245 {
4246 	struct amdgpu_device *adev = ring->adev;
4247 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4248 	int j;
4249 
4250 	/* deactivate the queue */
4251 	if (amdgpu_sriov_vf(adev))
4252 		WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
4253 
4254 	/* disable wptr polling */
4255 	WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4256 
4257 	/* write the EOP addr */
4258 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
4259 	       mqd->cp_hqd_eop_base_addr_lo);
4260 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
4261 	       mqd->cp_hqd_eop_base_addr_hi);
4262 
4263 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4264 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
4265 	       mqd->cp_hqd_eop_control);
4266 
4267 	/* enable doorbell? */
4268 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4269 	       mqd->cp_hqd_pq_doorbell_control);
4270 
4271 	/* disable the queue if it's active */
4272 	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
4273 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
4274 		for (j = 0; j < adev->usec_timeout; j++) {
4275 			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
4276 				break;
4277 			udelay(1);
4278 		}
4279 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
4280 		       mqd->cp_hqd_dequeue_request);
4281 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
4282 		       mqd->cp_hqd_pq_rptr);
4283 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4284 		       mqd->cp_hqd_pq_wptr_lo);
4285 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4286 		       mqd->cp_hqd_pq_wptr_hi);
4287 	}
4288 
4289 	/* set the pointer to the MQD */
4290 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
4291 	       mqd->cp_mqd_base_addr_lo);
4292 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
4293 	       mqd->cp_mqd_base_addr_hi);
4294 
4295 	/* set MQD vmid to 0 */
4296 	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
4297 	       mqd->cp_mqd_control);
4298 
4299 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4300 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
4301 	       mqd->cp_hqd_pq_base_lo);
4302 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
4303 	       mqd->cp_hqd_pq_base_hi);
4304 
4305 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4306 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
4307 	       mqd->cp_hqd_pq_control);
4308 
4309 	/* set the wb address whether it's enabled or not */
4310 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
4311 		mqd->cp_hqd_pq_rptr_report_addr_lo);
4312 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4313 		mqd->cp_hqd_pq_rptr_report_addr_hi);
4314 
4315 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4316 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
4317 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
4318 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4319 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4320 
4321 	/* enable the doorbell if requested */
4322 	if (ring->use_doorbell) {
4323 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
4324 			(adev->doorbell_index.kiq * 2) << 2);
4325 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
4326 			(adev->doorbell_index.userqueue_end * 2) << 2);
4327 	}
4328 
4329 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4330 	       mqd->cp_hqd_pq_doorbell_control);
4331 
4332 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4333 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4334 	       mqd->cp_hqd_pq_wptr_lo);
4335 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4336 	       mqd->cp_hqd_pq_wptr_hi);
4337 
4338 	/* set the vmid for the queue */
4339 	WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
4340 
4341 	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
4342 	       mqd->cp_hqd_persistent_state);
4343 
4344 	/* activate the queue */
4345 	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
4346 	       mqd->cp_hqd_active);
4347 
4348 	if (ring->use_doorbell)
4349 		WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4350 
4351 	return 0;
4352 }
4353 
4354 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
4355 {
4356 	struct amdgpu_device *adev = ring->adev;
4357 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4358 
4359 	gfx_v11_0_kiq_setting(ring);
4360 
4361 	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4362 		/* reset MQD to a clean status */
4363 		if (adev->gfx.kiq[0].mqd_backup)
4364 			memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
4365 
4366 		/* reset ring buffer */
4367 		ring->wptr = 0;
4368 		amdgpu_ring_clear_ring(ring);
4369 
4370 		mutex_lock(&adev->srbm_mutex);
4371 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4372 		gfx_v11_0_kiq_init_register(ring);
4373 		soc21_grbm_select(adev, 0, 0, 0, 0);
4374 		mutex_unlock(&adev->srbm_mutex);
4375 	} else {
4376 		memset((void *)mqd, 0, sizeof(*mqd));
4377 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4378 			amdgpu_ring_clear_ring(ring);
4379 		mutex_lock(&adev->srbm_mutex);
4380 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4381 		amdgpu_ring_init_mqd(ring);
4382 		gfx_v11_0_kiq_init_register(ring);
4383 		soc21_grbm_select(adev, 0, 0, 0, 0);
4384 		mutex_unlock(&adev->srbm_mutex);
4385 
4386 		if (adev->gfx.kiq[0].mqd_backup)
4387 			memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
4388 	}
4389 
4390 	return 0;
4391 }
4392 
4393 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
4394 {
4395 	struct amdgpu_device *adev = ring->adev;
4396 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4397 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4398 
4399 	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
4400 		memset((void *)mqd, 0, sizeof(*mqd));
4401 		mutex_lock(&adev->srbm_mutex);
4402 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4403 		amdgpu_ring_init_mqd(ring);
4404 		soc21_grbm_select(adev, 0, 0, 0, 0);
4405 		mutex_unlock(&adev->srbm_mutex);
4406 
4407 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4408 			memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4409 	} else {
4410 		/* restore MQD to a clean status */
4411 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4412 			memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4413 		/* reset ring buffer */
4414 		ring->wptr = 0;
4415 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4416 		amdgpu_ring_clear_ring(ring);
4417 	}
4418 
4419 	return 0;
4420 }
4421 
4422 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4423 {
4424 	struct amdgpu_ring *ring;
4425 	int r;
4426 
4427 	ring = &adev->gfx.kiq[0].ring;
4428 
4429 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4430 	if (unlikely(r != 0))
4431 		return r;
4432 
4433 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4434 	if (unlikely(r != 0)) {
4435 		amdgpu_bo_unreserve(ring->mqd_obj);
4436 		return r;
4437 	}
4438 
4439 	gfx_v11_0_kiq_init_queue(ring);
4440 	amdgpu_bo_kunmap(ring->mqd_obj);
4441 	ring->mqd_ptr = NULL;
4442 	amdgpu_bo_unreserve(ring->mqd_obj);
4443 	ring->sched.ready = true;
4444 	return 0;
4445 }
4446 
4447 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4448 {
4449 	struct amdgpu_ring *ring = NULL;
4450 	int r = 0, i;
4451 
4452 	if (!amdgpu_async_gfx_ring)
4453 		gfx_v11_0_cp_compute_enable(adev, true);
4454 
4455 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4456 		ring = &adev->gfx.compute_ring[i];
4457 
4458 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4459 		if (unlikely(r != 0))
4460 			goto done;
4461 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4462 		if (!r) {
4463 			r = gfx_v11_0_kcq_init_queue(ring, false);
4464 			amdgpu_bo_kunmap(ring->mqd_obj);
4465 			ring->mqd_ptr = NULL;
4466 		}
4467 		amdgpu_bo_unreserve(ring->mqd_obj);
4468 		if (r)
4469 			goto done;
4470 	}
4471 
4472 	r = amdgpu_gfx_enable_kcq(adev, 0);
4473 done:
4474 	return r;
4475 }
4476 
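/*
 * Bring up the whole CP block: load the gfx/compute microcode when
 * direct loading is used, set the doorbell ranges, resume the KIQ (or
 * the MES KIQ), the KCQs and the gfx rings, and finally ring-test
 * every gfx and compute ring.
 */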
4477 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4478 {
4479 	int r, i;
4480 	struct amdgpu_ring *ring;
4481 
4482 	if (!(adev->flags & AMD_IS_APU))
4483 		gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4484 
4485 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4486 		/* legacy firmware loading */
4487 		r = gfx_v11_0_cp_gfx_load_microcode(adev);
4488 		if (r)
4489 			return r;
4490 
4491 		if (adev->gfx.rs64_enable)
4492 			r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4493 		else
4494 			r = gfx_v11_0_cp_compute_load_microcode(adev);
4495 		if (r)
4496 			return r;
4497 	}
4498 
4499 	gfx_v11_0_cp_set_doorbell_range(adev);
4500 
4501 	if (amdgpu_async_gfx_ring) {
4502 		gfx_v11_0_cp_compute_enable(adev, true);
4503 		gfx_v11_0_cp_gfx_enable(adev, true);
4504 	}
4505 
4506 	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4507 		r = amdgpu_mes_kiq_hw_init(adev);
4508 	else
4509 		r = gfx_v11_0_kiq_resume(adev);
4510 	if (r)
4511 		return r;
4512 
4513 	r = gfx_v11_0_kcq_resume(adev);
4514 	if (r)
4515 		return r;
4516 
4517 	if (!amdgpu_async_gfx_ring) {
4518 		r = gfx_v11_0_cp_gfx_resume(adev);
4519 		if (r)
4520 			return r;
4521 	} else {
4522 		r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4523 		if (r)
4524 			return r;
4525 	}
4526 
4527 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4528 		ring = &adev->gfx.gfx_ring[i];
4529 		r = amdgpu_ring_test_helper(ring);
4530 		if (r)
4531 			return r;
4532 	}
4533 
4534 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4535 		ring = &adev->gfx.compute_ring[i];
4536 		r = amdgpu_ring_test_helper(ring);
4537 		if (r)
4538 			return r;
4539 	}
4540 
4541 	return 0;
4542 }
4543 
4544 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4545 {
4546 	gfx_v11_0_cp_gfx_enable(adev, enable);
4547 	gfx_v11_0_cp_compute_enable(adev, enable);
4548 }
4549 
4550 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4551 {
4552 	int r;
4553 	bool value;
4554 
4555 	r = adev->gfxhub.funcs->gart_enable(adev);
4556 	if (r)
4557 		return r;
4558 
4559 	adev->hdp.funcs->flush_hdp(adev, NULL);
4560 
4561 	value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
4562 		false : true;
4563 
4564 	adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4565 	/* TODO: investigate why this and the hdp flush above are needed;
4566 	 * are we missing a flush somewhere else? */
4567 	adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
4568 
4569 	return 0;
4570 }
4571 
4572 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4573 {
4574 	u32 tmp;
4575 
4576 	/* select RS64 */
4577 	if (adev->gfx.rs64_enable) {
4578 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4579 		tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4580 		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4581 
4582 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4583 		tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4584 		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4585 	}
4586 
4587 	if (amdgpu_emu_mode == 1)
4588 		msleep(100);
4589 }
4590 
4591 static int get_gb_addr_config(struct amdgpu_device *adev)
4592 {
4593 	u32 gb_addr_config;
4594 
4595 	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4596 	if (gb_addr_config == 0)
4597 		return -EINVAL;
4598 
4599 	adev->gfx.config.gb_addr_config_fields.num_pkrs =
4600 		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4601 
4602 	adev->gfx.config.gb_addr_config = gb_addr_config;
4603 
4604 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4605 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4606 				      GB_ADDR_CONFIG, NUM_PIPES);
4607 
4608 	adev->gfx.config.max_tile_pipes =
4609 		adev->gfx.config.gb_addr_config_fields.num_pipes;
4610 
4611 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4612 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4613 				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4614 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4615 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4616 				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
4617 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4618 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4619 				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4620 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4621 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4622 				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4623 
4624 	return 0;
4625 }
4626 
4627 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4628 {
4629 	uint32_t data;
4630 
4631 	data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4632 	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4633 	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4634 
4635 	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4636 	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4637 	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4638 }
4639 
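/*
 * Bring up the GC block in an order that depends on the firmware load type:
 * program the RLC RAM and kick off RLC backdoor autoload (or load the IMU
 * microcode directly), wait for autoload completion, enable the gfxhub,
 * apply golden registers, load the SMU firmware when required, initialize
 * the GC constants, resume the RLC and finally resume the CP rings.
 */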
4640 static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
4641 {
4642 	int r;
4643 	struct amdgpu_device *adev = ip_block->adev;
4644 
4645 	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
4646 				       adev->gfx.cleaner_shader_ptr);
4647 
4648 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4649 		if (adev->gfx.imu.funcs) {
4650 			/* RLC autoload sequence 1: Program rlc ram */
4651 			if (adev->gfx.imu.funcs->program_rlc_ram)
4652 				adev->gfx.imu.funcs->program_rlc_ram(adev);
4653 			/* rlc autoload firmware */
4654 			r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4655 			if (r)
4656 				return r;
4657 		}
4658 	} else {
4659 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4660 			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4661 				if (adev->gfx.imu.funcs->load_microcode)
4662 					adev->gfx.imu.funcs->load_microcode(adev);
4663 				if (adev->gfx.imu.funcs->setup_imu)
4664 					adev->gfx.imu.funcs->setup_imu(adev);
4665 				if (adev->gfx.imu.funcs->start_imu)
4666 					adev->gfx.imu.funcs->start_imu(adev);
4667 			}
4668 
4669 			/* disable gpa mode in backdoor loading */
4670 			gfx_v11_0_disable_gpa_mode(adev);
4671 		}
4672 	}
4673 
4674 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4675 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4676 		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4677 		if (r) {
4678 			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
4679 			return r;
4680 		}
4681 	}
4682 
4683 	adev->gfx.is_poweron = true;
4684 
4685 	if (get_gb_addr_config(adev))
4686 		DRM_WARN("Invalid gb_addr_config!\n");
4687 
4688 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4689 	    adev->gfx.rs64_enable)
4690 		gfx_v11_0_config_gfx_rs64(adev);
4691 
4692 	r = gfx_v11_0_gfxhub_enable(adev);
4693 	if (r)
4694 		return r;
4695 
4696 	if (!amdgpu_emu_mode)
4697 		gfx_v11_0_init_golden_registers(adev);
4698 
4699 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4700 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4701 		/*
4702 		 * For gfx 11, RLC firmware loading relies on the SMU firmware
4703 		 * being loaded first, so for the direct loading type the SMC
4704 		 * ucode has to be loaded here before the RLC.
4705 		 */
4706 		r = amdgpu_pm_load_smu_firmware(adev, NULL);
4707 		if (r)
4708 			return r;
4709 	}
4710 
4711 	gfx_v11_0_constants_init(adev);
4712 
4713 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4714 		gfx_v11_0_select_cp_fw_arch(adev);
4715 
4716 	if (adev->nbio.funcs->gc_doorbell_init)
4717 		adev->nbio.funcs->gc_doorbell_init(adev);
4718 
4719 	r = gfx_v11_0_rlc_resume(adev);
4720 	if (r)
4721 		return r;
4722 
4723 	/*
4724 	 * golden register init and rlc resume may override some registers,
4725 	 * so reconfigure them here
4726 	 */
4727 	gfx_v11_0_tcp_harvest(adev);
4728 
4729 	r = gfx_v11_0_cp_resume(adev);
4730 	if (r)
4731 		return r;
4732 
4733 	/* get IMU version from HW if it's not set */
4734 	if (!adev->gfx.imu_fw_version)
4735 		adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);
4736 
4737 	return r;
4738 }
4739 
4740 static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
4741 {
4742 	struct amdgpu_device *adev = ip_block->adev;
4743 
4744 	cancel_delayed_work_sync(&adev->gfx.idle_work);
4745 
4746 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4747 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4748 	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
4749 
4750 	if (!adev->no_hw_access) {
4751 		if (amdgpu_async_gfx_ring) {
4752 			if (amdgpu_gfx_disable_kgq(adev, 0))
4753 				DRM_ERROR("KGQ disable failed\n");
4754 		}
4755 
4756 		if (amdgpu_gfx_disable_kcq(adev, 0))
4757 			DRM_ERROR("KCQ disable failed\n");
4758 
4759 		amdgpu_mes_kiq_hw_fini(adev);
4760 	}
4761 
4762 	if (amdgpu_sriov_vf(adev))
4763 		/* The steps that disable CPG and clear the KIQ position are
4764 		 * removed so that CP can perform IDLE-SAVE during the switch.
4765 		 * Those steps are needed to avoid a DMAR error on gfx9, but
4766 		 * the error is not reproduced on gfx11.
4767 		 */
4768 		return 0;
4769 
4770 	gfx_v11_0_cp_enable(adev, false);
4771 	gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4772 
4773 	adev->gfxhub.funcs->gart_disable(adev);
4774 
4775 	adev->gfx.is_poweron = false;
4776 
4777 	return 0;
4778 }
4779 
4780 static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block)
4781 {
4782 	return gfx_v11_0_hw_fini(ip_block);
4783 }
4784 
4785 static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block)
4786 {
4787 	return gfx_v11_0_hw_init(ip_block);
4788 }
4789 
4790 static bool gfx_v11_0_is_idle(void *handle)
4791 {
4792 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4793 
4794 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
4795 				GRBM_STATUS, GUI_ACTIVE))
4796 		return false;
4797 	else
4798 		return true;
4799 }
4800 
4801 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4802 {
4803 	unsigned i;
4804 	u32 tmp;
4805 	struct amdgpu_device *adev = ip_block->adev;
4806 
4807 	for (i = 0; i < adev->usec_timeout; i++) {
4808 		/* read GRBM_STATUS */
4809 		tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
4810 			GRBM_STATUS__GUI_ACTIVE_MASK;
4811 
4812 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4813 			return 0;
4814 		udelay(1);
4815 	}
4816 	return -ETIMEDOUT;
4817 }
4818 
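/**
 * gfx_v11_0_request_gfx_index_mutex - acquire or release the CP gfx index mutex
 * @adev: amdgpu device pointer
 * @req: true to request the mutex, false to release it
 *
 * Polls CP_GFX_INDEX_MUTEX until the request is acknowledged or, on release,
 * until the register no longer reports this client as the owner.
 *
 * Returns 0 on success, -EINVAL on timeout.
 */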
4819 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
4820 				      bool req)
4821 {
4822 	u32 i, tmp, val;
4823 
4824 	for (i = 0; i < adev->usec_timeout; i++) {
4825 		/* Request with MeId=2, PipeId=0 */
4826 		tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
4827 		tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
4828 		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
4829 
4830 		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
4831 		if (req) {
4832 			if (val == tmp)
4833 				break;
4834 		} else {
4835 			tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
4836 					    REQUEST, 1);
4837 
4838 			/* unlocked or locked by firmware */
4839 			if (val != tmp)
4840 				break;
4841 		}
4842 		udelay(1);
4843 	}
4844 
4845 	if (i >= adev->usec_timeout)
4846 		return -EINVAL;
4847 
4848 	return 0;
4849 }
4850 
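/*
 * Soft reset sequence: mask the CP interrupts, request dequeue on every
 * compute and gfx HQD, hold the gfx index mutex while writing CP_VMID_RESET,
 * wait for all HQDs to drain, pulse the CP/GFX/CPF/CPC/CPG bits in
 * GRBM_SOFT_RESET, re-enable the CP micro engines and interrupts, and
 * finally resume the CP.
 */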
4851 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
4852 {
4853 	u32 grbm_soft_reset = 0;
4854 	u32 tmp;
4855 	int r, i, j, k;
4856 	struct amdgpu_device *adev = ip_block->adev;
4857 
4858 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4859 
4860 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4861 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
4862 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
4863 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
4864 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
4865 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4866 
4867 	mutex_lock(&adev->srbm_mutex);
4868 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4869 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4870 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4871 				soc21_grbm_select(adev, i, k, j, 0);
4872 
4873 				WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
4874 				WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
4875 			}
4876 		}
4877 	}
4878 	for (i = 0; i < adev->gfx.me.num_me; ++i) {
4879 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
4880 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
4881 				soc21_grbm_select(adev, i, k, j, 0);
4882 
4883 				WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
4884 			}
4885 		}
4886 	}
4887 	soc21_grbm_select(adev, 0, 0, 0, 0);
4888 	mutex_unlock(&adev->srbm_mutex);
4889 
4890 	/* Try to acquire the gfx mutex before access to CP_VMID_RESET */
4891 	mutex_lock(&adev->gfx.reset_sem_mutex);
4892 	r = gfx_v11_0_request_gfx_index_mutex(adev, true);
4893 	if (r) {
4894 		mutex_unlock(&adev->gfx.reset_sem_mutex);
4895 		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
4896 		return r;
4897 	}
4898 
4899 	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
4900 
4901 	/* Read the CP_VMID_RESET register three times
4902 	 * to give GFX_HQD_ACTIVE enough time to reach 0. */
4903 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4904 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4905 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4906 
4907 	/* release the gfx mutex */
4908 	r = gfx_v11_0_request_gfx_index_mutex(adev, false);
4909 	mutex_unlock(&adev->gfx.reset_sem_mutex);
4910 	if (r) {
4911 		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
4912 		return r;
4913 	}
4914 
4915 	for (i = 0; i < adev->usec_timeout; i++) {
4916 		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
4917 		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
4918 			break;
4919 		udelay(1);
4920 	}
4921 	if (i >= adev->usec_timeout) {
4922 		dev_err(adev->dev, "Failed to wait for all pipes to become idle\n");
4923 		return -EINVAL;
4924 	}
4925 
4926 	/**********  trigger soft reset  ***********/
4927 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4928 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4929 					SOFT_RESET_CP, 1);
4930 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4931 					SOFT_RESET_GFX, 1);
4932 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4933 					SOFT_RESET_CPF, 1);
4934 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4935 					SOFT_RESET_CPC, 1);
4936 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4937 					SOFT_RESET_CPG, 1);
4938 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4939 	/**********  exit soft reset  ***********/
4940 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4941 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4942 					SOFT_RESET_CP, 0);
4943 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4944 					SOFT_RESET_GFX, 0);
4945 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4946 					SOFT_RESET_CPF, 0);
4947 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4948 					SOFT_RESET_CPC, 0);
4949 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4950 					SOFT_RESET_CPG, 0);
4951 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4952 
4953 	tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
4954 	tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
4955 	WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
4956 
4957 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
4958 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
4959 
4960 	for (i = 0; i < adev->usec_timeout; i++) {
4961 		if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
4962 			break;
4963 		udelay(1);
4964 	}
4965 	if (i >= adev->usec_timeout) {
4966 		dev_err(adev->dev, "Failed to wait for CP_VMID_RESET to clear\n");
4967 		return -EINVAL;
4968 	}
4969 
4970 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4971 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4972 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4973 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4974 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4975 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4976 
4977 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4978 
4979 	return gfx_v11_0_cp_resume(adev);
4980 }
4981 
4982 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
4983 {
4984 	int i, r;
4985 	struct amdgpu_device *adev = ip_block->adev;
4986 	struct amdgpu_ring *ring;
4987 	long tmo = msecs_to_jiffies(1000);
4988 
4989 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4990 		ring = &adev->gfx.gfx_ring[i];
4991 		r = amdgpu_ring_test_ib(ring, tmo);
4992 		if (r)
4993 			return true;
4994 	}
4995 
4996 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4997 		ring = &adev->gfx.compute_ring[i];
4998 		r = amdgpu_ring_test_ib(ring, tmo);
4999 		if (r)
5000 			return true;
5001 	}
5002 
5003 	return false;
5004 }
5005 
5006 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
5007 {
5008 	struct amdgpu_device *adev = ip_block->adev;
5009 	/*
5010 	 * GFX soft reset impacts MES, so MES needs to be resumed after a GFX soft reset
5011 	 */
5012 	return amdgpu_mes_resume(adev);
5013 }
5014 
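/*
 * Read the 64-bit GPU clock counter. The high word is sampled before and
 * after the low word so that a carry between the two 32-bit reads can be
 * detected and the low word re-read. SR-IOV VFs use the CP_MES_MTIME counter
 * (with GFXOFF temporarily disabled), bare metal uses the SMUIO GOLDEN_TSC
 * counter.
 */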
5015 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5016 {
5017 	uint64_t clock;
5018 	uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
5019 
5020 	if (amdgpu_sriov_vf(adev)) {
5021 		amdgpu_gfx_off_ctrl(adev, false);
5022 		mutex_lock(&adev->gfx.gpu_clock_mutex);
5023 		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
5024 		clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
5025 		clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
5026 		if (clock_counter_hi_pre != clock_counter_hi_after)
5027 			clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
5028 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
5029 		amdgpu_gfx_off_ctrl(adev, true);
5030 	} else {
5031 		preempt_disable();
5032 		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
5033 		clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
5034 		clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
5035 		if (clock_counter_hi_pre != clock_counter_hi_after)
5036 			clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
5037 		preempt_enable();
5038 	}
5039 	clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
5040 
5041 	return clock;
5042 }
5043 
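/*
 * Program the per-VMID GDS, GWS and OA allocations with WRITE_DATA packets.
 * The OA value is a bitmask covering oa_size bits starting at oa_base; as an
 * illustrative example, oa_base = 4 and oa_size = 4 yields
 * (1 << 8) - (1 << 4) = 0xf0.
 */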
5044 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5045 					   uint32_t vmid,
5046 					   uint32_t gds_base, uint32_t gds_size,
5047 					   uint32_t gws_base, uint32_t gws_size,
5048 					   uint32_t oa_base, uint32_t oa_size)
5049 {
5050 	struct amdgpu_device *adev = ring->adev;
5051 
5052 	/* GDS Base */
5053 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5054 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
5055 				    gds_base);
5056 
5057 	/* GDS Size */
5058 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5059 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
5060 				    gds_size);
5061 
5062 	/* GWS */
5063 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5064 				    SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
5065 				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5066 
5067 	/* OA */
5068 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5069 				    SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
5070 				    (1 << (oa_size + oa_base)) - (1 << oa_base));
5071 }
5072 
5073 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
5074 {
5075 	struct amdgpu_device *adev = ip_block->adev;
5076 
5077 	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
5078 
5079 	adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
5080 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5081 					  AMDGPU_MAX_COMPUTE_RINGS);
5082 
5083 	gfx_v11_0_set_kiq_pm4_funcs(adev);
5084 	gfx_v11_0_set_ring_funcs(adev);
5085 	gfx_v11_0_set_irq_funcs(adev);
5086 	gfx_v11_0_set_gds_init(adev);
5087 	gfx_v11_0_set_rlc_funcs(adev);
5088 	gfx_v11_0_set_mqd_funcs(adev);
5089 	gfx_v11_0_set_imu_funcs(adev);
5090 
5091 	gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
5092 
5093 	return gfx_v11_0_init_microcode(adev);
5094 }
5095 
5096 static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block)
5097 {
5098 	struct amdgpu_device *adev = ip_block->adev;
5099 	int r;
5100 
5101 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5102 	if (r)
5103 		return r;
5104 
5105 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5106 	if (r)
5107 		return r;
5108 
5109 	r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
5110 	if (r)
5111 		return r;
5112 	return 0;
5113 }
5114 
5115 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
5116 {
5117 	uint32_t rlc_cntl;
5118 
5119 	/* report whether the RLC F32 core is currently enabled */
5120 	rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
5121 	return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
5122 }
5123 
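/*
 * Request RLC safe mode by writing the CMD bit plus a message to
 * RLC_SAFE_MODE, then poll until the CMD field clears, which indicates the
 * RLC has acknowledged the request.
 */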
5124 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
5125 {
5126 	uint32_t data;
5127 	unsigned i;
5128 
5129 	data = RLC_SAFE_MODE__CMD_MASK;
5130 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5131 
5132 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
5133 
5134 	/* wait for RLC_SAFE_MODE */
5135 	for (i = 0; i < adev->usec_timeout; i++) {
5136 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
5137 				   RLC_SAFE_MODE, CMD))
5138 			break;
5139 		udelay(1);
5140 	}
5141 }
5142 
5143 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
5144 {
5145 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
5146 }
5147 
5148 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
5149 				      bool enable)
5150 {
5151 	uint32_t def, data;
5152 
5153 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
5154 		return;
5155 
5156 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5157 
5158 	if (enable)
5159 		data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
5160 	else
5161 		data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
5162 
5163 	if (def != data)
5164 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5165 }
5166 
5167 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
5168 				       bool enable)
5169 {
5170 	uint32_t def, data;
5171 
5172 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
5173 		return;
5174 
5175 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5176 
5177 	if (enable)
5178 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
5179 	else
5180 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
5181 
5182 	if (def != data)
5183 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5184 }
5185 
5186 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
5187 					   bool enable)
5188 {
5189 	uint32_t def, data;
5190 
5191 	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
5192 		return;
5193 
5194 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5195 
5196 	if (enable)
5197 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
5198 	else
5199 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
5200 
5201 	if (def != data)
5202 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5203 }
5204 
5205 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5206 						       bool enable)
5207 {
5208 	uint32_t data, def;
5209 
5210 	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
5211 		return;
5212 
5213 	/* It is disabled by HW by default */
5214 	if (enable) {
5215 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5216 			/* 1 - RLC_CGTT_MGCG_OVERRIDE */
5217 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5218 
5219 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5220 				  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5221 				  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
5222 
5223 			if (def != data)
5224 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5225 		}
5226 	} else {
5227 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5228 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5229 
5230 			data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5231 				 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5232 				 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
5233 
5234 			if (def != data)
5235 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5236 		}
5237 	}
5238 }
5239 
5240 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5241 						       bool enable)
5242 {
5243 	uint32_t def, data;
5244 
5245 	if (!(adev->cg_flags &
5246 	      (AMD_CG_SUPPORT_GFX_CGCG |
5247 	      AMD_CG_SUPPORT_GFX_CGLS |
5248 	      AMD_CG_SUPPORT_GFX_3D_CGCG |
5249 	      AMD_CG_SUPPORT_GFX_3D_CGLS)))
5250 		return;
5251 
5252 	if (enable) {
5253 		def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5254 
5255 		/* unset CGCG override */
5256 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
5257 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5258 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5259 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5260 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
5261 		    adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5262 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5263 
5264 		/* update CGCG override bits */
5265 		if (def != data)
5266 			WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5267 
5268 		/* enable cgcg FSM(0x0000363F) */
5269 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5270 
5271 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5272 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
5273 			data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5274 				 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5275 		}
5276 
5277 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5278 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
5279 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5280 				 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5281 		}
5282 
5283 		if (def != data)
5284 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5285 
5286 		/* Program RLC_CGCG_CGLS_CTRL_3D */
5287 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5288 
5289 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5290 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
5291 			data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5292 				 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5293 		}
5294 
5295 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5296 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
5297 			data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5298 				 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5299 		}
5300 
5301 		if (def != data)
5302 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5303 
5304 		/* set IDLE_POLL_COUNT(0x00900100) */
5305 		def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
5306 
5307 		data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
5308 		data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5309 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5310 
5311 		if (def != data)
5312 			WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
5313 
5314 		data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
5315 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
5316 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
5317 		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
5318 		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
5319 		WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
5320 
5321 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5322 		data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5323 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5324 
5325 		/* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
5326 		if (adev->sdma.num_instances > 1) {
5327 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5328 			data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5329 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5330 		}
5331 	} else {
5332 		/* Program RLC_CGCG_CGLS_CTRL */
5333 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5334 
5335 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
5336 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5337 
5338 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5339 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5340 
5341 		if (def != data)
5342 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5343 
5344 		/* Program RLC_CGCG_CGLS_CTRL_3D */
5345 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5346 
5347 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5348 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5349 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5350 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5351 
5352 		if (def != data)
5353 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5354 
5355 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5356 		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5357 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5358 
5359 		/* Some ASICs only have one SDMA instance, no need to configure SDMA1 */
5360 		if (adev->sdma.num_instances > 1) {
5361 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5362 			data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5363 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5364 		}
5365 	}
5366 }
5367 
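/*
 * Toggle all supported GFX clock gating features while the RLC is held in
 * safe mode. The coarse grain, medium grain, FGCG and perfmon clock controls
 * are updated in sequence; the GUI idle interrupt is only touched when at
 * least one CGCG/CGLS/MGCG feature is supported.
 */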
5368 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5369 					    bool enable)
5370 {
5371 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5372 
5373 	gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
5374 
5375 	gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
5376 
5377 	gfx_v11_0_update_repeater_fgcg(adev, enable);
5378 
5379 	gfx_v11_0_update_sram_fgcg(adev, enable);
5380 
5381 	gfx_v11_0_update_perf_clk(adev, enable);
5382 
5383 	if (adev->cg_flags &
5384 	    (AMD_CG_SUPPORT_GFX_MGCG |
5385 	     AMD_CG_SUPPORT_GFX_CGLS |
5386 	     AMD_CG_SUPPORT_GFX_CGCG |
5387 	     AMD_CG_SUPPORT_GFX_3D_CGCG |
5388 	     AMD_CG_SUPPORT_GFX_3D_CGLS))
5389 		gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
5390 
5391 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5392 
5393 	return 0;
5394 }
5395 
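/*
 * Update the VMID used by RLC SPM in RLC_SPM_MC_CNTL. GFXOFF is disabled
 * around the register access, and when the value changed on a one-VF SR-IOV
 * configuration the write is additionally emitted on the given gfx or
 * compute ring.
 */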
5396 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
5397 {
5398 	u32 reg, pre_data, data;
5399 
5400 	amdgpu_gfx_off_ctrl(adev, false);
5401 	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
5402 	if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
5403 		pre_data = RREG32_NO_KIQ(reg);
5404 	else
5405 		pre_data = RREG32(reg);
5406 
5407 	data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
5408 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5409 
5410 	if (pre_data != data) {
5411 		if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
5412 			WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
5413 		else
5414 			WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
5415 	}
5416 	amdgpu_gfx_off_ctrl(adev, true);
5417 
5418 	if (ring
5419 		&& amdgpu_sriov_is_pp_one_vf(adev)
5420 		&& (pre_data != data)
5421 		&& ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
5422 			|| (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
5423 		amdgpu_ring_emit_wreg(ring, reg, data);
5424 	}
5425 }
5426 
5427 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5428 	.is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5429 	.set_safe_mode = gfx_v11_0_set_safe_mode,
5430 	.unset_safe_mode = gfx_v11_0_unset_safe_mode,
5431 	.init = gfx_v11_0_rlc_init,
5432 	.get_csb_size = gfx_v11_0_get_csb_size,
5433 	.get_csb_buffer = gfx_v11_0_get_csb_buffer,
5434 	.resume = gfx_v11_0_rlc_resume,
5435 	.stop = gfx_v11_0_rlc_stop,
5436 	.reset = gfx_v11_0_rlc_reset,
5437 	.start = gfx_v11_0_rlc_start,
5438 	.update_spm_vmid = gfx_v11_0_update_spm_vmid,
5439 };
5440 
5441 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5442 {
5443 	u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5444 
5445 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5446 		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5447 	else
5448 		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5449 
5450 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5451 
5452 	/* Program RLC_PG_DELAY_3 for CGPG hysteresis */
5453 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5454 		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5455 		case IP_VERSION(11, 0, 1):
5456 		case IP_VERSION(11, 0, 4):
5457 		case IP_VERSION(11, 5, 0):
5458 		case IP_VERSION(11, 5, 1):
5459 		case IP_VERSION(11, 5, 2):
5460 		case IP_VERSION(11, 5, 3):
5461 			WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5462 			break;
5463 		default:
5464 			break;
5465 		}
5466 	}
5467 }
5468 
5469 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5470 {
5471 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5472 
5473 	gfx_v11_cntl_power_gating(adev, enable);
5474 
5475 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5476 }
5477 
5478 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
5479 					   enum amd_powergating_state state)
5480 {
5481 	struct amdgpu_device *adev = ip_block->adev;
5482 	bool enable = (state == AMD_PG_STATE_GATE);
5483 
5484 	if (amdgpu_sriov_vf(adev))
5485 		return 0;
5486 
5487 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5488 	case IP_VERSION(11, 0, 0):
5489 	case IP_VERSION(11, 0, 2):
5490 	case IP_VERSION(11, 0, 3):
5491 		amdgpu_gfx_off_ctrl(adev, enable);
5492 		break;
5493 	case IP_VERSION(11, 0, 1):
5494 	case IP_VERSION(11, 0, 4):
5495 	case IP_VERSION(11, 5, 0):
5496 	case IP_VERSION(11, 5, 1):
5497 	case IP_VERSION(11, 5, 2):
5498 	case IP_VERSION(11, 5, 3):
5499 		if (!enable)
5500 			amdgpu_gfx_off_ctrl(adev, false);
5501 
5502 		gfx_v11_cntl_pg(adev, enable);
5503 
5504 		if (enable)
5505 			amdgpu_gfx_off_ctrl(adev, true);
5506 
5507 		break;
5508 	default:
5509 		break;
5510 	}
5511 
5512 	return 0;
5513 }
5514 
5515 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
5516 					  enum amd_clockgating_state state)
5517 {
5518 	struct amdgpu_device *adev = ip_block->adev;
5519 
5520 	if (amdgpu_sriov_vf(adev))
5521 		return 0;
5522 
5523 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5524 	case IP_VERSION(11, 0, 0):
5525 	case IP_VERSION(11, 0, 1):
5526 	case IP_VERSION(11, 0, 2):
5527 	case IP_VERSION(11, 0, 3):
5528 	case IP_VERSION(11, 0, 4):
5529 	case IP_VERSION(11, 5, 0):
5530 	case IP_VERSION(11, 5, 1):
5531 	case IP_VERSION(11, 5, 2):
5532 	case IP_VERSION(11, 5, 3):
5533 		gfx_v11_0_update_gfx_clock_gating(adev,
5534 						  state == AMD_CG_STATE_GATE);
5535 		break;
5536 	default:
5537 		break;
5538 	}
5539 
5540 	return 0;
5541 }
5542 
5543 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
5544 {
5545 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5546 	int data;
5547 
5548 	/* AMD_CG_SUPPORT_GFX_MGCG */
5549 	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5550 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5551 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5552 
5553 	/* AMD_CG_SUPPORT_REPEATER_FGCG */
5554 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5555 		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5556 
5557 	/* AMD_CG_SUPPORT_GFX_FGCG */
5558 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5559 		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
5560 
5561 	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
5562 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5563 		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5564 
5565 	/* AMD_CG_SUPPORT_GFX_CGCG */
5566 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5567 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5568 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5569 
5570 	/* AMD_CG_SUPPORT_GFX_CGLS */
5571 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5572 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5573 
5574 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5575 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5576 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5577 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5578 
5579 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5580 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5581 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5582 }
5583 
5584 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5585 {
5586 	/* gfx11 is 32bit rptr */
5587 	return *(uint32_t *)ring->rptr_cpu_addr;
5588 }
5589 
5590 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5591 {
5592 	struct amdgpu_device *adev = ring->adev;
5593 	u64 wptr;
5594 
5595 	/* XXX check if swapping is necessary on BE */
5596 	if (ring->use_doorbell) {
5597 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5598 	} else {
5599 		wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5600 		wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5601 	}
5602 
5603 	return wptr;
5604 }
5605 
5606 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5607 {
5608 	struct amdgpu_device *adev = ring->adev;
5609 
5610 	if (ring->use_doorbell) {
5611 		/* XXX check if swapping is necessary on BE */
5612 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5613 			     ring->wptr);
5614 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5615 	} else {
5616 		WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5617 			     lower_32_bits(ring->wptr));
5618 		WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5619 			     upper_32_bits(ring->wptr));
5620 	}
5621 }
5622 
5623 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5624 {
5625 	/* gfx11 hardware is 32bit rptr */
5626 	return *(uint32_t *)ring->rptr_cpu_addr;
5627 }
5628 
5629 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5630 {
5631 	u64 wptr;
5632 
5633 	/* XXX check if swapping is necessary on BE */
5634 	if (ring->use_doorbell)
5635 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5636 	else
5637 		BUG();
5638 	return wptr;
5639 }
5640 
5641 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5642 {
5643 	struct amdgpu_device *adev = ring->adev;
5644 
5645 	/* XXX check if swapping is necessary on BE */
5646 	if (ring->use_doorbell) {
5647 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5648 			     ring->wptr);
5649 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5650 	} else {
5651 		BUG(); /* only DOORBELL method supported on gfx11 now */
5652 	}
5653 }
5654 
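/*
 * Emit an HDP flush on the ring with a write-then-wait WAIT_REG_MEM packet:
 * the NBIO HDP flush request register is written and the matching done bit
 * is polled. The reference/mask bit is selected per ME/pipe; gfx rings poll
 * from the PFP, compute rings from the ME.
 */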
5655 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5656 {
5657 	struct amdgpu_device *adev = ring->adev;
5658 	u32 ref_and_mask, reg_mem_engine;
5659 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5660 
5661 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5662 		switch (ring->me) {
5663 		case 1:
5664 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5665 			break;
5666 		case 2:
5667 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5668 			break;
5669 		default:
5670 			return;
5671 		}
5672 		reg_mem_engine = 0;
5673 	} else {
5674 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
5675 		reg_mem_engine = 1; /* pfp */
5676 	}
5677 
5678 	gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5679 			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5680 			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5681 			       ref_and_mask, ref_and_mask, 0x20);
5682 }
5683 
5684 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5685 				       struct amdgpu_job *job,
5686 				       struct amdgpu_ib *ib,
5687 				       uint32_t flags)
5688 {
5689 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5690 	u32 header, control = 0;
5691 
5692 	BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
5693 
5694 	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5695 
5696 	control |= ib->length_dw | (vmid << 24);
5697 
5698 	if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5699 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5700 
5701 		if (flags & AMDGPU_IB_PREEMPTED)
5702 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5703 
5704 		if (vmid)
5705 			gfx_v11_0_ring_emit_de_meta(ring,
5706 				    (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
5707 	}
5708 
5709 	if (ring->is_mes_queue)
5710 		/* inherit vmid from mqd */
5711 		control |= 0x400000;
5712 
5713 	amdgpu_ring_write(ring, header);
5714 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5715 	amdgpu_ring_write(ring,
5716 #ifdef __BIG_ENDIAN
5717 		(2 << 0) |
5718 #endif
5719 		lower_32_bits(ib->gpu_addr));
5720 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5721 	amdgpu_ring_write(ring, control);
5722 }
5723 
5724 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5725 					   struct amdgpu_job *job,
5726 					   struct amdgpu_ib *ib,
5727 					   uint32_t flags)
5728 {
5729 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5730 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5731 
5732 	if (ring->is_mes_queue)
5733 		/* inherit vmid from mqd */
5734 		control |= 0x40000000;
5735 
5736 	/* Currently, there is a high possibility to get wave ID mismatch
5737 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5738 	 * different wave IDs than the GDS expects. This situation happens
5739 	 * randomly when at least 5 compute pipes use GDS ordered append.
5740 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5741 	 * Those are probably bugs somewhere else in the kernel driver.
5742 	 *
5743 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5744 	 * GDS to 0 for this ring (me/pipe).
5745 	 */
5746 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5747 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5748 		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5749 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5750 	}
5751 
5752 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5753 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5754 	amdgpu_ring_write(ring,
5755 #ifdef __BIG_ENDIAN
5756 				(2 << 0) |
5757 #endif
5758 				lower_32_bits(ib->gpu_addr));
5759 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5760 	amdgpu_ring_write(ring, control);
5761 }
5762 
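/*
 * Emit a fence with a RELEASE_MEM packet that flushes the GL2/GLM caches
 * (GL2 writeback, GLM writeback and invalidate), writes the sequence number
 * as 32 or 64 bits depending on AMDGPU_FENCE_FLAG_64BIT and raises an
 * interrupt when AMDGPU_FENCE_FLAG_INT is set.
 */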
5763 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5764 				     u64 seq, unsigned flags)
5765 {
5766 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5767 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5768 
5769 	/* RELEASE_MEM - flush caches, send int */
5770 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5771 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5772 				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
5773 				 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
5774 				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
5775 				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5776 				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5777 				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
5778 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5779 				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5780 
5781 	/*
5782 	 * the address should be Qword aligned for a 64bit write, Dword
5783 	 * aligned if only the low 32bit data is sent (data high is discarded)
5784 	 */
5785 	if (write64bit)
5786 		BUG_ON(addr & 0x7);
5787 	else
5788 		BUG_ON(addr & 0x3);
5789 	amdgpu_ring_write(ring, lower_32_bits(addr));
5790 	amdgpu_ring_write(ring, upper_32_bits(addr));
5791 	amdgpu_ring_write(ring, lower_32_bits(seq));
5792 	amdgpu_ring_write(ring, upper_32_bits(seq));
5793 	amdgpu_ring_write(ring, ring->is_mes_queue ?
5794 			 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
5795 }
5796 
5797 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5798 {
5799 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5800 	uint32_t seq = ring->fence_drv.sync_seq;
5801 	uint64_t addr = ring->fence_drv.gpu_addr;
5802 
5803 	gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5804 			       upper_32_bits(addr), seq, 0xffffffff, 4);
5805 }
5806 
5807 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5808 				   uint16_t pasid, uint32_t flush_type,
5809 				   bool all_hub, uint8_t dst_sel)
5810 {
5811 	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5812 	amdgpu_ring_write(ring,
5813 			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5814 			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5815 			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5816 			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5817 }
5818 
5819 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5820 					 unsigned vmid, uint64_t pd_addr)
5821 {
5822 	if (ring->is_mes_queue)
5823 		gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5824 	else
5825 		amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5826 
5827 	/* compute doesn't have PFP */
5828 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5829 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5830 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5831 		amdgpu_ring_write(ring, 0x0);
5832 	}
5833 
5834 	/* Make sure that we can't skip the SET_Q_MODE packets when the VM
5835 	 * changed in any way.
5836 	 */
5837 	ring->set_q_mode_offs = 0;
5838 	ring->set_q_mode_ptr = NULL;
5839 }
5840 
5841 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5842 					  u64 seq, unsigned int flags)
5843 {
5844 	struct amdgpu_device *adev = ring->adev;
5845 
5846 	/* we only allocate 32bit for each seq wb address */
5847 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5848 
5849 	/* write fence seq to the "addr" */
5850 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5851 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5852 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5853 	amdgpu_ring_write(ring, lower_32_bits(addr));
5854 	amdgpu_ring_write(ring, upper_32_bits(addr));
5855 	amdgpu_ring_write(ring, lower_32_bits(seq));
5856 
5857 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5858 		/* set register to trigger INT */
5859 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5860 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5861 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5862 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
5863 		amdgpu_ring_write(ring, 0);
5864 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5865 	}
5866 }
5867 
5868 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
5869 					 uint32_t flags)
5870 {
5871 	uint32_t dw2 = 0;
5872 
5873 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5874 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5875 		/* set load_global_config & load_global_uconfig */
5876 		dw2 |= 0x8001;
5877 		/* set load_cs_sh_regs */
5878 		dw2 |= 0x01000000;
5879 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5880 		dw2 |= 0x10002;
5881 	}
5882 
5883 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5884 	amdgpu_ring_write(ring, dw2);
5885 	amdgpu_ring_write(ring, 0);
5886 }
5887 
5888 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5889 						   uint64_t addr)
5890 {
5891 	unsigned ret;
5892 
5893 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5894 	amdgpu_ring_write(ring, lower_32_bits(addr));
5895 	amdgpu_ring_write(ring, upper_32_bits(addr));
5896 	/* discard following DWs if *cond_exec_gpu_addr==0 */
5897 	amdgpu_ring_write(ring, 0);
5898 	ret = ring->wptr & ring->buf_mask;
5899 	/* patch dummy value later */
5900 	amdgpu_ring_write(ring, 0);
5901 
5902 	return ret;
5903 }
5904 
5905 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
5906 					   u64 shadow_va, u64 csa_va,
5907 					   u64 gds_va, bool init_shadow,
5908 					   int vmid)
5909 {
5910 	struct amdgpu_device *adev = ring->adev;
5911 	unsigned int offs, end;
5912 
5913 	if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
5914 		return;
5915 
5916 	/*
5917 	 * The logic here isn't easy to understand because we need to keep state
5918 	 * across multiple executions of the function as well as between the
5919 	 * CPU and GPU. The general idea is that the newly written GPU command
5920 	 * has a condition on the previous one and is only executed if really
5921 	 * necessary.
5922 	 */
5923 
5924 	/*
5925 	 * The dw in the NOP controls if the next SET_Q_MODE packet should be
5926 	 * executed or not. Reserve 64bits just to be on the safe side.
5927 	 */
5928 	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
5929 	offs = ring->wptr & ring->buf_mask;
5930 
5931 	/*
5932 	 * We start with skipping the prefix SET_Q_MODE and always executing
5933 	 * the postfix SET_Q_MODE packet. This is changed below with a
5934 	 * WRITE_DATA command when the postfix is executed.
5935 	 */
5936 	amdgpu_ring_write(ring, shadow_va ? 1 : 0);
5937 	amdgpu_ring_write(ring, 0);
5938 
5939 	if (ring->set_q_mode_offs) {
5940 		uint64_t addr;
5941 
5942 		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
5943 		addr += ring->set_q_mode_offs << 2;
5944 		end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
5945 	}
5946 
5947 	/*
5948 	 * When the postfix SET_Q_MODE packet executes we need to make sure that the
5949 	 * next prefix SET_Q_MODE packet executes as well.
5950 	 */
5951 	if (!shadow_va) {
5952 		uint64_t addr;
5953 
5954 		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
5955 		addr += offs << 2;
5956 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5957 		amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5958 		amdgpu_ring_write(ring, lower_32_bits(addr));
5959 		amdgpu_ring_write(ring, upper_32_bits(addr));
5960 		amdgpu_ring_write(ring, 0x1);
5961 	}
5962 
5963 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
5964 	amdgpu_ring_write(ring, lower_32_bits(shadow_va));
5965 	amdgpu_ring_write(ring, upper_32_bits(shadow_va));
5966 	amdgpu_ring_write(ring, lower_32_bits(gds_va));
5967 	amdgpu_ring_write(ring, upper_32_bits(gds_va));
5968 	amdgpu_ring_write(ring, lower_32_bits(csa_va));
5969 	amdgpu_ring_write(ring, upper_32_bits(csa_va));
5970 	amdgpu_ring_write(ring, shadow_va ?
5971 			  PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
5972 	amdgpu_ring_write(ring, init_shadow ?
5973 			  PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
5974 
5975 	if (ring->set_q_mode_offs)
5976 		amdgpu_ring_patch_cond_exec(ring, end);
5977 
5978 	if (shadow_va) {
5979 		uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;
5980 
5981 		/*
5982 		 * If the tokens match try to skip the last postfix SET_Q_MODE
5983 		 * packet to avoid saving/restoring the state all the time.
5984 		 */
5985 		if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
5986 			*ring->set_q_mode_ptr = 0;
5987 
5988 		ring->set_q_mode_token = token;
5989 	} else {
5990 		ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
5991 	}
5992 
5993 	ring->set_q_mode_offs = offs;
5994 }
5995 
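/*
 * Preempt the gfx ring through the KIQ: an UNMAP_QUEUES packet with the
 * PREEMPT_QUEUES_NO_UNMAP action and a trailing fence is submitted, then the
 * trailing fence is polled to confirm the preemption completed. Not
 * supported when MES is enabled.
 */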
5996 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
5997 {
5998 	int i, r = 0;
5999 	struct amdgpu_device *adev = ring->adev;
6000 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
6001 	struct amdgpu_ring *kiq_ring = &kiq->ring;
6002 	unsigned long flags;
6003 
6004 	if (adev->enable_mes)
6005 		return -EINVAL;
6006 
6007 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
6008 		return -EINVAL;
6009 
6010 	spin_lock_irqsave(&kiq->ring_lock, flags);
6011 
6012 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
6013 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
6014 		return -ENOMEM;
6015 	}
6016 
6017 	/* assert preemption condition */
6018 	amdgpu_ring_set_preempt_cond_exec(ring, false);
6019 
6020 	/* assert IB preemption, emit the trailing fence */
6021 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
6022 				   ring->trail_fence_gpu_addr,
6023 				   ++ring->trail_seq);
6024 	amdgpu_ring_commit(kiq_ring);
6025 
6026 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
6027 
6028 	/* poll the trailing fence */
6029 	for (i = 0; i < adev->usec_timeout; i++) {
6030 		if (ring->trail_seq ==
6031 		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
6032 			break;
6033 		udelay(1);
6034 	}
6035 
6036 	if (i >= adev->usec_timeout) {
6037 		r = -EINVAL;
6038 		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
6039 	}
6040 
6041 	/* deassert preemption condition */
6042 	amdgpu_ring_set_preempt_cond_exec(ring, true);
6043 	return r;
6044 }
6045 
6046 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
6047 {
6048 	struct amdgpu_device *adev = ring->adev;
6049 	struct v10_de_ib_state de_payload = {0};
6050 	uint64_t offset, gds_addr, de_payload_gpu_addr;
6051 	void *de_payload_cpu_addr;
6052 	int cnt;
6053 
6054 	if (ring->is_mes_queue) {
6055 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
6056 				  gfx[0].gfx_meta_data) +
6057 			offsetof(struct v10_gfx_meta_data, de_payload);
6058 		de_payload_gpu_addr =
6059 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
6060 		de_payload_cpu_addr =
6061 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
6062 
6063 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
6064 				  gfx[0].gds_backup) +
6065 			offsetof(struct v10_gfx_meta_data, de_payload);
6066 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
6067 	} else {
6068 		offset = offsetof(struct v10_gfx_meta_data, de_payload);
6069 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
6070 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
6071 
6072 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
6073 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
6074 				 PAGE_SIZE);
6075 	}
6076 
6077 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
6078 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
6079 
6080 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
6081 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
6082 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
6083 				 WRITE_DATA_DST_SEL(8) |
6084 				 WR_CONFIRM) |
6085 				 WRITE_DATA_CACHE_POLICY(0));
6086 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
6087 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
6088 
6089 	if (resume)
6090 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
6091 					   sizeof(de_payload) >> 2);
6092 	else
6093 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
6094 					   sizeof(de_payload) >> 2);
6095 }
6096 
6097 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
6098 				    bool secure)
6099 {
6100 	uint32_t v = secure ? FRAME_TMZ : 0;
6101 
6102 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
6103 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
6104 }
6105 
6106 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6107 				     uint32_t reg_val_offs)
6108 {
6109 	struct amdgpu_device *adev = ring->adev;
6110 
6111 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6112 	amdgpu_ring_write(ring, 0 |	/* src: register */
6113 				(5 << 8) |	/* dst: memory */
6114 				(1 << 20));	/* write confirm */
6115 	amdgpu_ring_write(ring, reg);
6116 	amdgpu_ring_write(ring, 0);
6117 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6118 				reg_val_offs * 4));
6119 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6120 				reg_val_offs * 4));
6121 }
6122 
6123 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6124 				   uint32_t val)
6125 {
6126 	uint32_t cmd = 0;
6127 
6128 	switch (ring->funcs->type) {
6129 	case AMDGPU_RING_TYPE_GFX:
6130 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6131 		break;
6132 	case AMDGPU_RING_TYPE_KIQ:
6133 		cmd = (1 << 16); /* no inc addr */
6134 		break;
6135 	default:
6136 		cmd = WR_CONFIRM;
6137 		break;
6138 	}
6139 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6140 	amdgpu_ring_write(ring, cmd);
6141 	amdgpu_ring_write(ring, reg);
6142 	amdgpu_ring_write(ring, 0);
6143 	amdgpu_ring_write(ring, val);
6144 }
6145 
6146 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
6147 					uint32_t val, uint32_t mask)
6148 {
6149 	gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
6150 }
6151 
6152 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
6153 						   uint32_t reg0, uint32_t reg1,
6154 						   uint32_t ref, uint32_t mask)
6155 {
6156 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6157 
6158 	gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
6159 			       ref, mask, 0x20);
6160 }
6161 
6162 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
6163 					 unsigned vmid)
6164 {
6165 	struct amdgpu_device *adev = ring->adev;
6166 	uint32_t value = 0;
6167 
6168 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6169 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6170 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6171 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6172 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
6173 	WREG32_SOC15(GC, 0, regSQ_CMD, value);
6174 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
6175 }
6176 
6177 static void
6178 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6179 				      uint32_t me, uint32_t pipe,
6180 				      enum amdgpu_interrupt_state state)
6181 {
6182 	uint32_t cp_int_cntl, cp_int_cntl_reg;
6183 
6184 	if (!me) {
6185 		switch (pipe) {
6186 		case 0:
6187 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
6188 			break;
6189 		case 1:
6190 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
6191 			break;
6192 		default:
6193 			DRM_DEBUG("invalid pipe %d\n", pipe);
6194 			return;
6195 		}
6196 	} else {
6197 		DRM_DEBUG("invalid me %d\n", me);
6198 		return;
6199 	}
6200 
6201 	switch (state) {
6202 	case AMDGPU_IRQ_STATE_DISABLE:
6203 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6204 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6205 					    TIME_STAMP_INT_ENABLE, 0);
6206 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6207 					    GENERIC0_INT_ENABLE, 0);
6208 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6209 		break;
6210 	case AMDGPU_IRQ_STATE_ENABLE:
6211 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6212 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6213 					    TIME_STAMP_INT_ENABLE, 1);
6214 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6215 					    GENERIC0_INT_ENABLE, 1);
6216 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6217 		break;
6218 	default:
6219 		break;
6220 	}
6221 }
6222 
6223 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6224 						     int me, int pipe,
6225 						     enum amdgpu_interrupt_state state)
6226 {
6227 	u32 mec_int_cntl, mec_int_cntl_reg;
6228 
6229 	/*
6230 	 * amdgpu controls only the first MEC. That's why this function only
6231 	 * handles the setting of interrupts for this specific MEC. All other
6232 	 * pipes' interrupts are set by amdkfd.
6233 	 */
6234 
6235 	if (me == 1) {
6236 		switch (pipe) {
6237 		case 0:
6238 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6239 			break;
6240 		case 1:
6241 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
6242 			break;
6243 		case 2:
6244 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
6245 			break;
6246 		case 3:
6247 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
6248 			break;
6249 		default:
6250 			DRM_DEBUG("invalid pipe %d\n", pipe);
6251 			return;
6252 		}
6253 	} else {
6254 		DRM_DEBUG("invalid me %d\n", me);
6255 		return;
6256 	}
6257 
6258 	switch (state) {
6259 	case AMDGPU_IRQ_STATE_DISABLE:
6260 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6261 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6262 					     TIME_STAMP_INT_ENABLE, 0);
6263 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6264 					     GENERIC0_INT_ENABLE, 0);
6265 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6266 		break;
6267 	case AMDGPU_IRQ_STATE_ENABLE:
6268 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6269 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6270 					     TIME_STAMP_INT_ENABLE, 1);
6271 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6272 					     GENERIC0_INT_ENABLE, 1);
6273 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6274 		break;
6275 	default:
6276 		break;
6277 	}
6278 }
6279 
6280 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6281 					    struct amdgpu_irq_src *src,
6282 					    unsigned type,
6283 					    enum amdgpu_interrupt_state state)
6284 {
6285 	switch (type) {
6286 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6287 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
6288 		break;
6289 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
6290 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
6291 		break;
6292 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6293 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6294 		break;
6295 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6296 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6297 		break;
6298 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6299 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6300 		break;
6301 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6302 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6303 		break;
6304 	default:
6305 		break;
6306 	}
6307 	return 0;
6308 }
6309 
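/*
 * IRQ source .process callback for CP EOP interrupts.  For MES-managed
 * queues the queue is looked up by the id carried in src_data[0]; otherwise
 * me/pipe/queue are decoded from ring_id and the fence of the matching GFX
 * or compute ring is processed.
 */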
6310 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
6311 			     struct amdgpu_irq_src *source,
6312 			     struct amdgpu_iv_entry *entry)
6313 {
6314 	int i;
6315 	u8 me_id, pipe_id, queue_id;
6316 	struct amdgpu_ring *ring;
6317 	uint32_t mes_queue_id = entry->src_data[0];
6318 
6319 	DRM_DEBUG("IH: CP EOP\n");
6320 
6321 	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
6322 		struct amdgpu_mes_queue *queue;
6323 
6324 		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
6325 
6326 		spin_lock(&adev->mes.queue_id_lock);
6327 		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
6328 		if (queue) {
6329 			DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
6330 			amdgpu_fence_process(queue->ring);
6331 		}
6332 		spin_unlock(&adev->mes.queue_id_lock);
6333 	} else {
6334 		me_id = (entry->ring_id & 0x0c) >> 2;
6335 		pipe_id = (entry->ring_id & 0x03) >> 0;
6336 		queue_id = (entry->ring_id & 0x70) >> 4;
6337 
6338 		switch (me_id) {
6339 		case 0:
6340 			if (pipe_id == 0)
6341 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6342 			else
6343 				amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
6344 			break;
6345 		case 1:
6346 		case 2:
6347 			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6348 				ring = &adev->gfx.compute_ring[i];
6349 				/* Per-queue interrupt is supported for MEC starting from VI.
6350 				 * The interrupt can only be enabled/disabled per pipe instead
6351 				 * of per queue.
6352 				 */
6353 				if ((ring->me == me_id) &&
6354 				    (ring->pipe == pipe_id) &&
6355 				    (ring->queue == queue_id))
6356 					amdgpu_fence_process(ring);
6357 			}
6358 			break;
6359 		}
6360 	}
6361 
6362 	return 0;
6363 }
6364 
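/*
 * Toggle the privileged register access fault interrupt
 * (PRIV_REG_INT_ENABLE) on every GFX (CPG) and compute (CPC) pipe.
 */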
6365 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6366 					      struct amdgpu_irq_src *source,
6367 					      unsigned int type,
6368 					      enum amdgpu_interrupt_state state)
6369 {
6370 	u32 cp_int_cntl_reg, cp_int_cntl;
6371 	int i, j;
6372 
6373 	switch (state) {
6374 	case AMDGPU_IRQ_STATE_DISABLE:
6375 	case AMDGPU_IRQ_STATE_ENABLE:
6376 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6377 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6378 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6379 
6380 				if (cp_int_cntl_reg) {
6381 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6382 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6383 								    PRIV_REG_INT_ENABLE,
6384 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6385 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6386 				}
6387 			}
6388 		}
6389 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6390 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6391 				/* MECs start at 1 */
6392 				cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
6393 
6394 				if (cp_int_cntl_reg) {
6395 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6396 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6397 								    PRIV_REG_INT_ENABLE,
6398 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6399 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6400 				}
6401 			}
6402 		}
6403 		break;
6404 	default:
6405 		break;
6406 	}
6407 
6408 	return 0;
6409 }
6410 
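/*
 * Toggle the illegal opcode fault interrupt (OPCODE_ERROR_INT_ENABLE) on
 * every GFX (CPG) and compute (CPC) pipe.
 */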
6411 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev,
6412 					    struct amdgpu_irq_src *source,
6413 					    unsigned int type,
6414 					    enum amdgpu_interrupt_state state)
6415 {
6416 	u32 cp_int_cntl_reg, cp_int_cntl;
6417 	int i, j;
6418 
6419 	switch (state) {
6420 	case AMDGPU_IRQ_STATE_DISABLE:
6421 	case AMDGPU_IRQ_STATE_ENABLE:
6422 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6423 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6424 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6425 
6426 				if (cp_int_cntl_reg) {
6427 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6428 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6429 								    OPCODE_ERROR_INT_ENABLE,
6430 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6431 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6432 				}
6433 			}
6434 		}
6435 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6436 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6437 				/* MECs start at 1 */
6438 				cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
6439 
6440 				if (cp_int_cntl_reg) {
6441 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6442 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6443 								    OPCODE_ERROR_INT_ENABLE,
6444 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6445 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6446 				}
6447 			}
6448 		}
6449 		break;
6450 	default:
6451 		break;
6452 	}
6453 	return 0;
6454 }
6455 
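/*
 * Toggle the privileged instruction fault interrupt (PRIV_INSTR_INT_ENABLE)
 * on the GFX (CPG) pipes.
 */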
6456 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6457 					       struct amdgpu_irq_src *source,
6458 					       unsigned int type,
6459 					       enum amdgpu_interrupt_state state)
6460 {
6461 	u32 cp_int_cntl_reg, cp_int_cntl;
6462 	int i, j;
6463 
6464 	switch (state) {
6465 	case AMDGPU_IRQ_STATE_DISABLE:
6466 	case AMDGPU_IRQ_STATE_ENABLE:
6467 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6468 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6469 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6470 
6471 				if (cp_int_cntl_reg) {
6472 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6473 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6474 								    PRIV_INSTR_INT_ENABLE,
6475 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6476 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6477 				}
6478 			}
6479 		}
6480 		break;
6481 	default:
6482 		break;
6483 	}
6484 
6485 	return 0;
6486 }
6487 
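/*
 * Common handler for privilege/opcode faults: decode me/pipe/queue from the
 * IV ring_id and report a scheduler fault on the matching ring.
 */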
6488 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
6489 					struct amdgpu_iv_entry *entry)
6490 {
6491 	u8 me_id, pipe_id, queue_id;
6492 	struct amdgpu_ring *ring;
6493 	int i;
6494 
6495 	me_id = (entry->ring_id & 0x0c) >> 2;
6496 	pipe_id = (entry->ring_id & 0x03) >> 0;
6497 	queue_id = (entry->ring_id & 0x70) >> 4;
6498 
6499 	switch (me_id) {
6500 	case 0:
6501 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
6502 			ring = &adev->gfx.gfx_ring[i];
6503 			if (ring->me == me_id && ring->pipe == pipe_id &&
6504 			    ring->queue == queue_id)
6505 				drm_sched_fault(&ring->sched);
6506 		}
6507 		break;
6508 	case 1:
6509 	case 2:
6510 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6511 			ring = &adev->gfx.compute_ring[i];
6512 			if (ring->me == me_id && ring->pipe == pipe_id &&
6513 			    ring->queue == queue_id)
6514 				drm_sched_fault(&ring->sched);
6515 		}
6516 		break;
6517 	default:
6518 		BUG();
6519 		break;
6520 	}
6521 }
6522 
6523 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
6524 				  struct amdgpu_irq_src *source,
6525 				  struct amdgpu_iv_entry *entry)
6526 {
6527 	DRM_ERROR("Illegal register access in command stream\n");
6528 	gfx_v11_0_handle_priv_fault(adev, entry);
6529 	return 0;
6530 }
6531 
6532 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev,
6533 				struct amdgpu_irq_src *source,
6534 				struct amdgpu_iv_entry *entry)
6535 {
6536 	DRM_ERROR("Illegal opcode in command stream\n");
6537 	gfx_v11_0_handle_priv_fault(adev, entry);
6538 	return 0;
6539 }
6540 
6541 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
6542 				   struct amdgpu_irq_src *source,
6543 				   struct amdgpu_iv_entry *entry)
6544 {
6545 	DRM_ERROR("Illegal instruction in command stream\n");
6546 	gfx_v11_0_handle_priv_fault(adev, entry);
6547 	return 0;
6548 }
6549 
6550 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
6551 				  struct amdgpu_irq_src *source,
6552 				  struct amdgpu_iv_entry *entry)
6553 {
6554 	if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq)
6555 		return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);
6556 
6557 	return 0;
6558 }
6559 
6560 #if 0
6561 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6562 					     struct amdgpu_irq_src *src,
6563 					     unsigned int type,
6564 					     enum amdgpu_interrupt_state state)
6565 {
6566 	uint32_t tmp, target;
6567 	struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
6568 
6569 	target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6570 	target += ring->pipe;
6571 
6572 	switch (type) {
6573 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6574 		if (state == AMDGPU_IRQ_STATE_DISABLE) {
6575 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6576 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6577 					    GENERIC2_INT_ENABLE, 0);
6578 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6579 
6580 			tmp = RREG32_SOC15_IP(GC, target);
6581 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6582 					    GENERIC2_INT_ENABLE, 0);
6583 			WREG32_SOC15_IP(GC, target, tmp);
6584 		} else {
6585 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6586 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6587 					    GENERIC2_INT_ENABLE, 1);
6588 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6589 
6590 			tmp = RREG32_SOC15_IP(GC, target);
6591 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6592 					    GENERIC2_INT_ENABLE, 1);
6593 			WREG32_SOC15_IP(GC, target, tmp);
6594 		}
6595 		break;
6596 	default:
6597 		BUG(); /* kiq only support GENERIC2_INT now */
6598 		break;
6599 	}
6600 	return 0;
6601 }
6602 #endif
6603 
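/*
 * Emit an ACQUIRE_MEM packet covering the full address range that writes
 * back and/or invalidates the GL2, GLM, GL1, GLV, GLK and GLI caches before
 * the following commands execute.
 */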
6604 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6605 {
6606 	const unsigned int gcr_cntl =
6607 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6608 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6609 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6610 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6611 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6612 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6613 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6614 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6615 
6616 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6617 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6618 	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6619 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6620 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6621 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6622 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6623 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6624 	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6625 }
6626 
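/*
 * Reset a kernel gfx queue: request a legacy queue reset from the MES,
 * reinitialize the ring's MQD, remap the queue through the MES and verify
 * it with a ring test.  Not supported under SR-IOV.
 */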
6627 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
6628 {
6629 	struct amdgpu_device *adev = ring->adev;
6630 	int r;
6631 
6632 	if (amdgpu_sriov_vf(adev))
6633 		return -EINVAL;
6634 
6635 	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
6636 	if (r)
6637 		return r;
6638 
6639 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
6640 	if (unlikely(r != 0)) {
6641 		dev_err(adev->dev, "failed to reserve mqd_obj\n");
6642 		return r;
6643 	}
6644 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
6645 	if (!r) {
6646 		r = gfx_v11_0_kgq_init_queue(ring, true);
6647 		amdgpu_bo_kunmap(ring->mqd_obj);
6648 		ring->mqd_ptr = NULL;
6649 	}
6650 	amdgpu_bo_unreserve(ring->mqd_obj);
6651 	if (r) {
6652 		dev_err(adev->dev, "failed to kmap or init kgq mqd\n");
6653 		return r;
6654 	}
6655 
6656 	r = amdgpu_mes_map_legacy_queue(adev, ring);
6657 	if (r) {
6658 		dev_err(adev->dev, "failed to remap kgq\n");
6659 		return r;
6660 	}
6661 
6662 	return amdgpu_ring_test_ring(ring);
6663 }
6664 
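/*
 * Reset a kernel compute queue: same flow as gfx_v11_0_reset_kgq(), but the
 * legacy queue reset is requested through the MMIO path before the MQD is
 * reinitialized and the queue remapped.
 */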
6665 static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
6666 {
6667 	struct amdgpu_device *adev = ring->adev;
6668 	int r = 0;
6669 
6670 	if (amdgpu_sriov_vf(adev))
6671 		return -EINVAL;
6672 
6673 	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
6674 	if (r) {
6675 		dev_err(adev->dev, "reset via MMIO failed %d\n", r);
6676 		return r;
6677 	}
6678 
6679 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
6680 	if (unlikely(r != 0)) {
6681 		dev_err(adev->dev, "failed to reserve mqd_obj\n");
6682 		return r;
6683 	}
6684 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
6685 	if (!r) {
6686 		r = gfx_v11_0_kcq_init_queue(ring, true);
6687 		amdgpu_bo_kunmap(ring->mqd_obj);
6688 		ring->mqd_ptr = NULL;
6689 	}
6690 	amdgpu_bo_unreserve(ring->mqd_obj);
6691 	if (r) {
6692 		dev_err(adev->dev, "failed to kmap or init kcq mqd\n");
6693 		return r;
6694 	}
6695 	r = amdgpu_mes_map_legacy_queue(adev, ring);
6696 	if (r) {
6697 		dev_err(adev->dev, "failed to remap kcq\n");
6698 		return r;
6699 	}
6700 
6701 	return amdgpu_ring_test_ring(ring);
6702 }
6703 
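/*
 * Print the register values previously captured by gfx_v11_ip_dump(): the
 * core GC registers followed by the per-queue compute and gfx register sets.
 */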
6704 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
6705 {
6706 	struct amdgpu_device *adev = ip_block->adev;
6707 	uint32_t i, j, k, reg, index = 0;
6708 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
6709 
6710 	if (!adev->gfx.ip_dump_core)
6711 		return;
6712 
6713 	for (i = 0; i < reg_count; i++)
6714 		drm_printf(p, "%-50s \t 0x%08x\n",
6715 			   gc_reg_list_11_0[i].reg_name,
6716 			   adev->gfx.ip_dump_core[i]);
6717 
6718 	/* print compute queue registers for all instances */
6719 	if (!adev->gfx.ip_dump_compute_queues)
6720 		return;
6721 
6722 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
6723 	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
6724 		   adev->gfx.mec.num_mec,
6725 		   adev->gfx.mec.num_pipe_per_mec,
6726 		   adev->gfx.mec.num_queue_per_pipe);
6727 
6728 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6729 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6730 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
6731 				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
6732 				for (reg = 0; reg < reg_count; reg++) {
6733 					drm_printf(p, "%-50s \t 0x%08x\n",
6734 						   gc_cp_reg_list_11[reg].reg_name,
6735 						   adev->gfx.ip_dump_compute_queues[index + reg]);
6736 				}
6737 				index += reg_count;
6738 			}
6739 		}
6740 	}
6741 
6742 	/* print gfx queue registers for all instances */
6743 	if (!adev->gfx.ip_dump_gfx_queues)
6744 		return;
6745 
6746 	index = 0;
6747 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
6748 	drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
6749 		   adev->gfx.me.num_me,
6750 		   adev->gfx.me.num_pipe_per_me,
6751 		   adev->gfx.me.num_queue_per_pipe);
6752 
6753 	for (i = 0; i < adev->gfx.me.num_me; i++) {
6754 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6755 			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
6756 				drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
6757 				for (reg = 0; reg < reg_count; reg++) {
6758 					drm_printf(p, "%-50s \t 0x%08x\n",
6759 						   gc_gfx_queue_reg_list_11[reg].reg_name,
6760 						   adev->gfx.ip_dump_gfx_queues[index + reg]);
6761 				}
6762 				index += reg_count;
6763 			}
6764 		}
6765 	}
6766 }
6767 
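/*
 * Capture the GC register state for the IP dump interface.  GFXOFF is kept
 * disabled while reading, and each compute and gfx queue is selected via
 * soc21_grbm_select() under srbm_mutex to snapshot its per-queue registers.
 */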
6768 static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
6769 {
6770 	struct amdgpu_device *adev = ip_block->adev;
6771 	uint32_t i, j, k, reg, index = 0;
6772 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
6773 
6774 	if (!adev->gfx.ip_dump_core)
6775 		return;
6776 
6777 	amdgpu_gfx_off_ctrl(adev, false);
6778 	for (i = 0; i < reg_count; i++)
6779 		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
6780 	amdgpu_gfx_off_ctrl(adev, true);
6781 
6782 	/* dump compute queue registers for all instances */
6783 	if (!adev->gfx.ip_dump_compute_queues)
6784 		return;
6785 
6786 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
6787 	amdgpu_gfx_off_ctrl(adev, false);
6788 	mutex_lock(&adev->srbm_mutex);
6789 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6790 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6791 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
6792 				/* ME0 is for GFX, so compute MEC selection starts at me index num_me */
6793 				soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
6794 				for (reg = 0; reg < reg_count; reg++) {
6795 					adev->gfx.ip_dump_compute_queues[index + reg] =
6796 						RREG32(SOC15_REG_ENTRY_OFFSET(
6797 							gc_cp_reg_list_11[reg]));
6798 				}
6799 				index += reg_count;
6800 			}
6801 		}
6802 	}
6803 	soc21_grbm_select(adev, 0, 0, 0, 0);
6804 	mutex_unlock(&adev->srbm_mutex);
6805 	amdgpu_gfx_off_ctrl(adev, true);
6806 
6807 	/* dump gfx queue registers for all instances */
6808 	if (!adev->gfx.ip_dump_gfx_queues)
6809 		return;
6810 
6811 	index = 0;
6812 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
6813 	amdgpu_gfx_off_ctrl(adev, false);
6814 	mutex_lock(&adev->srbm_mutex);
6815 	for (i = 0; i < adev->gfx.me.num_me; i++) {
6816 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6817 			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
6818 				soc21_grbm_select(adev, i, j, k, 0);
6819 
6820 				for (reg = 0; reg < reg_count; reg++) {
6821 					adev->gfx.ip_dump_gfx_queues[index + reg] =
6822 						RREG32(SOC15_REG_ENTRY_OFFSET(
6823 							gc_gfx_queue_reg_list_11[reg]));
6824 				}
6825 				index += reg_count;
6826 			}
6827 		}
6828 	}
6829 	soc21_grbm_select(adev, 0, 0, 0, 0);
6830 	mutex_unlock(&adev->srbm_mutex);
6831 	amdgpu_gfx_off_ctrl(adev, true);
6832 }
6833 
6834 static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
6835 {
6836 	/* Emit the cleaner shader */
6837 	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
6838 	amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
6839 }
6840 
6841 static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring)
6842 {
6843 	amdgpu_gfx_profile_ring_begin_use(ring);
6844 
6845 	amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
6846 }
6847 
6848 static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring)
6849 {
6850 	amdgpu_gfx_profile_ring_end_use(ring);
6851 
6852 	amdgpu_gfx_enforce_isolation_ring_end_use(ring);
6853 }
6854 
6855 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
6856 	.name = "gfx_v11_0",
6857 	.early_init = gfx_v11_0_early_init,
6858 	.late_init = gfx_v11_0_late_init,
6859 	.sw_init = gfx_v11_0_sw_init,
6860 	.sw_fini = gfx_v11_0_sw_fini,
6861 	.hw_init = gfx_v11_0_hw_init,
6862 	.hw_fini = gfx_v11_0_hw_fini,
6863 	.suspend = gfx_v11_0_suspend,
6864 	.resume = gfx_v11_0_resume,
6865 	.is_idle = gfx_v11_0_is_idle,
6866 	.wait_for_idle = gfx_v11_0_wait_for_idle,
6867 	.soft_reset = gfx_v11_0_soft_reset,
6868 	.check_soft_reset = gfx_v11_0_check_soft_reset,
6869 	.post_soft_reset = gfx_v11_0_post_soft_reset,
6870 	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
6871 	.set_powergating_state = gfx_v11_0_set_powergating_state,
6872 	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
6873 	.dump_ip_state = gfx_v11_ip_dump,
6874 	.print_ip_state = gfx_v11_ip_print,
6875 };
6876 
6877 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
6878 	.type = AMDGPU_RING_TYPE_GFX,
6879 	.align_mask = 0xff,
6880 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6881 	.support_64bit_ptrs = true,
6882 	.secure_submission_supported = true,
6883 	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
6884 	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
6885 	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
6886 	.emit_frame_size = /* 247 dwords maximum if 16 IBs */
6887 		5 + /* update_spm_vmid */
6888 		5 + /* COND_EXEC */
6889 		22 + /* SET_Q_PREEMPTION_MODE */
6890 		7 + /* PIPELINE_SYNC */
6891 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6892 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6893 		4 + /* VM_FLUSH */
6894 		8 + /* FENCE for VM_FLUSH */
6895 		20 + /* GDS switch */
6896 		5 + /* COND_EXEC */
6897 		7 + /* HDP_flush */
6898 		4 + /* VGT_flush */
6899 		31 + /*	DE_META */
6900 		3 + /* CNTX_CTRL */
6901 		5 + /* HDP_INVL */
6902 		22 + /* SET_Q_PREEMPTION_MODE */
6903 		8 + 8 + /* FENCE x2 */
6904 		8 + /* gfx_v11_0_emit_mem_sync */
6905 		2, /* gfx_v11_0_ring_emit_cleaner_shader */
6906 	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
6907 	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
6908 	.emit_fence = gfx_v11_0_ring_emit_fence,
6909 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6910 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6911 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6912 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6913 	.test_ring = gfx_v11_0_ring_test_ring,
6914 	.test_ib = gfx_v11_0_ring_test_ib,
6915 	.insert_nop = gfx_v11_ring_insert_nop,
6916 	.pad_ib = amdgpu_ring_generic_pad_ib,
6917 	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
6918 	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
6919 	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
6920 	.preempt_ib = gfx_v11_0_ring_preempt_ib,
6921 	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
6922 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6923 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6924 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6925 	.soft_recovery = gfx_v11_0_ring_soft_recovery,
6926 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6927 	.reset = gfx_v11_0_reset_kgq,
6928 	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
6929 	.begin_use = gfx_v11_0_ring_begin_use,
6930 	.end_use = gfx_v11_0_ring_end_use,
6931 };
6932 
6933 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
6934 	.type = AMDGPU_RING_TYPE_COMPUTE,
6935 	.align_mask = 0xff,
6936 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6937 	.support_64bit_ptrs = true,
6938 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
6939 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
6940 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
6941 	.emit_frame_size =
6942 		5 + /* update_spm_vmid */
6943 		20 + /* gfx_v11_0_ring_emit_gds_switch */
6944 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
6945 		5 + /* hdp invalidate */
6946 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6947 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6948 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6949 		2 + /* gfx_v11_0_ring_emit_vm_flush */
6950 		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
6951 		8 + /* gfx_v11_0_emit_mem_sync */
6952 		2, /* gfx_v11_0_ring_emit_cleaner_shader */
6953 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
6954 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
6955 	.emit_fence = gfx_v11_0_ring_emit_fence,
6956 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6957 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6958 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6959 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6960 	.test_ring = gfx_v11_0_ring_test_ring,
6961 	.test_ib = gfx_v11_0_ring_test_ib,
6962 	.insert_nop = gfx_v11_ring_insert_nop,
6963 	.pad_ib = amdgpu_ring_generic_pad_ib,
6964 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6965 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6966 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6967 	.soft_recovery = gfx_v11_0_ring_soft_recovery,
6968 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6969 	.reset = gfx_v11_0_reset_kcq,
6970 	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
6971 	.begin_use = gfx_v11_0_ring_begin_use,
6972 	.end_use = gfx_v11_0_ring_end_use,
6973 };
6974 
6975 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
6976 	.type = AMDGPU_RING_TYPE_KIQ,
6977 	.align_mask = 0xff,
6978 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6979 	.support_64bit_ptrs = true,
6980 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
6981 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
6982 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
6983 	.emit_frame_size =
6984 		20 + /* gfx_v11_0_ring_emit_gds_switch */
6985 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
6986 		5 + /* hdp invalidate */
6987 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6988 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6989 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6990 		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6991 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
6992 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
6993 	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
6994 	.test_ring = gfx_v11_0_ring_test_ring,
6995 	.test_ib = gfx_v11_0_ring_test_ib,
6996 	.insert_nop = amdgpu_ring_insert_nop,
6997 	.pad_ib = amdgpu_ring_generic_pad_ib,
6998 	.emit_rreg = gfx_v11_0_ring_emit_rreg,
6999 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
7000 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
7001 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
7002 };
7003 
7004 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
7005 {
7006 	int i;
7007 
7008 	adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;
7009 
7010 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7011 		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
7012 
7013 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
7014 		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
7015 }
7016 
7017 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
7018 	.set = gfx_v11_0_set_eop_interrupt_state,
7019 	.process = gfx_v11_0_eop_irq,
7020 };
7021 
7022 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
7023 	.set = gfx_v11_0_set_priv_reg_fault_state,
7024 	.process = gfx_v11_0_priv_reg_irq,
7025 };
7026 
7027 static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
7028 	.set = gfx_v11_0_set_bad_op_fault_state,
7029 	.process = gfx_v11_0_bad_op_irq,
7030 };
7031 
7032 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
7033 	.set = gfx_v11_0_set_priv_inst_fault_state,
7034 	.process = gfx_v11_0_priv_inst_irq,
7035 };
7036 
7037 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
7038 	.process = gfx_v11_0_rlc_gc_fed_irq,
7039 };
7040 
7041 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
7042 {
7043 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7044 	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
7045 
7046 	adev->gfx.priv_reg_irq.num_types = 1;
7047 	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
7048 
7049 	adev->gfx.bad_op_irq.num_types = 1;
7050 	adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;
7051 
7052 	adev->gfx.priv_inst_irq.num_types = 1;
7053 	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
7054 
7055 	adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
7056 	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
7058 }
7059 
7060 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
7061 {
7062 	if (adev->flags & AMD_IS_APU)
7063 		adev->gfx.imu.mode = MISSION_MODE;
7064 	else
7065 		adev->gfx.imu.mode = DEBUG_MODE;
7066 
7067 	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
7068 }
7069 
7070 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
7071 {
7072 	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
7073 }
7074 
7075 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
7076 {
7077 	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
7078 			    adev->gfx.config.max_sh_per_se *
7079 			    adev->gfx.config.max_shader_engines;
7080 
7081 	adev->gds.gds_size = 0x1000;
7082 	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
7083 	adev->gds.gws_size = 64;
7084 	adev->gds.oa_size = 16;
7085 }
7086 
7087 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
7088 {
7089 	/* set gfx eng mqd */
7090 	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
7091 		sizeof(struct v11_gfx_mqd);
7092 	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
7093 		gfx_v11_0_gfx_mqd_init;
7094 	/* set compute eng mqd */
7095 	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
7096 		sizeof(struct v11_compute_mqd);
7097 	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
7098 		gfx_v11_0_compute_mqd_init;
7099 }
7100 
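/*
 * Write the user-requested inactive WGP bitmap for the currently selected
 * shader array to GC_USER_SHADER_ARRAY_CONFIG.
 */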
7101 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
7102 							  u32 bitmap)
7103 {
7104 	u32 data;
7105 
7106 	if (!bitmap)
7107 		return;
7108 
7109 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
7110 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
7111 
7112 	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
7113 }
7114 
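/*
 * Return the bitmap of active WGPs for the currently selected shader array,
 * derived from the fuse (CC_) and user (GC_USER_) inactive-WGP masks.
 */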
7115 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
7116 {
7117 	u32 data, wgp_bitmask;
7118 	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
7119 	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
7120 
7121 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
7122 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
7123 
7124 	wgp_bitmask =
7125 		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
7126 
7127 	return (~data) & wgp_bitmask;
7128 }
7129 
7130 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
7131 {
7132 	u32 wgp_idx, wgp_active_bitmap;
7133 	u32 cu_bitmap_per_wgp, cu_active_bitmap;
7134 
7135 	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
7136 	cu_active_bitmap = 0;
7137 
7138 	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
7139 		/* if a WGP is enabled, its 2 CUs are enabled */
7140 		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
7141 		if (wgp_active_bitmap & (1 << wgp_idx))
7142 			cu_active_bitmap |= cu_bitmap_per_wgp;
7143 	}
7144 
7145 	return cu_active_bitmap;
7146 }
7147 
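/*
 * Build the CU info reported to userspace: walk every shader engine and
 * shader array, apply any user CU-disable masks and count the active CUs
 * derived from the per-WGP bitmaps.
 */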
7148 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
7149 				 struct amdgpu_cu_info *cu_info)
7150 {
7151 	int i, j, k, counter, active_cu_number = 0;
7152 	u32 mask, bitmap;
7153 	unsigned disable_masks[8 * 2];
7154 
7155 	if (!adev || !cu_info)
7156 		return -EINVAL;
7157 
7158 	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
7159 
7160 	mutex_lock(&adev->grbm_idx_mutex);
7161 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7162 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7163 			bitmap = i * adev->gfx.config.max_sh_per_se + j;
7164 			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
7165 				continue;
7166 			mask = 1;
7167 			counter = 0;
7168 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
7169 			if (i < 8 && j < 2)
7170 				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
7171 					adev, disable_masks[i * 2 + j]);
7172 			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
7173 
7174 			/**
7175 			 * GFX11 can support more than 4 SEs, while the bitmap
7176 			 * in the cu_info struct is 4x4 and the ioctl interface
7177 			 * struct drm_amdgpu_info_device must stay stable.
7178 			 * So the last two columns of the bitmap are used to store the
7179 			 * CU masks for SEs 4 to 7; the layout of the bitmap is as below:
7180 			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
7181 			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
7182 			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
7183 			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
7184 			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
7185 			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
7186 			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
7187 			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
7188 			 */
7189 			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
7190 
7191 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7192 				if (bitmap & mask)
7193 					counter++;
7194 
7195 				mask <<= 1;
7196 			}
7197 			active_cu_number += counter;
7198 		}
7199 	}
7200 	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7201 	mutex_unlock(&adev->grbm_idx_mutex);
7202 
7203 	cu_info->number = active_cu_number;
7204 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7205 
7206 	return 0;
7207 }
7208 
7209 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
7210 {
7211 	.type = AMD_IP_BLOCK_TYPE_GFX,
7212 	.major = 11,
7213 	.minor = 0,
7214 	.rev = 0,
7215 	.funcs = &gfx_v11_0_ip_funcs,
7216 };
7217