xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c (revision ddef2cfbb8927f7236f1e0d12ffd35b5d576e300)
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "amdgpu_atomfirmware.h"
33 #include "imu_v11_0.h"
34 #include "soc21.h"
35 #include "nvd.h"
36 
37 #include "gc/gc_11_0_0_offset.h"
38 #include "gc/gc_11_0_0_sh_mask.h"
39 #include "smuio/smuio_13_0_6_offset.h"
40 #include "smuio/smuio_13_0_6_sh_mask.h"
41 #include "navi10_enum.h"
42 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
43 
44 #include "soc15.h"
45 #include "soc15d.h"
46 #include "clearstate_gfx11.h"
47 #include "v11_structs.h"
48 #include "gfx_v11_0.h"
49 #include "gfx_v11_0_3.h"
50 #include "nbio_v4_3.h"
51 #include "mes_v11_0.h"
52 
53 #define GFX11_NUM_GFX_RINGS		1
54 #define GFX11_MEC_HPD_SIZE	2048
55 
56 #define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
57 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388
58 
59 #define regCGTT_WD_CLK_CTRL		0x5086
60 #define regCGTT_WD_CLK_CTRL_BASE_IDX	1
61 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1	0x4e7e
62 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
63 #define regPC_CONFIG_CNTL_1		0x194d
64 #define regPC_CONFIG_CNTL_1_BASE_IDX	1
65 
66 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
67 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
68 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
69 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
70 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
71 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
72 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
73 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
74 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
75 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
76 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
77 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
78 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
79 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
80 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
81 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
82 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
83 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
84 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
85 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
86 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
87 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
88 MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
90 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
91 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
92 MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
94 MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
95 MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
96 
97 static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
98 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
99 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
100 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
101 	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
102 	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
103 	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
104 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
105 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
106 	SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
107 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
108 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
109 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
110 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
111 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
112 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
113 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
114 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
115 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
116 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
117 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
118 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
119 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
120 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
121 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
122 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
123 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
124 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
125 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
126 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
127 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
128 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
129 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
130 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
131 	SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
132 	SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
133 	SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
134 	SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
135 	SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
136 	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
137 	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
138 	SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
139 	SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
140 	SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
141 	SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
142 	SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
143 	SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
144 	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
145 	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
146 	SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
147 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
148 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
149 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
150 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
151 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
152 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
153 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
154 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
155 	/* cp header registers */
156 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
157 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
158 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
159 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
160 	/* SE status registers */
161 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
162 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
163 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
164 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
165 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
166 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
167 };
168 
169 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
170 	/* compute registers */
171 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
172 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
173 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
174 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
175 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
176 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
177 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
178 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
179 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
180 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
181 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
182 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
183 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
184 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
185 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
186 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
187 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
188 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
189 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
190 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
191 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
192 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
193 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
194 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
195 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
196 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
197 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
198 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
199 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
200 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
201 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
202 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
203 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
204 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
205 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
206 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
207 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
208 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
209 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
210 };
211 
212 static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
213 	/* gfx queue registers */
214 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
215 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
216 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
217 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
218 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
219 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
220 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
221 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
222 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
223 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
224 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
225 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
226 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
227 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
228 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
229 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
230 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
231 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
232 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
233 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
234 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
235 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
236 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
237 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
238 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
239 };
240 
241 static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
242 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
243 };
244 
245 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
246 {
247 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
248 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
249 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
250 	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
251 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
252 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
253 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
254 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
255 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
256 };
257 
258 #define DEFAULT_SH_MEM_CONFIG \
259 	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
260 	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
261 	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
262 
263 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
264 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
265 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
266 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
267 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
268 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
269 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
270 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
271                                  struct amdgpu_cu_info *cu_info);
272 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
273 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
274 				   u32 sh_num, u32 instance, int xcc_id);
275 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
276 
277 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
278 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
279 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
280 				     uint32_t val);
281 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
282 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
283 					   uint16_t pasid, uint32_t flush_type,
284 					   bool all_hub, uint8_t dst_sel);
285 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
286 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
287 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
288 				      bool enable);
289 
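/* Emit a PM4 SET_RESOURCES packet on the KIQ ring to pass the queue mask
 * managed by the KIQ to the CP; the GWS, OAC and GDS fields are left at zero.
 */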
290 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
291 {
292 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
293 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
294 			  PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
295 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
296 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
297 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
298 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
299 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
300 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
301 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
302 }
303 
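/* Emit a PM4 MAP_QUEUES packet on the KIQ ring to map @ring's MQD and
 * doorbell; the ME and engine select are derived from the ring type
 * (compute, gfx or MES).
 */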
304 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
305 				 struct amdgpu_ring *ring)
306 {
307 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
308 	uint64_t wptr_addr = ring->wptr_gpu_addr;
309 	uint32_t me = 0, eng_sel = 0;
310 
311 	switch (ring->funcs->type) {
312 	case AMDGPU_RING_TYPE_COMPUTE:
313 		me = 1;
314 		eng_sel = 0;
315 		break;
316 	case AMDGPU_RING_TYPE_GFX:
317 		me = 0;
318 		eng_sel = 4;
319 		break;
320 	case AMDGPU_RING_TYPE_MES:
321 		me = 2;
322 		eng_sel = 5;
323 		break;
324 	default:
325 		WARN_ON(1);
326 	}
327 
328 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
329 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
330 	amdgpu_ring_write(kiq_ring,
331 			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
332 			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
333 			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
334 			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
335 			  PACKET3_MAP_QUEUES_ME((me)) |
336 			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
337 			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
338 			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
339 			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
340 	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
341 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
342 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
343 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
344 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
345 }
346 
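/* Emit a PM4 UNMAP_QUEUES packet for @ring on the KIQ ring. When MES is
 * enabled and the KIQ ring is not ready, fall back to unmapping the legacy
 * queue through the MES. For PREEMPT_QUEUES_NO_UNMAP the trailing dwords
 * carry the fence address and sequence number.
 */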
347 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
348 				   struct amdgpu_ring *ring,
349 				   enum amdgpu_unmap_queues_action action,
350 				   u64 gpu_addr, u64 seq)
351 {
352 	struct amdgpu_device *adev = kiq_ring->adev;
353 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
354 
355 	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
356 		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
357 		return;
358 	}
359 
360 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
361 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
362 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
363 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
364 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
365 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
366 	amdgpu_ring_write(kiq_ring,
367 		  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
368 
369 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
370 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
371 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
372 		amdgpu_ring_write(kiq_ring, seq);
373 	} else {
374 		amdgpu_ring_write(kiq_ring, 0);
375 		amdgpu_ring_write(kiq_ring, 0);
376 		amdgpu_ring_write(kiq_ring, 0);
377 	}
378 }
379 
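/* Emit a PM4 QUERY_STATUS packet on the KIQ ring for @ring; completion is
 * reported back through the @addr/@seq fence pair.
 */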
380 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
381 				   struct amdgpu_ring *ring,
382 				   u64 addr,
383 				   u64 seq)
384 {
385 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
386 
387 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
388 	amdgpu_ring_write(kiq_ring,
389 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
390 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
391 			  PACKET3_QUERY_STATUS_COMMAND(2));
392 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
393 			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
394 			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
395 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
396 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
397 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
398 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
399 }
400 
401 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
402 				uint16_t pasid, uint32_t flush_type,
403 				bool all_hub)
404 {
405 	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
406 }
407 
408 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
409 	.kiq_set_resources = gfx11_kiq_set_resources,
410 	.kiq_map_queues = gfx11_kiq_map_queues,
411 	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
412 	.kiq_query_status = gfx11_kiq_query_status,
413 	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
414 	.set_resources_size = 8,
415 	.map_queues_size = 7,
416 	.unmap_queues_size = 6,
417 	.query_status_size = 7,
418 	.invalidate_tlbs_size = 2,
419 };
420 
421 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
422 {
423 	adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
424 }
425 
426 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
427 {
428 	if (amdgpu_sriov_vf(adev))
429 		return;
430 
431 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
432 	case IP_VERSION(11, 0, 1):
433 	case IP_VERSION(11, 0, 4):
434 		soc15_program_register_sequence(adev,
435 						golden_settings_gc_11_0_1,
436 						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
437 		break;
438 	default:
439 		break;
440 	}
441 	soc15_program_register_sequence(adev,
442 					golden_settings_gc_11_0,
443 					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
444 
445 }
446 
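/* Emit a PM4 WRITE_DATA packet that writes @val to register @reg,
 * optionally requesting a write confirm (@wc).
 */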
447 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
448 				       bool wc, uint32_t reg, uint32_t val)
449 {
450 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
451 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
452 			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
453 	amdgpu_ring_write(ring, reg);
454 	amdgpu_ring_write(ring, 0);
455 	amdgpu_ring_write(ring, val);
456 }
457 
458 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
459 				  int mem_space, int opt, uint32_t addr0,
460 				  uint32_t addr1, uint32_t ref, uint32_t mask,
461 				  uint32_t inv)
462 {
463 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
464 	amdgpu_ring_write(ring,
465 			  /* memory (1) or register (0) */
466 			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
467 			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
468 			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
469 			   WAIT_REG_MEM_ENGINE(eng_sel)));
470 
471 	if (mem_space)
472 		BUG_ON(addr0 & 0x3); /* Dword align */
473 	amdgpu_ring_write(ring, addr0);
474 	amdgpu_ring_write(ring, addr1);
475 	amdgpu_ring_write(ring, ref);
476 	amdgpu_ring_write(ring, mask);
477 	amdgpu_ring_write(ring, inv); /* poll interval */
478 }
479 
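/* Basic ring test: seed SCRATCH_REG0 with 0xCAFEDEAD, ask the ring to write
 * 0xDEADBEEF to it (via WREG on the KIQ, SET_UCONFIG_REG otherwise) and poll
 * the register until the value lands or the timeout expires.
 */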
480 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
481 {
482 	struct amdgpu_device *adev = ring->adev;
483 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
484 	uint32_t tmp = 0;
485 	unsigned i;
486 	int r;
487 
488 	WREG32(scratch, 0xCAFEDEAD);
489 	r = amdgpu_ring_alloc(ring, 5);
490 	if (r) {
491 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
492 			  ring->idx, r);
493 		return r;
494 	}
495 
496 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
497 		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
498 	} else {
499 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
500 		amdgpu_ring_write(ring, scratch -
501 				  PACKET3_SET_UCONFIG_REG_START);
502 		amdgpu_ring_write(ring, 0xDEADBEEF);
503 	}
504 	amdgpu_ring_commit(ring);
505 
506 	for (i = 0; i < adev->usec_timeout; i++) {
507 		tmp = RREG32(scratch);
508 		if (tmp == 0xDEADBEEF)
509 			break;
510 		if (amdgpu_emu_mode == 1)
511 			msleep(1);
512 		else
513 			udelay(1);
514 	}
515 
516 	if (i >= adev->usec_timeout)
517 		r = -ETIMEDOUT;
518 	return r;
519 }
520 
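/* IB test: submit a small indirect buffer that writes 0xDEADBEEF to a
 * writeback slot (or the MES context padding area for MES queues) and wait
 * on the fence to verify that IB execution works.
 */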
521 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
522 {
523 	struct amdgpu_device *adev = ring->adev;
524 	struct amdgpu_ib ib;
525 	struct dma_fence *f = NULL;
526 	unsigned index;
527 	uint64_t gpu_addr;
528 	volatile uint32_t *cpu_ptr;
529 	long r;
530 
531 	/* MES KIQ fw hasn't indirect buffer support for now */
532 	if (adev->enable_mes_kiq &&
533 	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
534 		return 0;
535 
536 	memset(&ib, 0, sizeof(ib));
537 
538 	if (ring->is_mes_queue) {
539 		uint32_t padding, offset;
540 
541 		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
542 		padding = amdgpu_mes_ctx_get_offs(ring,
543 						  AMDGPU_MES_CTX_PADDING_OFFS);
544 
545 		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
546 		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
547 
548 		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
549 		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
550 		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
551 	} else {
552 		r = amdgpu_device_wb_get(adev, &index);
553 		if (r)
554 			return r;
555 
556 		gpu_addr = adev->wb.gpu_addr + (index * 4);
557 		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
558 		cpu_ptr = &adev->wb.wb[index];
559 
560 		r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
561 		if (r) {
562 			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
563 			goto err1;
564 		}
565 	}
566 
567 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
568 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
569 	ib.ptr[2] = lower_32_bits(gpu_addr);
570 	ib.ptr[3] = upper_32_bits(gpu_addr);
571 	ib.ptr[4] = 0xDEADBEEF;
572 	ib.length_dw = 5;
573 
574 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
575 	if (r)
576 		goto err2;
577 
578 	r = dma_fence_wait_timeout(f, false, timeout);
579 	if (r == 0) {
580 		r = -ETIMEDOUT;
581 		goto err2;
582 	} else if (r < 0) {
583 		goto err2;
584 	}
585 
586 	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
587 		r = 0;
588 	else
589 		r = -EINVAL;
590 err2:
591 	if (!ring->is_mes_queue)
592 		amdgpu_ib_free(adev, &ib, NULL);
593 	dma_fence_put(f);
594 err1:
595 	if (!ring->is_mes_queue)
596 		amdgpu_device_wb_free(adev, index);
597 	return r;
598 }
599 
600 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
601 {
602 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
603 	amdgpu_ucode_release(&adev->gfx.me_fw);
604 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
605 	amdgpu_ucode_release(&adev->gfx.mec_fw);
606 
607 	kfree(adev->gfx.rlc.register_list_format);
608 }
609 
610 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
611 {
612 	const struct psp_firmware_header_v1_0 *toc_hdr;
613 	int err = 0;
614 	char fw_name[40];
615 
616 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_toc.bin", ucode_prefix);
617 	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, fw_name);
618 	if (err)
619 		goto out;
620 
621 	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
622 	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
623 	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
624 	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
625 	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
626 				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
627 	return 0;
628 out:
629 	amdgpu_ucode_release(&adev->psp.toc_fw);
630 	return err;
631 }
632 
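/* CP gfx shadow (used for gfx queue state save/restore) is only enabled on
 * SR-IOV VFs, and only when the ME/PFP/MEC firmware is new enough.
 */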
633 static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
634 {
635 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
636 	case IP_VERSION(11, 0, 0):
637 	case IP_VERSION(11, 0, 2):
638 	case IP_VERSION(11, 0, 3):
639 		if ((adev->gfx.me_fw_version >= 1505) &&
640 		    (adev->gfx.pfp_fw_version >= 1600) &&
641 		    (adev->gfx.mec_fw_version >= 512)) {
642 			if (amdgpu_sriov_vf(adev))
643 				adev->gfx.cp_gfx_shadow = true;
644 			else
645 				adev->gfx.cp_gfx_shadow = false;
646 		}
647 		break;
648 	default:
649 		adev->gfx.cp_gfx_shadow = false;
650 		break;
651 	}
652 }
653 
654 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
655 {
656 	char fw_name[40];
657 	char ucode_prefix[25];
658 	int err;
659 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
660 	uint16_t version_major;
661 	uint16_t version_minor;
662 
663 	DRM_DEBUG("\n");
664 
665 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
666 
667 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", ucode_prefix);
668 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, fw_name);
669 	if (err)
670 		goto out;
671 	/* check the pfp fw header version to decide whether to enable rs64 for gfx11 */
672 	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
673 				(union amdgpu_firmware_header *)
674 				adev->gfx.pfp_fw->data, 2, 0);
675 	if (adev->gfx.rs64_enable) {
676 		dev_info(adev->dev, "CP RS64 enable\n");
677 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
678 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
679 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
680 	} else {
681 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
682 	}
683 
684 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", ucode_prefix);
685 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, fw_name);
686 	if (err)
687 		goto out;
688 	if (adev->gfx.rs64_enable) {
689 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
690 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
691 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
692 	} else {
693 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
694 	}
695 
696 	if (!amdgpu_sriov_vf(adev)) {
697 		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
698 		    adev->pdev->revision == 0xCE)
699 			snprintf(fw_name, sizeof(fw_name), "amdgpu/gc_11_0_0_rlc_1.bin");
700 		else
701 			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
702 		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
703 		if (err)
704 			goto out;
705 		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
706 		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
707 		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
708 		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
709 		if (err)
710 			goto out;
711 	}
712 
713 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", ucode_prefix);
714 	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, fw_name);
715 	if (err)
716 		goto out;
717 	if (adev->gfx.rs64_enable) {
718 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
719 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
720 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
721 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
722 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
723 	} else {
724 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
725 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
726 	}
727 
728 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
729 		err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
730 
731 	/* only one MEC for gfx 11.0.0. */
732 	adev->gfx.mec2_fw = NULL;
733 
734 	gfx_v11_0_check_fw_cp_gfx_shadow(adev);
735 
736 	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
737 		err = adev->gfx.imu.funcs->init_microcode(adev);
738 		if (err)
739 			DRM_ERROR("Failed to init imu firmware!\n");
740 		return err;
741 	}
742 
743 out:
744 	if (err) {
745 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
746 		amdgpu_ucode_release(&adev->gfx.me_fw);
747 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
748 		amdgpu_ucode_release(&adev->gfx.mec_fw);
749 	}
750 
751 	return err;
752 }
753 
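/* Compute the size in dwords of the clear state buffer built from
 * gfx11_cs_data: preamble, context control, the SECT_CONTEXT register
 * extents, the PA_SC_TILE_STEERING_OVERRIDE write and the trailing
 * clear state packet.
 */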
754 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
755 {
756 	u32 count = 0;
757 	const struct cs_section_def *sect = NULL;
758 	const struct cs_extent_def *ext = NULL;
759 
760 	/* begin clear state */
761 	count += 2;
762 	/* context control state */
763 	count += 3;
764 
765 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
766 		for (ext = sect->section; ext->extent != NULL; ++ext) {
767 			if (sect->id == SECT_CONTEXT)
768 				count += 2 + ext->reg_count;
769 			else
770 				return 0;
771 		}
772 	}
773 
774 	/* set PA_SC_TILE_STEERING_OVERRIDE */
775 	count += 3;
776 	/* end clear state */
777 	count += 2;
778 	/* clear state */
779 	count += 2;
780 
781 	return count;
782 }
783 
784 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
785 				    volatile u32 *buffer)
786 {
787 	u32 count = 0, i;
788 	const struct cs_section_def *sect = NULL;
789 	const struct cs_extent_def *ext = NULL;
790 	int ctx_reg_offset;
791 
792 	if (adev->gfx.rlc.cs_data == NULL)
793 		return;
794 	if (buffer == NULL)
795 		return;
796 
797 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
798 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
799 
800 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
801 	buffer[count++] = cpu_to_le32(0x80000000);
802 	buffer[count++] = cpu_to_le32(0x80000000);
803 
804 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
805 		for (ext = sect->section; ext->extent != NULL; ++ext) {
806 			if (sect->id == SECT_CONTEXT) {
807 				buffer[count++] =
808 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
809 				buffer[count++] = cpu_to_le32(ext->reg_index -
810 						PACKET3_SET_CONTEXT_REG_START);
811 				for (i = 0; i < ext->reg_count; i++)
812 					buffer[count++] = cpu_to_le32(ext->extent[i]);
813 			} else {
814 				return;
815 			}
816 		}
817 	}
818 
819 	ctx_reg_offset =
820 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
821 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
822 	buffer[count++] = cpu_to_le32(ctx_reg_offset);
823 	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
824 
825 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
826 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
827 
828 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
829 	buffer[count++] = cpu_to_le32(0);
830 }
831 
832 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
833 {
834 	/* clear state block */
835 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
836 			&adev->gfx.rlc.clear_state_gpu_addr,
837 			(void **)&adev->gfx.rlc.cs_ptr);
838 
839 	/* jump table block */
840 	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
841 			&adev->gfx.rlc.cp_table_gpu_addr,
842 			(void **)&adev->gfx.rlc.cp_table_ptr);
843 }
844 
845 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
846 {
847 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
848 
849 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
850 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
851 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
852 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
853 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
854 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
855 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
856 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
857 	adev->gfx.rlc.rlcg_reg_access_supported = true;
858 }
859 
860 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
861 {
862 	const struct cs_section_def *cs_data;
863 	int r;
864 
865 	adev->gfx.rlc.cs_data = gfx11_cs_data;
866 
867 	cs_data = adev->gfx.rlc.cs_data;
868 
869 	if (cs_data) {
870 		/* init clear state block */
871 		r = amdgpu_gfx_rlc_init_csb(adev);
872 		if (r)
873 			return r;
874 	}
875 
876 	/* init spm vmid with 0xf */
877 	if (adev->gfx.rlc.funcs->update_spm_vmid)
878 		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
879 
880 	return 0;
881 }
882 
883 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
884 {
885 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
886 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
887 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
888 }
889 
890 static void gfx_v11_0_me_init(struct amdgpu_device *adev)
891 {
892 	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
893 
894 	amdgpu_gfx_graphics_queue_acquire(adev);
895 }
896 
897 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
898 {
899 	int r;
900 	u32 *hpd;
901 	size_t mec_hpd_size;
902 
903 	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
904 
905 	/* take ownership of the relevant compute queues */
906 	amdgpu_gfx_compute_queue_acquire(adev);
907 	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
908 
909 	if (mec_hpd_size) {
910 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
911 					      AMDGPU_GEM_DOMAIN_GTT,
912 					      &adev->gfx.mec.hpd_eop_obj,
913 					      &adev->gfx.mec.hpd_eop_gpu_addr,
914 					      (void **)&hpd);
915 		if (r) {
916 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
917 			gfx_v11_0_mec_fini(adev);
918 			return r;
919 		}
920 
921 		memset(hpd, 0, mec_hpd_size);
922 
923 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
924 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
925 	}
926 
927 	return 0;
928 }
929 
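/* Read one SQ indexed register for the given wave through the
 * SQ_IND_INDEX/SQ_IND_DATA register pair.
 */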
930 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
931 {
932 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
933 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
934 		(address << SQ_IND_INDEX__INDEX__SHIFT));
935 	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
936 }
937 
938 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
939 			   uint32_t thread, uint32_t regno,
940 			   uint32_t num, uint32_t *out)
941 {
942 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
943 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
944 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
945 		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
946 		(SQ_IND_INDEX__AUTO_INCR_MASK));
947 	while (num--)
948 		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
949 }
950 
951 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
952 {
953 	/* In gfx11 the SIMD_ID is specified as part of the INSTANCE
954 	 * field when performing a select_se_sh, so it should be
955 	 * zero here */
956 	WARN_ON(simd != 0);
957 
958 	/* type 3 wave data */
959 	dst[(*no_fields)++] = 3;
960 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
961 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
962 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
963 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
964 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
965 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
966 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
967 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
968 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
969 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
970 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
971 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
972 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
973 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
974 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
975 }
976 
977 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
978 				     uint32_t wave, uint32_t start,
979 				     uint32_t size, uint32_t *dst)
980 {
981 	WARN_ON(simd != 0);
982 
983 	wave_read_regs(
984 		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
985 		dst);
986 }
987 
988 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
989 				      uint32_t wave, uint32_t thread,
990 				      uint32_t start, uint32_t size,
991 				      uint32_t *dst)
992 {
993 	wave_read_regs(
994 		adev, wave, thread,
995 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
996 }
997 
998 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
999 					u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1000 {
1001 	soc21_grbm_select(adev, me, pipe, q, vm);
1002 }
1003 
1004 /* all sizes are in bytes */
1005 #define MQD_SHADOW_BASE_SIZE      73728
1006 #define MQD_SHADOW_BASE_ALIGNMENT 256
1007 #define MQD_FWWORKAREA_SIZE       484
1008 #define MQD_FWWORKAREA_ALIGNMENT  256
1009 
1010 static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
1011 					 struct amdgpu_gfx_shadow_info *shadow_info)
1012 {
1013 	if (adev->gfx.cp_gfx_shadow) {
1014 		shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
1015 		shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
1016 		shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
1017 		shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
1018 		return 0;
1019 	} else {
1020 		memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
1021 		return -ENOTSUPP;
1022 	}
1023 }
1024 
1025 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
1026 	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
1027 	.select_se_sh = &gfx_v11_0_select_se_sh,
1028 	.read_wave_data = &gfx_v11_0_read_wave_data,
1029 	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
1030 	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
1031 	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
1032 	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
1033 	.get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
1034 };
1035 
1036 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
1037 {
1038 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1039 	case IP_VERSION(11, 0, 0):
1040 	case IP_VERSION(11, 0, 2):
1041 		adev->gfx.config.max_hw_contexts = 8;
1042 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1043 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1044 		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
1045 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1046 		break;
1047 	case IP_VERSION(11, 0, 3):
1048 		adev->gfx.ras = &gfx_v11_0_3_ras;
1049 		adev->gfx.config.max_hw_contexts = 8;
1050 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1051 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1052 		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
1053 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1054 		break;
1055 	case IP_VERSION(11, 0, 1):
1056 	case IP_VERSION(11, 0, 4):
1057 	case IP_VERSION(11, 5, 0):
1058 	case IP_VERSION(11, 5, 1):
1059 		adev->gfx.config.max_hw_contexts = 8;
1060 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1061 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1062 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1063 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
1064 		break;
1065 	default:
1066 		BUG();
1067 		break;
1068 	}
1069 
1070 	return 0;
1071 }
1072 
1073 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
1074 				   int me, int pipe, int queue)
1075 {
1076 	int r;
1077 	struct amdgpu_ring *ring;
1078 	unsigned int irq_type;
1079 
1080 	ring = &adev->gfx.gfx_ring[ring_id];
1081 
1082 	ring->me = me;
1083 	ring->pipe = pipe;
1084 	ring->queue = queue;
1085 
1086 	ring->ring_obj = NULL;
1087 	ring->use_doorbell = true;
1088 
1089 	if (!ring_id)
1090 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1091 	else
1092 		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
1093 	ring->vm_hub = AMDGPU_GFXHUB(0);
1094 	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1095 
1096 	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
1097 	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1098 			     AMDGPU_RING_PRIO_DEFAULT, NULL);
1099 	if (r)
1100 		return r;
1101 	return 0;
1102 }
1103 
1104 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1105 				       int mec, int pipe, int queue)
1106 {
1107 	int r;
1108 	unsigned irq_type;
1109 	struct amdgpu_ring *ring;
1110 	unsigned int hw_prio;
1111 
1112 	ring = &adev->gfx.compute_ring[ring_id];
1113 
1114 	/* mec0 is me1 */
1115 	ring->me = mec + 1;
1116 	ring->pipe = pipe;
1117 	ring->queue = queue;
1118 
1119 	ring->ring_obj = NULL;
1120 	ring->use_doorbell = true;
1121 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1122 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1123 				+ (ring_id * GFX11_MEC_HPD_SIZE);
1124 	ring->vm_hub = AMDGPU_GFXHUB(0);
1125 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1126 
1127 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1128 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1129 		+ ring->pipe;
1130 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1131 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
1132 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1133 	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1134 			     hw_prio, NULL);
1135 	if (r)
1136 		return r;
1137 
1138 	return 0;
1139 }
1140 
1141 static struct {
1142 	SOC21_FIRMWARE_ID	id;
1143 	unsigned int		offset;
1144 	unsigned int		size;
1145 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
1146 
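/* Walk the RLC table of contents provided by PSP and record each firmware's
 * offset and size (converted from dwords to bytes) in rlc_autoload_info.
 */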
1147 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
1148 {
1149 	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
1150 
1151 	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
1152 			(ucode->id < SOC21_FIRMWARE_ID_MAX)) {
1153 		rlc_autoload_info[ucode->id].id = ucode->id;
1154 		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
1155 		rlc_autoload_info[ucode->id].size = ucode->size * 4;
1156 
1157 		ucode++;
1158 	}
1159 }
1160 
1161 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
1162 {
1163 	uint32_t total_size = 0;
1164 	SOC21_FIRMWARE_ID id;
1165 
1166 	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
1167 
1168 	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
1169 		total_size += rlc_autoload_info[id].size;
1170 
1171 	/* In case the offsets in the RLC TOC are aligned (padded), make sure the buffer covers the last entry */
1172 	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
1173 		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
1174 			rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
1175 
1176 	return total_size;
1177 }
1178 
1179 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
1180 {
1181 	int r;
1182 	uint32_t total_size;
1183 
1184 	total_size = gfx_v11_0_calc_toc_total_size(adev);
1185 
1186 	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
1187 				      AMDGPU_GEM_DOMAIN_VRAM |
1188 				      AMDGPU_GEM_DOMAIN_GTT,
1189 				      &adev->gfx.rlc.rlc_autoload_bo,
1190 				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
1191 				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1192 
1193 	if (r) {
1194 		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
1195 		return r;
1196 	}
1197 
1198 	return 0;
1199 }
1200 
1201 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1202 					      SOC21_FIRMWARE_ID id,
1203 					      const void *fw_data,
1204 					      uint32_t fw_size,
1205 					      uint32_t *fw_autoload_mask)
1206 {
1207 	uint32_t toc_offset;
1208 	uint32_t toc_fw_size;
1209 	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1210 
1211 	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
1212 		return;
1213 
1214 	toc_offset = rlc_autoload_info[id].offset;
1215 	toc_fw_size = rlc_autoload_info[id].size;
1216 
1217 	if (fw_size == 0)
1218 		fw_size = toc_fw_size;
1219 
1220 	if (fw_size > toc_fw_size)
1221 		fw_size = toc_fw_size;
1222 
1223 	memcpy(ptr + toc_offset, fw_data, fw_size);
1224 
1225 	if (fw_size < toc_fw_size)
1226 		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1227 
1228 	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
1229 		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
1230 }
1231 
1232 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
1233 							uint32_t *fw_autoload_mask)
1234 {
1235 	void *data;
1236 	uint32_t size;
1237 	uint64_t *toc_ptr;
1238 
1239 	*(uint64_t *)fw_autoload_mask |= 0x1;
1240 
1241 	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
1242 
1243 	data = adev->psp.toc.start_addr;
1244 	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
1245 
1246 	toc_ptr = (uint64_t *)data + size / 8 - 1;
1247 	*toc_ptr = *(uint64_t *)fw_autoload_mask;
1248 
1249 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
1250 					data, size, fw_autoload_mask);
1251 }
1252 
1253 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1254 							uint32_t *fw_autoload_mask)
1255 {
1256 	const __le32 *fw_data;
1257 	uint32_t fw_size;
1258 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1259 	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1260 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1261 	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1262 	uint16_t version_major, version_minor;
1263 
1264 	if (adev->gfx.rs64_enable) {
1265 		/* pfp ucode */
1266 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1267 			adev->gfx.pfp_fw->data;
1268 		/* instruction */
1269 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1270 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1271 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1272 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1273 						fw_data, fw_size, fw_autoload_mask);
1274 		/* data */
1275 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1276 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1277 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1278 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1279 						fw_data, fw_size, fw_autoload_mask);
1280 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1281 						fw_data, fw_size, fw_autoload_mask);
1282 		/* me ucode */
1283 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1284 			adev->gfx.me_fw->data;
1285 		/* instruction */
1286 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1287 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1288 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1289 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1290 						fw_data, fw_size, fw_autoload_mask);
1291 		/* data */
1292 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1293 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1294 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1295 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1296 						fw_data, fw_size, fw_autoload_mask);
1297 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1298 						fw_data, fw_size, fw_autoload_mask);
1299 		/* mec ucode */
1300 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1301 			adev->gfx.mec_fw->data;
1302 		/* instruction */
1303 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1304 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1305 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1306 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1307 						fw_data, fw_size, fw_autoload_mask);
1308 		/* data */
1309 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1310 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1311 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1312 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1313 						fw_data, fw_size, fw_autoload_mask);
1314 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1315 						fw_data, fw_size, fw_autoload_mask);
1316 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1317 						fw_data, fw_size, fw_autoload_mask);
1318 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1319 						fw_data, fw_size, fw_autoload_mask);
1320 	} else {
1321 		/* pfp ucode */
1322 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1323 			adev->gfx.pfp_fw->data;
1324 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1325 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1326 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1327 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1328 						fw_data, fw_size, fw_autoload_mask);
1329 
1330 		/* me ucode */
1331 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1332 			adev->gfx.me_fw->data;
1333 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1334 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1335 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1336 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1337 						fw_data, fw_size, fw_autoload_mask);
1338 
1339 		/* mec ucode */
1340 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1341 			adev->gfx.mec_fw->data;
1342 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1343 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1344 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1345 			cp_hdr->jt_size * 4;
1346 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1347 						fw_data, fw_size, fw_autoload_mask);
1348 	}
1349 
1350 	/* rlc ucode */
1351 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1352 		adev->gfx.rlc_fw->data;
1353 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1354 			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1355 	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1356 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1357 					fw_data, fw_size, fw_autoload_mask);
1358 
1359 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1360 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1361 	if (version_major == 2) {
1362 		if (version_minor >= 2) {
1363 			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1364 
1365 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1366 					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1367 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1368 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1369 					fw_data, fw_size, fw_autoload_mask);
1370 
1371 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1372 					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1373 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1374 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1375 					fw_data, fw_size, fw_autoload_mask);
1376 		}
1377 	}
1378 }
1379 
1380 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1381 							uint32_t *fw_autoload_mask)
1382 {
1383 	const __le32 *fw_data;
1384 	uint32_t fw_size;
1385 	const struct sdma_firmware_header_v2_0 *sdma_hdr;
1386 
1387 	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1388 		adev->sdma.instance[0].fw->data;
1389 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1390 			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1391 	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1392 
1393 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1394 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1395 
1396 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1397 			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1398 	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1399 
1400 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1401 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1402 }
1403 
1404 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1405 							uint32_t *fw_autoload_mask)
1406 {
1407 	const __le32 *fw_data;
1408 	unsigned fw_size;
1409 	const struct mes_firmware_header_v1_0 *mes_hdr;
1410 	int pipe, ucode_id, data_id;
1411 
1412 	for (pipe = 0; pipe < 2; pipe++) {
1413 		if (pipe == 0) {
1414 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1415 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1416 		} else {
1417 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1418 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1419 		}
1420 
1421 		mes_hdr = (const struct mes_firmware_header_v1_0 *)
1422 			adev->mes.fw[pipe]->data;
1423 
1424 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1425 				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1426 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1427 
1428 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1429 				ucode_id, fw_data, fw_size, fw_autoload_mask);
1430 
1431 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1432 				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1433 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1434 
1435 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1436 				data_id, fw_data, fw_size, fw_autoload_mask);
1437 	}
1438 }
1439 
1440 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1441 {
1442 	uint32_t rlc_g_offset, rlc_g_size;
1443 	uint64_t gpu_addr;
1444 	uint32_t autoload_fw_id[2];
1445 
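	/* Tracks which SOC21 firmware IDs have been copied into the autoload buffer */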
1446 	memset(autoload_fw_id, 0, sizeof(autoload_fw_id));
1447 
1448 	/* RLC autoload sequence 2: copy ucode */
1449 	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1450 	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1451 	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1452 	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1453 
1454 	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1455 	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1456 	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1457 
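	/* Tell the GFX IMU where the RLC_G ucode sits in the autoload buffer and its size */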
1458 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1459 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1460 
1461 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1462 
1463 	/* RLC autoload sequence 3: load IMU fw */
1464 	if (adev->gfx.imu.funcs->load_microcode)
1465 		adev->gfx.imu.funcs->load_microcode(adev);
1466 	/* RLC autoload sequence 4: init IMU fw */
1467 	if (adev->gfx.imu.funcs->setup_imu)
1468 		adev->gfx.imu.funcs->setup_imu(adev);
1469 	if (adev->gfx.imu.funcs->start_imu)
1470 		adev->gfx.imu.funcs->start_imu(adev);
1471 
1472 	/* RLC autoload sequence 5: disable gpa mode */
1473 	gfx_v11_0_disable_gpa_mode(adev);
1474 
1475 	return 0;
1476 }
1477 
1478 static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
1479 {
1480 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
1481 	uint32_t *ptr;
1482 	uint32_t inst;
1483 
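	/* Allocate memory for the core GC register dump */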
1484 	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
1485 	if (ptr == NULL) {
1486 		DRM_ERROR("Failed to allocate memory for IP Dump\n");
1487 		adev->gfx.ip_dump_core = NULL;
1488 	} else {
1489 		adev->gfx.ip_dump_core = ptr;
1490 	}
1491 
1492 	/* Allocate memory for compute queue registers for all the instances */
1493 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
1494 	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
1495 		adev->gfx.mec.num_queue_per_pipe;
1496 
1497 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1498 	if (ptr == NULL) {
1499 		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
1500 		adev->gfx.ip_dump_cp_queues = NULL;
1501 	} else {
1502 		adev->gfx.ip_dump_cp_queues = ptr;
1503 	}
1504 
1505 	/* Allocate memory for gfx queue registers for all the instances */
1506 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
1507 	inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
1508 		adev->gfx.me.num_queue_per_pipe;
1509 
1510 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1511 	if (ptr == NULL) {
1512 		DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
1513 		adev->gfx.ip_dump_gfx_queues = NULL;
1514 	} else {
1515 		adev->gfx.ip_dump_gfx_queues = ptr;
1516 	}
1517 }
1518 
1519 static int gfx_v11_0_sw_init(void *handle)
1520 {
1521 	int i, j, k, r, ring_id = 0;
1522 	int xcc_id = 0;
1523 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1524 
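	/* ME/MEC pipe and queue topology per GC IP version */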
1525 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1526 	case IP_VERSION(11, 0, 0):
1527 	case IP_VERSION(11, 0, 2):
1528 	case IP_VERSION(11, 0, 3):
1529 		adev->gfx.me.num_me = 1;
1530 		adev->gfx.me.num_pipe_per_me = 1;
1531 		adev->gfx.me.num_queue_per_pipe = 1;
1532 		adev->gfx.mec.num_mec = 2;
1533 		adev->gfx.mec.num_pipe_per_mec = 4;
1534 		adev->gfx.mec.num_queue_per_pipe = 4;
1535 		break;
1536 	case IP_VERSION(11, 0, 1):
1537 	case IP_VERSION(11, 0, 4):
1538 	case IP_VERSION(11, 5, 0):
1539 	case IP_VERSION(11, 5, 1):
1540 		adev->gfx.me.num_me = 1;
1541 		adev->gfx.me.num_pipe_per_me = 1;
1542 		adev->gfx.me.num_queue_per_pipe = 1;
1543 		adev->gfx.mec.num_mec = 1;
1544 		adev->gfx.mec.num_pipe_per_mec = 4;
1545 		adev->gfx.mec.num_queue_per_pipe = 4;
1546 		break;
1547 	default:
1548 		adev->gfx.me.num_me = 1;
1549 		adev->gfx.me.num_pipe_per_me = 1;
1550 		adev->gfx.me.num_queue_per_pipe = 1;
1551 		adev->gfx.mec.num_mec = 1;
1552 		adev->gfx.mec.num_pipe_per_mec = 4;
1553 		adev->gfx.mec.num_queue_per_pipe = 8;
1554 		break;
1555 	}
1556 
1557 	/* Enable the CG flag in one-VF mode to enable RLC safe mode enter/exit */
1558 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) &&
1559 	    amdgpu_sriov_is_pp_one_vf(adev))
1560 		adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
1561 
1562 	/* EOP Event */
1563 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1564 			      GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1565 			      &adev->gfx.eop_irq);
1566 	if (r)
1567 		return r;
1568 
1569 	/* Privileged reg */
1570 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1571 			      GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1572 			      &adev->gfx.priv_reg_irq);
1573 	if (r)
1574 		return r;
1575 
1576 	/* Privileged inst */
1577 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1578 			      GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1579 			      &adev->gfx.priv_inst_irq);
1580 	if (r)
1581 		return r;
1582 
1583 	/* FED error */
1584 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
1585 				  GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
1586 				  &adev->gfx.rlc_gc_fed_irq);
1587 	if (r)
1588 		return r;
1589 
1590 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1591 
1592 	gfx_v11_0_me_init(adev);
1593 
1594 	r = gfx_v11_0_rlc_init(adev);
1595 	if (r) {
1596 		DRM_ERROR("Failed to init rlc BOs!\n");
1597 		return r;
1598 	}
1599 
1600 	r = gfx_v11_0_mec_init(adev);
1601 	if (r) {
1602 		DRM_ERROR("Failed to init MEC BOs!\n");
1603 		return r;
1604 	}
1605 
1606 	/* set up the gfx ring */
1607 	for (i = 0; i < adev->gfx.me.num_me; i++) {
1608 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1609 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1610 				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1611 					continue;
1612 
1613 				r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1614 							    i, k, j);
1615 				if (r)
1616 					return r;
1617 				ring_id++;
1618 			}
1619 		}
1620 	}
1621 
1622 	ring_id = 0;
1623 	/* set up the compute queues - allocate horizontally across pipes */
1624 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1625 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1626 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1627 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
1628 								     k, j))
1629 					continue;
1630 
1631 				r = gfx_v11_0_compute_ring_init(adev, ring_id,
1632 								i, k, j);
1633 				if (r)
1634 					return r;
1635 
1636 				ring_id++;
1637 			}
1638 		}
1639 	}
1640 
1641 	if (!adev->enable_mes_kiq) {
1642 		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
1643 		if (r) {
1644 			DRM_ERROR("Failed to init KIQ BOs!\n");
1645 			return r;
1646 		}
1647 
1648 		r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
1649 		if (r)
1650 			return r;
1651 	}
1652 
1653 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
1654 	if (r)
1655 		return r;
1656 
1657 	/* allocate visible FB for rlc auto-loading fw */
1658 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1659 		r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1660 		if (r)
1661 			return r;
1662 	}
1663 
1664 	r = gfx_v11_0_gpu_early_init(adev);
1665 	if (r)
1666 		return r;
1667 
1668 	if (amdgpu_gfx_ras_sw_init(adev)) {
1669 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
1670 		return -EINVAL;
1671 	}
1672 
1673 	gfx_v11_0_alloc_ip_dump(adev);
1674 
1675 	return 0;
1676 }
1677 
1678 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1679 {
1680 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1681 			      &adev->gfx.pfp.pfp_fw_gpu_addr,
1682 			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
1683 
1684 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1685 			      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1686 			      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1687 }
1688 
1689 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1690 {
1691 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1692 			      &adev->gfx.me.me_fw_gpu_addr,
1693 			      (void **)&adev->gfx.me.me_fw_ptr);
1694 
1695 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1696 			       &adev->gfx.me.me_fw_data_gpu_addr,
1697 			       (void **)&adev->gfx.me.me_fw_data_ptr);
1698 }
1699 
1700 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1701 {
1702 	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1703 			&adev->gfx.rlc.rlc_autoload_gpu_addr,
1704 			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
1705 }
1706 
1707 static int gfx_v11_0_sw_fini(void *handle)
1708 {
1709 	int i;
1710 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1711 
1712 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1713 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1714 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1715 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1716 
1717 	amdgpu_gfx_mqd_sw_fini(adev, 0);
1718 
1719 	if (!adev->enable_mes_kiq) {
1720 		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
1721 		amdgpu_gfx_kiq_fini(adev, 0);
1722 	}
1723 
1724 	gfx_v11_0_pfp_fini(adev);
1725 	gfx_v11_0_me_fini(adev);
1726 	gfx_v11_0_rlc_fini(adev);
1727 	gfx_v11_0_mec_fini(adev);
1728 
1729 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1730 		gfx_v11_0_rlc_autoload_buffer_fini(adev);
1731 
1732 	gfx_v11_0_free_microcode(adev);
1733 
1734 	kfree(adev->gfx.ip_dump_core);
1735 	kfree(adev->gfx.ip_dump_cp_queues);
1736 	kfree(adev->gfx.ip_dump_gfx_queues);
1737 
1738 	return 0;
1739 }
1740 
1741 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1742 				   u32 sh_num, u32 instance, int xcc_id)
1743 {
1744 	u32 data;
1745 
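	/* A value of 0xffffffff selects broadcast writes for the instance/SE/SA index */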
1746 	if (instance == 0xffffffff)
1747 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1748 				     INSTANCE_BROADCAST_WRITES, 1);
1749 	else
1750 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1751 				     instance);
1752 
1753 	if (se_num == 0xffffffff)
1754 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1755 				     1);
1756 	else
1757 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1758 
1759 	if (sh_num == 0xffffffff)
1760 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1761 				     1);
1762 	else
1763 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1764 
1765 	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1766 }
1767 
1768 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
1769 {
1770 	u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
1771 
1772 	gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
1773 	gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
1774 					   CC_GC_SA_UNIT_DISABLE,
1775 					   SA_DISABLE);
1776 	gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE);
1777 	gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
1778 						 GC_USER_SA_UNIT_DISABLE,
1779 						 SA_DISABLE);
1780 	sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
1781 					    adev->gfx.config.max_shader_engines);
1782 
1783 	return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
1784 }
1785 
1786 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1787 {
1788 	u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
1789 	u32 rb_mask;
1790 
1791 	gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1792 	gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
1793 					    CC_RB_BACKEND_DISABLE,
1794 					    BACKEND_DISABLE);
1795 	gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1796 	gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
1797 						 GC_USER_RB_BACKEND_DISABLE,
1798 						 BACKEND_DISABLE);
1799 	rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
1800 					    adev->gfx.config.max_shader_engines);
1801 
1802 	return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
1803 }
1804 
1805 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
1806 {
1807 	u32 rb_bitmap_width_per_sa;
1808 	u32 max_sa;
1809 	u32 active_sa_bitmap;
1810 	u32 global_active_rb_bitmap;
1811 	u32 active_rb_bitmap = 0;
1812 	u32 i;
1813 
1814 	/* query sa bitmap from SA_UNIT_DISABLE registers */
1815 	active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
1816 	/* query rb bitmap from RB_BACKEND_DISABLE registers */
1817 	global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
1818 
1819 	/* generate active rb bitmap according to active sa bitmap */
1820 	max_sa = adev->gfx.config.max_shader_engines *
1821 		 adev->gfx.config.max_sh_per_se;
1822 	rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
1823 				 adev->gfx.config.max_sh_per_se;
1824 	for (i = 0; i < max_sa; i++) {
1825 		if (active_sa_bitmap & (1 << i))
1826 			active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
1827 	}
1828 
1829 	active_rb_bitmap &= global_active_rb_bitmap;
1830 	adev->gfx.config.backend_enable_mask = active_rb_bitmap;
1831 	adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
1832 }
1833 
1834 #define DEFAULT_SH_MEM_BASES	(0x6000)
1835 #define LDS_APP_BASE           0x1
1836 #define SCRATCH_APP_BASE       0x2
1837 
1838 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
1839 {
1840 	int i;
1841 	uint32_t sh_mem_bases;
1842 	uint32_t data;
1843 
1844 	/*
1845 	 * Configure apertures:
1846 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1847 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1848 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1849 	 */
1850 	sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1851 			SCRATCH_APP_BASE;
1852 
1853 	mutex_lock(&adev->srbm_mutex);
1854 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1855 		soc21_grbm_select(adev, 0, 0, 0, i);
1856 		/* CP and shaders */
1857 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1858 		WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1859 
1860 		/* Enable trap for each kfd vmid. */
1861 		data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1862 		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
1863 		WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
1864 	}
1865 	soc21_grbm_select(adev, 0, 0, 0, 0);
1866 	mutex_unlock(&adev->srbm_mutex);
1867 
1868 	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
1869 	   access. These should be enabled by FW for target VMIDs. */
1870 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1871 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
1872 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
1873 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
1874 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
1875 	}
1876 }
1877 
1878 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
1879 {
1880 	int vmid;
1881 
1882 	/*
1883 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
1884 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
1885 	 * the driver can enable them for graphics. VMID0 should maintain
1886 	 * access so that HWS firmware can save/restore entries.
1887 	 */
1888 	for (vmid = 1; vmid < 16; vmid++) {
1889 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
1890 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
1891 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
1892 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
1893 	}
1894 }
1895 
1896 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
1897 {
1898 	/* TODO: harvest feature to be added later. */
1899 }
1900 
1901 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
1902 {
1903 	/* TCCs are global (not instanced). */
1904 	uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
1905 			       RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
1906 
1907 	adev->gfx.config.tcc_disabled_mask =
1908 		REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
1909 		(REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
1910 }
1911 
1912 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
1913 {
1914 	u32 tmp;
1915 	int i;
1916 
1917 	if (!amdgpu_sriov_vf(adev))
1918 		WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1919 
1920 	gfx_v11_0_setup_rb(adev);
1921 	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
1922 	gfx_v11_0_get_tcc_info(adev);
1923 	adev->gfx.config.pa_sc_tile_steering_override = 0;
1924 
1925 	/* Set whether texture coordinate truncation is conformant. */
1926 	tmp = RREG32_SOC15(GC, 0, regTA_CNTL2);
1927 	adev->gfx.config.ta_cntl2_truncate_coord_mode =
1928 		REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE);
1929 
1930 	/* XXX SH_MEM regs */
1931 	/* where to put LDS, scratch, GPUVM in FSA64 space */
1932 	mutex_lock(&adev->srbm_mutex);
1933 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
1934 		soc21_grbm_select(adev, 0, 0, 0, i);
1935 		/* CP and shaders */
1936 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1937 		if (i != 0) {
1938 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1939 				(adev->gmc.private_aperture_start >> 48));
1940 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1941 				(adev->gmc.shared_aperture_start >> 48));
1942 			WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
1943 		}
1944 	}
1945 	soc21_grbm_select(adev, 0, 0, 0, 0);
1946 
1947 	mutex_unlock(&adev->srbm_mutex);
1948 
1949 	gfx_v11_0_init_compute_vmid(adev);
1950 	gfx_v11_0_init_gds_vmid(adev);
1951 }
1952 
1953 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1954 					       bool enable)
1955 {
1956 	u32 tmp;
1957 
1958 	if (amdgpu_sriov_vf(adev))
1959 		return;
1960 
1961 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0);
1962 
1963 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1964 			    enable ? 1 : 0);
1965 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1966 			    enable ? 1 : 0);
1967 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1968 			    enable ? 1 : 0);
1969 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1970 			    enable ? 1 : 0);
1971 
1972 	WREG32_SOC15(GC, 0, regCP_INT_CNTL_RING0, tmp);
1973 }
1974 
1975 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
1976 {
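	/* Fill the clear state buffer and program its GPU address and size into the RLC */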
1977 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1978 
1979 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
1980 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
1981 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
1982 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1983 	WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1984 
1985 	return 0;
1986 }
1987 
1988 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
1989 {
1990 	u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
1991 
1992 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1993 	WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
1994 }
1995 
1996 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
1997 {
1998 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1999 	udelay(50);
2000 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2001 	udelay(50);
2002 }
2003 
2004 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
2005 					     bool enable)
2006 {
2007 	uint32_t rlc_pg_cntl;
2008 
2009 	rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
2010 
2011 	if (!enable) {
2012 		/* RLC_PG_CNTL[23] = 0 (default):
2013 		 *   the RLC waits for handshake acks from the SMU and
2014 		 *   GFXOFF will be enabled.
2015 		 * RLC_PG_CNTL[23] = 1:
2016 		 *   the RLC will not issue any message to the SMU,
2017 		 *   hence no handshake between the SMU and RLC and
2018 		 *   GFXOFF will be disabled.
2019 		 */
2020 		rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
2021 	} else
2022 		rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
2023 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
2024 }
2025 
2026 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
2027 {
2028 	/* TODO: enable the RLC & SMU handshake once the SMU
2029 	 * and gfxoff feature work as expected */
2030 	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
2031 		gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
2032 
2033 	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2034 	udelay(50);
2035 }
2036 
2037 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
2038 {
2039 	uint32_t tmp;
2040 
2041 	/* enable Save Restore Machine */
2042 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
2043 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2044 	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
2045 	WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
2046 }
2047 
2048 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
2049 {
2050 	const struct rlc_firmware_header_v2_0 *hdr;
2051 	const __le32 *fw_data;
2052 	unsigned i, fw_size;
2053 
2054 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2055 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2056 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2057 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2058 
2059 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
2060 		     RLCG_UCODE_LOADING_START_ADDRESS);
2061 
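	/* Stream the RLC_G ucode into RLC GPM ucode memory one dword at a time */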
2062 	for (i = 0; i < fw_size; i++)
2063 		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
2064 			     le32_to_cpup(fw_data++));
2065 
2066 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2067 }
2068 
2069 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
2070 {
2071 	const struct rlc_firmware_header_v2_2 *hdr;
2072 	const __le32 *fw_data;
2073 	unsigned i, fw_size;
2074 	u32 tmp;
2075 
2076 	hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
2077 
2078 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2079 			le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
2080 	fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
2081 
2082 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
2083 
2084 	for (i = 0; i < fw_size; i++) {
2085 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2086 			msleep(1);
2087 		WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
2088 				le32_to_cpup(fw_data++));
2089 	}
2090 
2091 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
2092 
2093 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2094 			le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
2095 	fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
2096 
2097 	WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
2098 	for (i = 0; i < fw_size; i++) {
2099 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2100 			msleep(1);
2101 		WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
2102 				le32_to_cpup(fw_data++));
2103 	}
2104 
2105 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
2106 
2107 	tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
2108 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
2109 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
2110 	WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
2111 }
2112 
2113 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
2114 {
2115 	const struct rlc_firmware_header_v2_3 *hdr;
2116 	const __le32 *fw_data;
2117 	unsigned i, fw_size;
2118 	u32 tmp;
2119 
2120 	hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
2121 
2122 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2123 			le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
2124 	fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
2125 
2126 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
2127 
2128 	for (i = 0; i < fw_size; i++) {
2129 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2130 			msleep(1);
2131 		WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
2132 				le32_to_cpup(fw_data++));
2133 	}
2134 
2135 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
2136 
2137 	tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
2138 	tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
2139 	WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
2140 
2141 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2142 			le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
2143 	fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
2144 
2145 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
2146 
2147 	for (i = 0; i < fw_size; i++) {
2148 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2149 			msleep(1);
2150 		WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
2151 				le32_to_cpup(fw_data++));
2152 	}
2153 
2154 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
2155 
2156 	tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
2157 	tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
2158 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
2159 }
2160 
2161 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
2162 {
2163 	const struct rlc_firmware_header_v2_0 *hdr;
2164 	uint16_t version_major;
2165 	uint16_t version_minor;
2166 
2167 	if (!adev->gfx.rlc_fw)
2168 		return -EINVAL;
2169 
2170 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2171 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2172 
2173 	version_major = le16_to_cpu(hdr->header.header_version_major);
2174 	version_minor = le16_to_cpu(hdr->header.header_version_minor);
2175 
2176 	if (version_major == 2) {
2177 		gfx_v11_0_load_rlcg_microcode(adev);
2178 		if (amdgpu_dpm == 1) {
2179 			if (version_minor >= 2)
2180 				gfx_v11_0_load_rlc_iram_dram_microcode(adev);
2181 			if (version_minor == 3)
2182 				gfx_v11_0_load_rlcp_rlcv_microcode(adev);
2183 		}
2184 
2185 		return 0;
2186 	}
2187 
2188 	return -EINVAL;
2189 }
2190 
2191 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
2192 {
2193 	int r;
2194 
2195 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
2196 		gfx_v11_0_init_csb(adev);
2197 
2198 		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
2199 			gfx_v11_0_rlc_enable_srm(adev);
2200 	} else {
2201 		if (amdgpu_sriov_vf(adev)) {
2202 			gfx_v11_0_init_csb(adev);
2203 			return 0;
2204 		}
2205 
2206 		adev->gfx.rlc.funcs->stop(adev);
2207 
2208 		/* disable CG */
2209 		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
2210 
2211 		/* disable PG */
2212 		WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
2213 
2214 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2215 			/* legacy rlc firmware loading */
2216 			r = gfx_v11_0_rlc_load_microcode(adev);
2217 			if (r)
2218 				return r;
2219 		}
2220 
2221 		gfx_v11_0_init_csb(adev);
2222 
2223 		adev->gfx.rlc.funcs->start(adev);
2224 	}
2225 	return 0;
2226 }
2227 
2228 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
2229 {
2230 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2231 	uint32_t tmp;
2232 	int i;
2233 
2234 	/* Trigger an invalidation of the L1 instruction caches */
2235 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2236 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2237 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2238 
2239 	/* Wait for invalidation complete */
2240 	for (i = 0; i < usec_timeout; i++) {
2241 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2242 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2243 					INVALIDATE_CACHE_COMPLETE))
2244 			break;
2245 		udelay(1);
2246 	}
2247 
2248 	if (i >= usec_timeout) {
2249 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2250 		return -EINVAL;
2251 	}
2252 
2253 	if (amdgpu_emu_mode == 1)
2254 		adev->hdp.funcs->flush_hdp(adev, NULL);
2255 
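	/* Map the ME instruction cache to VMID 0 with address clamping enabled */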
2256 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2257 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2258 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2259 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2260 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2261 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2262 
2263 	/* Program me ucode address into instruction cache address register */
2264 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2265 			lower_32_bits(addr) & 0xFFFFF000);
2266 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2267 			upper_32_bits(addr));
2268 
2269 	return 0;
2270 }
2271 
2272 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
2273 {
2274 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2275 	uint32_t tmp;
2276 	int i;
2277 
2278 	/* Trigger an invalidation of the L1 instruction caches */
2279 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2280 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2281 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2282 
2283 	/* Wait for invalidation complete */
2284 	for (i = 0; i < usec_timeout; i++) {
2285 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2286 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2287 					INVALIDATE_CACHE_COMPLETE))
2288 			break;
2289 		udelay(1);
2290 	}
2291 
2292 	if (i >= usec_timeout) {
2293 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2294 		return -EINVAL;
2295 	}
2296 
2297 	if (amdgpu_emu_mode == 1)
2298 		adev->hdp.funcs->flush_hdp(adev, NULL);
2299 
2300 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2301 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2302 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2303 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2304 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2305 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2306 
2307 	/* Program pfp ucode address into instruction cache address register */
2308 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2309 			lower_32_bits(addr) & 0xFFFFF000);
2310 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2311 			upper_32_bits(addr));
2312 
2313 	return 0;
2314 }
2315 
2316 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2317 {
2318 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2319 	uint32_t tmp;
2320 	int i;
2321 
2322 	/* Trigger an invalidation of the L1 instruction caches */
2323 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2324 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2325 
2326 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2327 
2328 	/* Wait for invalidation complete */
2329 	for (i = 0; i < usec_timeout; i++) {
2330 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2331 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2332 					INVALIDATE_CACHE_COMPLETE))
2333 			break;
2334 		udelay(1);
2335 	}
2336 
2337 	if (i >= usec_timeout) {
2338 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2339 		return -EINVAL;
2340 	}
2341 
2342 	if (amdgpu_emu_mode == 1)
2343 		adev->hdp.funcs->flush_hdp(adev, NULL);
2344 
2345 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2346 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2347 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2348 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2349 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2350 
2351 	/* Program mec1 ucode address into instruction cache address register */
2352 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2353 			lower_32_bits(addr) & 0xFFFFF000);
2354 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2355 			upper_32_bits(addr));
2356 
2357 	return 0;
2358 }
2359 
2360 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2361 {
2362 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2363 	uint32_t tmp;
2364 	unsigned i, pipe_id;
2365 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2366 
2367 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2368 		adev->gfx.pfp_fw->data;
2369 
2370 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2371 		lower_32_bits(addr));
2372 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2373 		upper_32_bits(addr));
2374 
2375 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2376 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2377 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2378 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2379 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2380 
2381 	/*
2382 	 * Programming any of the CP_PFP_IC_BASE registers
2383 	 * forces invalidation of the PFP L1 I$. Wait for the
2384 	 * invalidation to complete.
2385 	 */
2386 	for (i = 0; i < usec_timeout; i++) {
2387 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2388 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2389 			INVALIDATE_CACHE_COMPLETE))
2390 			break;
2391 		udelay(1);
2392 	}
2393 
2394 	if (i >= usec_timeout) {
2395 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2396 		return -EINVAL;
2397 	}
2398 
2399 	/* Prime the L1 instruction caches */
2400 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2401 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2402 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2403 	/* Wait for the cache to be primed */
2404 	for (i = 0; i < usec_timeout; i++) {
2405 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2406 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2407 			ICACHE_PRIMED))
2408 			break;
2409 		udelay(1);
2410 	}
2411 
2412 	if (i >= usec_timeout) {
2413 		dev_err(adev->dev, "failed to prime instruction cache\n");
2414 		return -EINVAL;
2415 	}
2416 
2417 	mutex_lock(&adev->srbm_mutex);
2418 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2419 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2420 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2421 			(pfp_hdr->ucode_start_addr_hi << 30) |
2422 			(pfp_hdr->ucode_start_addr_lo >> 2));
2423 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2424 			pfp_hdr->ucode_start_addr_hi >> 2);
2425 
2426 		/*
2427 		 * Program CP_ME_CNTL to reset the given pipe so that
2428 		 * CP_PFP_PRGRM_CNTR_START takes effect.
2429 		 */
2430 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2431 		if (pipe_id == 0)
2432 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2433 					PFP_PIPE0_RESET, 1);
2434 		else
2435 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2436 					PFP_PIPE1_RESET, 1);
2437 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2438 
2439 		/* Clear the selected pfp pipe reset bit. */
2440 		if (pipe_id == 0)
2441 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2442 					PFP_PIPE0_RESET, 0);
2443 		else
2444 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2445 					PFP_PIPE1_RESET, 0);
2446 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2447 
2448 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2449 			lower_32_bits(addr2));
2450 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2451 			upper_32_bits(addr2));
2452 	}
2453 	soc21_grbm_select(adev, 0, 0, 0, 0);
2454 	mutex_unlock(&adev->srbm_mutex);
2455 
2456 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2457 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2458 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2459 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2460 
2461 	/* Invalidate the data caches */
2462 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2463 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2464 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2465 
2466 	for (i = 0; i < usec_timeout; i++) {
2467 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2468 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2469 			INVALIDATE_DCACHE_COMPLETE))
2470 			break;
2471 		udelay(1);
2472 	}
2473 
2474 	if (i >= usec_timeout) {
2475 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2476 		return -EINVAL;
2477 	}
2478 
2479 	return 0;
2480 }
2481 
2482 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2483 {
2484 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2485 	uint32_t tmp;
2486 	unsigned i, pipe_id;
2487 	const struct gfx_firmware_header_v2_0 *me_hdr;
2488 
2489 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2490 		adev->gfx.me_fw->data;
2491 
2492 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2493 		lower_32_bits(addr));
2494 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2495 		upper_32_bits(addr));
2496 
2497 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2498 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2499 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2500 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2501 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2502 
2503 	/*
2504 	 * Programming any of the CP_ME_IC_BASE registers
2505 	 * forces invalidation of the ME L1 I$. Wait for the
2506 	 * invalidation to complete.
2507 	 */
2508 	for (i = 0; i < usec_timeout; i++) {
2509 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2510 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2511 			INVALIDATE_CACHE_COMPLETE))
2512 			break;
2513 		udelay(1);
2514 	}
2515 
2516 	if (i >= usec_timeout) {
2517 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2518 		return -EINVAL;
2519 	}
2520 
2521 	/* Prime the instruction caches */
2522 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2523 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2524 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2525 
2526 	/* Wait for the instruction cache to be primed */
2527 	for (i = 0; i < usec_timeout; i++) {
2528 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2529 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2530 			ICACHE_PRIMED))
2531 			break;
2532 		udelay(1);
2533 	}
2534 
2535 	if (i >= usec_timeout) {
2536 		dev_err(adev->dev, "failed to prime instruction cache\n");
2537 		return -EINVAL;
2538 	}
2539 
2540 	mutex_lock(&adev->srbm_mutex);
2541 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2542 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2543 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2544 			(me_hdr->ucode_start_addr_hi << 30) |
2545 			(me_hdr->ucode_start_addr_lo >> 2));
2546 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2547 			me_hdr->ucode_start_addr_hi >> 2);
2548 
2549 		/*
2550 		 * Program CP_ME_CNTL to reset the given pipe so that
2551 		 * CP_ME_PRGRM_CNTR_START takes effect.
2552 		 */
2553 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2554 		if (pipe_id == 0)
2555 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2556 					ME_PIPE0_RESET, 1);
2557 		else
2558 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2559 					ME_PIPE1_RESET, 1);
2560 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2561 
2562 		/* Clear the selected me pipe reset bit. */
2563 		if (pipe_id == 0)
2564 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2565 					ME_PIPE0_RESET, 0);
2566 		else
2567 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2568 					ME_PIPE1_RESET, 0);
2569 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2570 
2571 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2572 			lower_32_bits(addr2));
2573 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2574 			upper_32_bits(addr2));
2575 	}
2576 	soc21_grbm_select(adev, 0, 0, 0, 0);
2577 	mutex_unlock(&adev->srbm_mutex);
2578 
2579 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2580 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2581 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2582 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2583 
2584 	/* Invalidate the data caches */
2585 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2586 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2587 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2588 
2589 	for (i = 0; i < usec_timeout; i++) {
2590 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2591 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2592 			INVALIDATE_DCACHE_COMPLETE))
2593 			break;
2594 		udelay(1);
2595 	}
2596 
2597 	if (i >= usec_timeout) {
2598 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2599 		return -EINVAL;
2600 	}
2601 
2602 	return 0;
2603 }
2604 
2605 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2606 {
2607 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2608 	uint32_t tmp;
2609 	unsigned i;
2610 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2611 
2612 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2613 		adev->gfx.mec_fw->data;
2614 
2615 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2616 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2617 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2618 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2619 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2620 
2621 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2622 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2623 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2624 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2625 
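	/* Program the per-pipe MEC instruction/data base addresses and ucode start PC */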
2626 	mutex_lock(&adev->srbm_mutex);
2627 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2628 		soc21_grbm_select(adev, 1, i, 0, 0);
2629 
2630 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, lower_32_bits(addr2));
2631 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2632 		     upper_32_bits(addr2));
2633 
2634 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2635 					mec_hdr->ucode_start_addr_lo >> 2 |
2636 					mec_hdr->ucode_start_addr_hi << 30);
2637 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2638 					mec_hdr->ucode_start_addr_hi >> 2);
2639 
2640 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, lower_32_bits(addr));
2641 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2642 		     upper_32_bits(addr));
2643 	}
2644 	mutex_unlock(&adev->srbm_mutex);
2645 	soc21_grbm_select(adev, 0, 0, 0, 0);
2646 
2647 	/* Trigger an invalidation of the MEC data caches */
2648 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2649 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2650 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2651 
2652 	/* Wait for invalidation complete */
2653 	for (i = 0; i < usec_timeout; i++) {
2654 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2655 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2656 				       INVALIDATE_DCACHE_COMPLETE))
2657 			break;
2658 		udelay(1);
2659 	}
2660 
2661 	if (i >= usec_timeout) {
2662 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2663 		return -EINVAL;
2664 	}
2665 
2666 	/* Trigger an invalidation of the L1 instruction caches */
2667 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2668 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2669 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2670 
2671 	/* Wait for invalidation complete */
2672 	for (i = 0; i < usec_timeout; i++) {
2673 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2674 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2675 				       INVALIDATE_CACHE_COMPLETE))
2676 			break;
2677 		udelay(1);
2678 	}
2679 
2680 	if (i >= usec_timeout) {
2681 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2682 		return -EINVAL;
2683 	}
2684 
2685 	return 0;
2686 }
2687 
2688 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2689 {
2690 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2691 	const struct gfx_firmware_header_v2_0 *me_hdr;
2692 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2693 	uint32_t pipe_id, tmp;
2694 
2695 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2696 		adev->gfx.mec_fw->data;
2697 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2698 		adev->gfx.me_fw->data;
2699 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2700 		adev->gfx.pfp_fw->data;
2701 
2702 	/* config pfp program start addr */
2703 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2704 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2705 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2706 			(pfp_hdr->ucode_start_addr_hi << 30) |
2707 			(pfp_hdr->ucode_start_addr_lo >> 2));
2708 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2709 			pfp_hdr->ucode_start_addr_hi >> 2);
2710 	}
2711 	soc21_grbm_select(adev, 0, 0, 0, 0);
2712 
2713 	/* reset pfp pipe */
2714 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2715 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
2716 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
2717 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2718 
2719 	/* clear pfp pipe reset */
2720 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2721 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2722 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2723 
2724 	/* config me program start addr */
2725 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2726 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2727 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2728 			(me_hdr->ucode_start_addr_hi << 30) |
2729 			(me_hdr->ucode_start_addr_lo >> 2));
2730 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2731 			me_hdr->ucode_start_addr_hi >> 2);
2732 	}
2733 	soc21_grbm_select(adev, 0, 0, 0, 0);
2734 
2735 	/* reset me pipe */
2736 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2737 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2738 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2739 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2740 
2741 	/* clear me pipe reset */
2742 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2743 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2744 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2745 
2746 	/* config mec program start addr */
2747 	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2748 		soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2749 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2750 					mec_hdr->ucode_start_addr_lo >> 2 |
2751 					mec_hdr->ucode_start_addr_hi << 30);
2752 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2753 					mec_hdr->ucode_start_addr_hi >> 2);
2754 	}
2755 	soc21_grbm_select(adev, 0, 0, 0, 0);
2756 
2757 	/* reset mec pipe */
2758 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2759 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
2760 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
2761 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
2762 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
2763 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2764 
2765 	/* clear mec pipe reset */
2766 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
2767 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
2768 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
2769 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
2770 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2771 }
2772 
2773 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2774 {
2775 	uint32_t cp_status;
2776 	uint32_t bootload_status;
2777 	int i, r;
2778 	uint64_t addr, addr2;
2779 
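	/* Poll until the CP is idle and the RLC bootloader reports BOOTLOAD_COMPLETE */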
2780 	for (i = 0; i < adev->usec_timeout; i++) {
2781 		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2782 
2783 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
2784 			    IP_VERSION(11, 0, 1) ||
2785 		    amdgpu_ip_version(adev, GC_HWIP, 0) ==
2786 			    IP_VERSION(11, 0, 4) ||
2787 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
2788 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1))
2789 			bootload_status = RREG32_SOC15(GC, 0,
2790 					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
2791 		else
2792 			bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2793 
2794 		if ((cp_status == 0) &&
2795 		    (REG_GET_FIELD(bootload_status,
2796 			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2797 			break;
2798 		}
2799 		udelay(1);
2800 	}
2801 
2802 	if (i >= adev->usec_timeout) {
2803 		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2804 		return -ETIMEDOUT;
2805 	}
2806 
2807 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2808 		if (adev->gfx.rs64_enable) {
2809 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2810 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
2811 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2812 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
2813 			r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
2814 			if (r)
2815 				return r;
2816 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2817 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
2818 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2819 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
2820 			r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
2821 			if (r)
2822 				return r;
2823 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2824 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
2825 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2826 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
2827 			r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
2828 			if (r)
2829 				return r;
2830 		} else {
2831 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2832 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
2833 			r = gfx_v11_0_config_me_cache(adev, addr);
2834 			if (r)
2835 				return r;
2836 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2837 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
2838 			r = gfx_v11_0_config_pfp_cache(adev, addr);
2839 			if (r)
2840 				return r;
2841 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2842 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
2843 			r = gfx_v11_0_config_mec_cache(adev, addr);
2844 			if (r)
2845 				return r;
2846 		}
2847 	}
2848 
2849 	return 0;
2850 }
2851 
2852 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2853 {
2854 	int i;
2855 	u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
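	/* Halt or un-halt the ME and PFP, then wait for CP_STAT to go idle */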
2856 
2857 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2858 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2859 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2860 
2861 	for (i = 0; i < adev->usec_timeout; i++) {
2862 		if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
2863 			break;
2864 		udelay(1);
2865 	}
2866 
2867 	if (i >= adev->usec_timeout)
2868 		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
2869 
2870 	return 0;
2871 }
2872 
2873 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
2874 {
2875 	int r;
2876 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2877 	const __le32 *fw_data;
2878 	unsigned i, fw_size;
2879 
2880 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2881 		adev->gfx.pfp_fw->data;
2882 
2883 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2884 
2885 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2886 		le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2887 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
2888 
2889 	r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
2890 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
2891 				      &adev->gfx.pfp.pfp_fw_obj,
2892 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
2893 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
2894 	if (r) {
2895 		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
2896 		gfx_v11_0_pfp_fini(adev);
2897 		return r;
2898 	}
2899 
2900 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
2901 
2902 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2903 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2904 
2905 	gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
2906 
2907 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
2908 
2909 	for (i = 0; i < pfp_hdr->jt_size; i++)
2910 		WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
2911 			     le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
2912 
2913 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2914 
2915 	return 0;
2916 }
2917 
2918 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
2919 {
2920 	int r;
2921 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2922 	const __le32 *fw_ucode, *fw_data;
2923 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
2924 	uint32_t tmp;
2925 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2926 
2927 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2928 		adev->gfx.pfp_fw->data;
2929 
2930 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2931 
2932 	/* instruction */
2933 	fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
2934 		le32_to_cpu(pfp_hdr->ucode_offset_bytes));
2935 	fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
2936 	/* data */
2937 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
2938 		le32_to_cpu(pfp_hdr->data_offset_bytes));
2939 	fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
2940 
2941 	/* 64KB alignment */
2942 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
2943 				      64 * 1024,
2944 				      AMDGPU_GEM_DOMAIN_VRAM |
2945 				      AMDGPU_GEM_DOMAIN_GTT,
2946 				      &adev->gfx.pfp.pfp_fw_obj,
2947 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
2948 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
2949 	if (r) {
2950 		dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
2951 		gfx_v11_0_pfp_fini(adev);
2952 		return r;
2953 	}
2954 
2955 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
2956 				      64 * 1024,
2957 				      AMDGPU_GEM_DOMAIN_VRAM |
2958 				      AMDGPU_GEM_DOMAIN_GTT,
2959 				      &adev->gfx.pfp.pfp_fw_data_obj,
2960 				      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
2961 				      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
2962 	if (r) {
2963 		dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
2964 		gfx_v11_0_pfp_fini(adev);
2965 		return r;
2966 	}
2967 
2968 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
2969 	memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
2970 
2971 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
2972 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
2973 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
2974 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
2975 
2976 	if (amdgpu_emu_mode == 1)
2977 		adev->hdp.funcs->flush_hdp(adev, NULL);
2978 
2979 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2980 		lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2981 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2982 		upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
2983 
2984 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2985 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2986 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2987 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2988 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2989 
2990 	/*
2991 	 * Programming any of the CP_PFP_IC_BASE registers
2992 	 * forces invalidation of the PFP L1 I$. Wait for the
2993 	 * invalidation to complete.
2994 	 */
2995 	for (i = 0; i < usec_timeout; i++) {
2996 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2997 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2998 			INVALIDATE_CACHE_COMPLETE))
2999 			break;
3000 		udelay(1);
3001 	}
3002 
3003 	if (i >= usec_timeout) {
3004 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3005 		return -EINVAL;
3006 	}
3007 
3008 	/* Prime the L1 instruction caches */
3009 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3010 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
3011 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
3012 	/* Wait for the cache to be primed */
3013 	for (i = 0; i < usec_timeout; i++) {
3014 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3015 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
3016 			ICACHE_PRIMED))
3017 			break;
3018 		udelay(1);
3019 	}
3020 
3021 	if (i >= usec_timeout) {
3022 		dev_err(adev->dev, "failed to prime instruction cache\n");
3023 		return -EINVAL;
3024 	}
3025 
3026 	mutex_lock(&adev->srbm_mutex);
3027 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3028 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3029 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
3030 			(pfp_hdr->ucode_start_addr_hi << 30) |
3031 			(pfp_hdr->ucode_start_addr_lo >> 2) );
3032 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
3033 			pfp_hdr->ucode_start_addr_hi >> 2);
3034 
3035 		/*
3036 		 * Program CP_ME_CNTL to reset the given pipe so that
3037 		 * CP_PFP_PRGRM_CNTR_START takes effect.
3038 		 */
3039 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3040 		if (pipe_id == 0)
3041 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3042 					PFP_PIPE0_RESET, 1);
3043 		else
3044 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3045 					PFP_PIPE1_RESET, 1);
3046 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3047 
3048 		/* Clear the pfp pipe reset bit. */
3049 		if (pipe_id == 0)
3050 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3051 					PFP_PIPE0_RESET, 0);
3052 		else
3053 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3054 					PFP_PIPE1_RESET, 0);
3055 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3056 
3057 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
3058 			lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
3059 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
3060 			upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
3061 	}
3062 	soc21_grbm_select(adev, 0, 0, 0, 0);
3063 	mutex_unlock(&adev->srbm_mutex);
3064 
3065 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3066 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3067 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3068 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3069 
3070 	/* Invalidate the data caches */
3071 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3072 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3073 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3074 
3075 	for (i = 0; i < usec_timeout; i++) {
3076 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3077 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3078 			INVALIDATE_DCACHE_COMPLETE))
3079 			break;
3080 		udelay(1);
3081 	}
3082 
3083 	if (i >= usec_timeout) {
3084 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3085 		return -EINVAL;
3086 	}
3087 
3088 	return 0;
3089 }
3090 
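/*
 * Load the legacy (non-RS64) ME microcode: copy the ucode image into a
 * GTT buffer object, configure the ME cache to point at it and write the
 * jump table through CP_HYP_ME_UCODE_ADDR/DATA.
 */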
3091 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
3092 {
3093 	int r;
3094 	const struct gfx_firmware_header_v1_0 *me_hdr;
3095 	const __le32 *fw_data;
3096 	unsigned i, fw_size;
3097 
3098 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3099 		adev->gfx.me_fw->data;
3100 
3101 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3102 
3103 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
3104 		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3105 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
3106 
3107 	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
3108 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3109 				      &adev->gfx.me.me_fw_obj,
3110 				      &adev->gfx.me.me_fw_gpu_addr,
3111 				      (void **)&adev->gfx.me.me_fw_ptr);
3112 	if (r) {
3113 		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
3114 		gfx_v11_0_me_fini(adev);
3115 		return r;
3116 	}
3117 
3118 	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
3119 
3120 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3121 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3122 
3123 	gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
3124 
3125 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
3126 
3127 	for (i = 0; i < me_hdr->jt_size; i++)
3128 		WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
3129 			     le32_to_cpup(fw_data + me_hdr->jt_offset + i));
3130 
3131 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
3132 
3133 	return 0;
3134 }
3135 
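/*
 * Load the RS64 ME firmware: copy the instruction and data images into
 * 64KB-aligned buffer objects, program and prime the ME instruction cache,
 * set the program counter start address for each ME pipe while toggling the
 * pipe reset, then program and invalidate the RS64 data cache.
 */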
3136 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
3137 {
3138 	int r;
3139 	const struct gfx_firmware_header_v2_0 *me_hdr;
3140 	const __le32 *fw_ucode, *fw_data;
3141 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
3142 	uint32_t tmp;
3143 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
3144 
3145 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
3146 		adev->gfx.me_fw->data;
3147 
3148 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3149 
3150 	/* instruction */
3151 	fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
3152 		le32_to_cpu(me_hdr->ucode_offset_bytes));
3153 	fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
3154 	/* data */
3155 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
3156 		le32_to_cpu(me_hdr->data_offset_bytes));
3157 	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
3158 
3159 	/* 64kb align */
3160 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3161 				      64 * 1024,
3162 				      AMDGPU_GEM_DOMAIN_VRAM |
3163 				      AMDGPU_GEM_DOMAIN_GTT,
3164 				      &adev->gfx.me.me_fw_obj,
3165 				      &adev->gfx.me.me_fw_gpu_addr,
3166 				      (void **)&adev->gfx.me.me_fw_ptr);
3167 	if (r) {
3168 		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
3169 		gfx_v11_0_me_fini(adev);
3170 		return r;
3171 	}
3172 
3173 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3174 				      64 * 1024,
3175 				      AMDGPU_GEM_DOMAIN_VRAM |
3176 				      AMDGPU_GEM_DOMAIN_GTT,
3177 				      &adev->gfx.me.me_fw_data_obj,
3178 				      &adev->gfx.me.me_fw_data_gpu_addr,
3179 				      (void **)&adev->gfx.me.me_fw_data_ptr);
3180 	if (r) {
3181 		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
3182 		gfx_v11_0_me_fini(adev);
3183 		return r;
3184 	}
3185 
3186 	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
3187 	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
3188 
3189 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3190 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
3191 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3192 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
3193 
3194 	if (amdgpu_emu_mode == 1)
3195 		adev->hdp.funcs->flush_hdp(adev, NULL);
3196 
3197 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
3198 		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
3199 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
3200 		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
3201 
3202 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
3203 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
3204 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
3205 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
3206 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
3207 
3208 	/*
3209 	 * Programming any of the CP_ME_IC_BASE registers
3210 	 * forces an invalidation of the ME L1 instruction cache.
3211 	 * Wait for the invalidation to complete.
3212 	 */
3213 	for (i = 0; i < usec_timeout; i++) {
3214 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3215 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3216 			INVALIDATE_CACHE_COMPLETE))
3217 			break;
3218 		udelay(1);
3219 	}
3220 
3221 	if (i >= usec_timeout) {
3222 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3223 		return -EINVAL;
3224 	}
3225 
3226 	/* Prime the instruction caches */
3227 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3228 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
3229 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
3230 
3231 	/* Wait for the instruction cache to be primed */
3232 	for (i = 0; i < usec_timeout; i++) {
3233 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3234 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3235 			ICACHE_PRIMED))
3236 			break;
3237 		udelay(1);
3238 	}
3239 
3240 	if (i >= usec_timeout) {
3241 		dev_err(adev->dev, "failed to prime instruction cache\n");
3242 		return -EINVAL;
3243 	}
3244 
3245 	mutex_lock(&adev->srbm_mutex);
3246 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3247 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3248 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
3249 			(me_hdr->ucode_start_addr_hi << 30) |
3250 			(me_hdr->ucode_start_addr_lo >> 2));
3251 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
3252 			me_hdr->ucode_start_addr_hi >> 2);
3253 
3254 		/*
3255 		 * Program CP_ME_CNTL to reset the given pipe so that
3256 		 * CP_ME_PRGRM_CNTR_START takes effect.
3257 		 */
3258 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3259 		if (pipe_id == 0)
3260 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3261 					ME_PIPE0_RESET, 1);
3262 		else
3263 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3264 					ME_PIPE1_RESET, 1);
3265 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3266 
3267 		/* Clear the me pipe reset bit. */
3268 		if (pipe_id == 0)
3269 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3270 					ME_PIPE0_RESET, 0);
3271 		else
3272 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3273 					ME_PIPE1_RESET, 0);
3274 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3275 
3276 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
3277 			lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3278 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
3279 			upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3280 	}
3281 	soc21_grbm_select(adev, 0, 0, 0, 0);
3282 	mutex_unlock(&adev->srbm_mutex);
3283 
3284 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3285 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3286 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3287 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3288 
3289 	/* Invalidate the data caches */
3290 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3291 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3292 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3293 
3294 	for (i = 0; i < usec_timeout; i++) {
3295 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3296 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3297 			INVALIDATE_DCACHE_COMPLETE))
3298 			break;
3299 		udelay(1);
3300 	}
3301 
3302 	if (i >= usec_timeout) {
3303 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3304 		return -EINVAL;
3305 	}
3306 
3307 	return 0;
3308 }
3309 
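/*
 * Halt the gfx CP and load the PFP and ME microcode, using either the
 * RS64 or the legacy path depending on adev->gfx.rs64_enable.
 */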
3310 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3311 {
3312 	int r;
3313 
3314 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3315 		return -EINVAL;
3316 
3317 	gfx_v11_0_cp_gfx_enable(adev, false);
3318 
3319 	if (adev->gfx.rs64_enable)
3320 		r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3321 	else
3322 		r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3323 	if (r) {
3324 		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3325 		return r;
3326 	}
3327 
3328 	if (adev->gfx.rs64_enable)
3329 		r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3330 	else
3331 		r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3332 	if (r) {
3333 		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3334 		return r;
3335 	}
3336 
3337 	return 0;
3338 }
3339 
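/*
 * Initialize the CP (max contexts, device id) and submit the clear-state
 * preamble on gfx ring 0; if a second gfx ring is present, submit a
 * CLEAR_STATE packet on it as well.
 */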
3340 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3341 {
3342 	struct amdgpu_ring *ring;
3343 	const struct cs_section_def *sect = NULL;
3344 	const struct cs_extent_def *ext = NULL;
3345 	int r, i;
3346 	int ctx_reg_offset;
3347 
3348 	/* init the CP */
3349 	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3350 		     adev->gfx.config.max_hw_contexts - 1);
3351 	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3352 
3353 	if (!amdgpu_async_gfx_ring)
3354 		gfx_v11_0_cp_gfx_enable(adev, true);
3355 
3356 	ring = &adev->gfx.gfx_ring[0];
3357 	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3358 	if (r) {
3359 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3360 		return r;
3361 	}
3362 
3363 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3364 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3365 
3366 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3367 	amdgpu_ring_write(ring, 0x80000000);
3368 	amdgpu_ring_write(ring, 0x80000000);
3369 
3370 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3371 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3372 			if (sect->id == SECT_CONTEXT) {
3373 				amdgpu_ring_write(ring,
3374 						  PACKET3(PACKET3_SET_CONTEXT_REG,
3375 							  ext->reg_count));
3376 				amdgpu_ring_write(ring, ext->reg_index -
3377 						  PACKET3_SET_CONTEXT_REG_START);
3378 				for (i = 0; i < ext->reg_count; i++)
3379 					amdgpu_ring_write(ring, ext->extent[i]);
3380 			}
3381 		}
3382 	}
3383 
3384 	ctx_reg_offset =
3385 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3386 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3387 	amdgpu_ring_write(ring, ctx_reg_offset);
3388 	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3389 
3390 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3391 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3392 
3393 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3394 	amdgpu_ring_write(ring, 0);
3395 
3396 	amdgpu_ring_commit(ring);
3397 
3398 	/* submit cs packet to copy state 0 to next available state */
3399 	if (adev->gfx.num_gfx_rings > 1) {
3400 		/* maximum supported gfx ring is 2 */
3401 		ring = &adev->gfx.gfx_ring[1];
3402 		r = amdgpu_ring_alloc(ring, 2);
3403 		if (r) {
3404 			DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3405 			return r;
3406 		}
3407 
3408 		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3409 		amdgpu_ring_write(ring, 0);
3410 
3411 		amdgpu_ring_commit(ring);
3412 	}
3413 	return 0;
3414 }
3415 
3416 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3417 					 CP_PIPE_ID pipe)
3418 {
3419 	u32 tmp;
3420 
3421 	tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3422 	tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3423 
3424 	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3425 }
3426 
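/*
 * Program the doorbell offset/enable for the given gfx ring and set up
 * the CP ring-buffer doorbell range registers.
 */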
3427 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3428 					  struct amdgpu_ring *ring)
3429 {
3430 	u32 tmp;
3431 
3432 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3433 	if (ring->use_doorbell) {
3434 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3435 				    DOORBELL_OFFSET, ring->doorbell_index);
3436 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3437 				    DOORBELL_EN, 1);
3438 	} else {
3439 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3440 				    DOORBELL_EN, 0);
3441 	}
3442 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3443 
3444 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3445 			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
3446 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3447 
3448 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3449 		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3450 }
3451 
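/*
 * Program the ring-buffer registers (size, rptr/wptr addresses, base and
 * doorbell) for gfx ring 0 on pipe 0 and, when available, gfx ring 1 on
 * pipe 1, then start the gfx CP.
 */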
3452 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3453 {
3454 	struct amdgpu_ring *ring;
3455 	u32 tmp;
3456 	u32 rb_bufsz;
3457 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3458 
3459 	/* Set the write pointer delay */
3460 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3461 
3462 	/* set the RB to use vmid 0 */
3463 	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3464 
3465 	/* Init gfx ring 0 for pipe 0 */
3466 	mutex_lock(&adev->srbm_mutex);
3467 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3468 
3469 	/* Set ring buffer size */
3470 	ring = &adev->gfx.gfx_ring[0];
3471 	rb_bufsz = order_base_2(ring->ring_size / 8);
3472 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3473 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3474 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3475 
3476 	/* Initialize the ring buffer's write pointers */
3477 	ring->wptr = 0;
3478 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3479 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3480 
3481 	/* set the wb address whether it's enabled or not */
3482 	rptr_addr = ring->rptr_gpu_addr;
3483 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3484 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3485 		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3486 
3487 	wptr_gpu_addr = ring->wptr_gpu_addr;
3488 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3489 		     lower_32_bits(wptr_gpu_addr));
3490 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3491 		     upper_32_bits(wptr_gpu_addr));
3492 
3493 	mdelay(1);
3494 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3495 
3496 	rb_addr = ring->gpu_addr >> 8;
3497 	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3498 	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3499 
3500 	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3501 
3502 	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3503 	mutex_unlock(&adev->srbm_mutex);
3504 
3505 	/* Init gfx ring 1 for pipe 1 */
3506 	if (adev->gfx.num_gfx_rings > 1) {
3507 		mutex_lock(&adev->srbm_mutex);
3508 		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3509 		/* maximum supported gfx ring is 2 */
3510 		ring = &adev->gfx.gfx_ring[1];
3511 		rb_bufsz = order_base_2(ring->ring_size / 8);
3512 		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3513 		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3514 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3515 		/* Initialize the ring buffer's write pointers */
3516 		ring->wptr = 0;
3517 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3518 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3519 		/* Set the wb address whether it's enabled or not */
3520 		rptr_addr = ring->rptr_gpu_addr;
3521 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3522 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3523 			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3524 		wptr_gpu_addr = ring->wptr_gpu_addr;
3525 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3526 			     lower_32_bits(wptr_gpu_addr));
3527 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3528 			     upper_32_bits(wptr_gpu_addr));
3529 
3530 		mdelay(1);
3531 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3532 
3533 		rb_addr = ring->gpu_addr >> 8;
3534 		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3535 		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3536 		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3537 
3538 		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3539 		mutex_unlock(&adev->srbm_mutex);
3540 	}
3541 	/* Switch to pipe 0 */
3542 	mutex_lock(&adev->srbm_mutex);
3543 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3544 	mutex_unlock(&adev->srbm_mutex);
3545 
3546 	/* start the ring */
3547 	gfx_v11_0_cp_gfx_start(adev);
3548 
3549 	return 0;
3550 }
3551 
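/*
 * Halt or un-halt the compute MEC; on RS64 parts this also drives the
 * per-pipe reset/active bits in CP_MEC_RS64_CNTL.
 */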
3552 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3553 {
3554 	u32 data;
3555 
3556 	if (adev->gfx.rs64_enable) {
3557 		data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3558 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3559 							 enable ? 0 : 1);
3560 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3561 							 enable ? 0 : 1);
3562 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3563 							 enable ? 0 : 1);
3564 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3565 							 enable ? 0 : 1);
3566 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3567 							 enable ? 0 : 1);
3568 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3569 							 enable ? 1 : 0);
3570 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3571 				                         enable ? 1 : 0);
3572 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3573 							 enable ? 1 : 0);
3574 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3575 							 enable ? 1 : 0);
3576 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3577 							 enable ? 0 : 1);
3578 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3579 	} else {
3580 		data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3581 
3582 		if (enable) {
3583 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3584 			if (!adev->enable_mes_kiq)
3585 				data = REG_SET_FIELD(data, CP_MEC_CNTL,
3586 						     MEC_ME2_HALT, 0);
3587 		} else {
3588 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3589 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3590 		}
3591 		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3592 	}
3593 
3594 	udelay(50);
3595 }
3596 
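/*
 * Load the legacy (non-RS64) MEC microcode: copy the ucode image into a
 * GTT buffer object, configure the MEC cache and write the MEC1 jump
 * table through CP_MEC_ME1_UCODE_ADDR/DATA.
 */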
3597 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3598 {
3599 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3600 	const __le32 *fw_data;
3601 	unsigned i, fw_size;
3602 	u32 *fw = NULL;
3603 	int r;
3604 
3605 	if (!adev->gfx.mec_fw)
3606 		return -EINVAL;
3607 
3608 	gfx_v11_0_cp_compute_enable(adev, false);
3609 
3610 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3611 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3612 
3613 	fw_data = (const __le32 *)
3614 		(adev->gfx.mec_fw->data +
3615 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3616 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3617 
3618 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3619 					  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3620 					  &adev->gfx.mec.mec_fw_obj,
3621 					  &adev->gfx.mec.mec_fw_gpu_addr,
3622 					  (void **)&fw);
3623 	if (r) {
3624 		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3625 		gfx_v11_0_mec_fini(adev);
3626 		return r;
3627 	}
3628 
3629 	memcpy(fw, fw_data, fw_size);
3630 
3631 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3632 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3633 
3634 	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3635 
3636 	/* MEC1 */
3637 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3638 
3639 	for (i = 0; i < mec_hdr->jt_size; i++)
3640 		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3641 			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3642 
3643 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3644 
3645 	return 0;
3646 }
3647 
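/*
 * Load the RS64 MEC firmware: copy the instruction and data images into
 * 64KB-aligned buffer objects, program the instruction/data cache bases
 * and the start address for every MEC pipe, then invalidate both caches
 * and wait for completion.
 */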
3648 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3649 {
3650 	const struct gfx_firmware_header_v2_0 *mec_hdr;
3651 	const __le32 *fw_ucode, *fw_data;
3652 	u32 tmp, fw_ucode_size, fw_data_size;
3653 	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3654 	u32 *fw_ucode_ptr, *fw_data_ptr;
3655 	int r;
3656 
3657 	if (!adev->gfx.mec_fw)
3658 		return -EINVAL;
3659 
3660 	gfx_v11_0_cp_compute_enable(adev, false);
3661 
3662 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3663 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3664 
3665 	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3666 				le32_to_cpu(mec_hdr->ucode_offset_bytes));
3667 	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3668 
3669 	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3670 				le32_to_cpu(mec_hdr->data_offset_bytes));
3671 	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3672 
3673 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3674 				      64 * 1024,
3675 				      AMDGPU_GEM_DOMAIN_VRAM |
3676 				      AMDGPU_GEM_DOMAIN_GTT,
3677 				      &adev->gfx.mec.mec_fw_obj,
3678 				      &adev->gfx.mec.mec_fw_gpu_addr,
3679 				      (void **)&fw_ucode_ptr);
3680 	if (r) {
3681 		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3682 		gfx_v11_0_mec_fini(adev);
3683 		return r;
3684 	}
3685 
3686 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3687 				      64 * 1024,
3688 				      AMDGPU_GEM_DOMAIN_VRAM |
3689 				      AMDGPU_GEM_DOMAIN_GTT,
3690 				      &adev->gfx.mec.mec_fw_data_obj,
3691 				      &adev->gfx.mec.mec_fw_data_gpu_addr,
3692 				      (void **)&fw_data_ptr);
3693 	if (r) {
3694 		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3695 		gfx_v11_0_mec_fini(adev);
3696 		return r;
3697 	}
3698 
3699 	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3700 	memcpy(fw_data_ptr, fw_data, fw_data_size);
3701 
3702 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3703 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3704 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3705 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3706 
3707 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3708 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3709 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3710 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3711 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
3712 
3713 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
3714 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
3715 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
3716 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
3717 
3718 	mutex_lock(&adev->srbm_mutex);
3719 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
3720 		soc21_grbm_select(adev, 1, i, 0, 0);
3721 
3722 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
3723 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
3724 		     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3725 
3726 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3727 					mec_hdr->ucode_start_addr_lo >> 2 |
3728 					mec_hdr->ucode_start_addr_hi << 30);
3729 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3730 					mec_hdr->ucode_start_addr_hi >> 2);
3731 
3732 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
3733 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
3734 		     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3735 	}
3736 	mutex_unlock(&adev->srbm_mutex);
3737 	soc21_grbm_select(adev, 0, 0, 0, 0);
3738 
3739 	/* Trigger an invalidation of the MEC data cache */
3740 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3741 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3742 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
3743 
3744 	/* Wait for the invalidation to complete */
3745 	for (i = 0; i < usec_timeout; i++) {
3746 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3747 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
3748 				       INVALIDATE_DCACHE_COMPLETE))
3749 			break;
3750 		udelay(1);
3751 	}
3752 
3753 	if (i >= usec_timeout) {
3754 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3755 		return -EINVAL;
3756 	}
3757 
3758 	/* Trigger an invalidation of the L1 instruction caches */
3759 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3760 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
3761 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
3762 
3763 	/* Wait for the invalidation to complete */
3764 	for (i = 0; i < usec_timeout; i++) {
3765 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3766 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
3767 				       INVALIDATE_CACHE_COMPLETE))
3768 			break;
3769 		udelay(1);
3770 	}
3771 
3772 	if (i >= usec_timeout) {
3773 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3774 		return -EINVAL;
3775 	}
3776 
3777 	return 0;
3778 }
3779 
3780 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
3781 {
3782 	uint32_t tmp;
3783 	struct amdgpu_device *adev = ring->adev;
3784 
3785 	/* tell RLC which is KIQ queue */
3786 	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3787 	tmp &= 0xffffff00;
3788 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3789 	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3790 	tmp |= 0x80;
3791 	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
3792 }
3793 
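/* Set the doorbell apertures for the gfx and compute (MEC) engines. */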
3794 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
3795 {
3796 	/* set graphics engine doorbell range */
3797 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
3798 		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
3799 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3800 		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
3801 
3802 	/* set compute engine doorbell range */
3803 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3804 		     (adev->doorbell_index.kiq * 2) << 2);
3805 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3806 		     (adev->doorbell_index.userqueue_end * 2) << 2);
3807 }
3808 
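/*
 * Fill a v11 gfx MQD from the queue properties: MQD base, HQD base and
 * size, rptr/wptr addresses, doorbell, priority and time quantum.
 */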
3809 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
3810 				  struct amdgpu_mqd_prop *prop)
3811 {
3812 	struct v11_gfx_mqd *mqd = m;
3813 	uint64_t hqd_gpu_addr, wb_gpu_addr;
3814 	uint32_t tmp;
3815 	uint32_t rb_bufsz;
3816 
3817 	/* set up gfx hqd wptr */
3818 	mqd->cp_gfx_hqd_wptr = 0;
3819 	mqd->cp_gfx_hqd_wptr_hi = 0;
3820 
3821 	/* set the pointer to the MQD */
3822 	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
3823 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3824 
3825 	/* set up mqd control */
3826 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
3827 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
3828 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
3829 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
3830 	mqd->cp_gfx_mqd_control = tmp;
3831 
3832 	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
3833 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
3834 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
3835 	mqd->cp_gfx_hqd_vmid = 0;
3836 
3837 	/* set up default queue priority level
3838 	 * 0x0 = low priority, 0x1 = high priority */
3839 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
3840 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
3841 	mqd->cp_gfx_hqd_queue_priority = tmp;
3842 
3843 	/* set up time quantum */
3844 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
3845 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
3846 	mqd->cp_gfx_hqd_quantum = tmp;
3847 
3848 	/* set up gfx hqd base. this is similar as CP_RB_BASE */
3849 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
3850 	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
3851 	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
3852 
3853 	/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
3854 	wb_gpu_addr = prop->rptr_gpu_addr;
3855 	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
3856 	mqd->cp_gfx_hqd_rptr_addr_hi =
3857 		upper_32_bits(wb_gpu_addr) & 0xffff;
3858 
3859 	/* set up rb_wptr_poll addr */
3860 	wb_gpu_addr = prop->wptr_gpu_addr;
3861 	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3862 	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3863 
3864 	/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
3865 	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
3866 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
3867 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
3868 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
3869 #ifdef __BIG_ENDIAN
3870 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
3871 #endif
3872 	mqd->cp_gfx_hqd_cntl = tmp;
3873 
3874 	/* set up cp_doorbell_control */
3875 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3876 	if (prop->use_doorbell) {
3877 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3878 				    DOORBELL_OFFSET, prop->doorbell_index);
3879 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3880 				    DOORBELL_EN, 1);
3881 	} else
3882 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3883 				    DOORBELL_EN, 0);
3884 	mqd->cp_rb_doorbell_control = tmp;
3885 
3886 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3887 	mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
3888 
3889 	/* activate the queue */
3890 	mqd->cp_gfx_hqd_active = 1;
3891 
3892 	return 0;
3893 }
3894 
3895 static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring)
3896 {
3897 	struct amdgpu_device *adev = ring->adev;
3898 	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
3899 	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
3900 
3901 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
3902 		memset((void *)mqd, 0, sizeof(*mqd));
3903 		mutex_lock(&adev->srbm_mutex);
3904 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3905 		amdgpu_ring_init_mqd(ring);
3906 		soc21_grbm_select(adev, 0, 0, 0, 0);
3907 		mutex_unlock(&adev->srbm_mutex);
3908 		if (adev->gfx.me.mqd_backup[mqd_idx])
3909 			memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
3910 	} else {
3911 		/* restore mqd with the backup copy */
3912 		if (adev->gfx.me.mqd_backup[mqd_idx])
3913 			memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
3914 		/* reset the ring */
3915 		ring->wptr = 0;
3916 		*ring->wptr_cpu_addr = 0;
3917 		amdgpu_ring_clear_ring(ring);
3918 	}
3919 
3920 	return 0;
3921 }
3922 
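/*
 * Initialize the MQD of every kernel gfx queue, map the queues through
 * the KIQ and then start the gfx CP.
 */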
3923 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
3924 {
3925 	int r, i;
3926 	struct amdgpu_ring *ring;
3927 
3928 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
3929 		ring = &adev->gfx.gfx_ring[i];
3930 
3931 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3932 		if (unlikely(r != 0))
3933 			return r;
3934 
3935 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3936 		if (!r) {
3937 			r = gfx_v11_0_gfx_init_queue(ring);
3938 			amdgpu_bo_kunmap(ring->mqd_obj);
3939 			ring->mqd_ptr = NULL;
3940 		}
3941 		amdgpu_bo_unreserve(ring->mqd_obj);
3942 		if (r)
3943 			return r;
3944 	}
3945 
3946 	r = amdgpu_gfx_enable_kgq(adev, 0);
3947 	if (r)
3948 		return r;
3949 
3950 	return gfx_v11_0_cp_gfx_start(adev);
3951 }
3952 
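/*
 * Fill a v11 compute MQD from the queue properties: EOP buffer, MQD and
 * HQD base addresses, rptr report/wptr poll addresses, doorbell and
 * queue/pipe priorities.
 */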
3953 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
3954 				      struct amdgpu_mqd_prop *prop)
3955 {
3956 	struct v11_compute_mqd *mqd = m;
3957 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3958 	uint32_t tmp;
3959 
3960 	mqd->header = 0xC0310800;
3961 	mqd->compute_pipelinestat_enable = 0x00000001;
3962 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3963 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3964 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3965 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3966 	mqd->compute_misc_reserved = 0x00000007;
3967 
3968 	eop_base_addr = prop->eop_gpu_addr >> 8;
3969 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3970 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3971 
3972 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3973 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
3974 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3975 			(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
3976 
3977 	mqd->cp_hqd_eop_control = tmp;
3978 
3979 	/* enable doorbell? */
3980 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
3981 
3982 	if (prop->use_doorbell) {
3983 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3984 				    DOORBELL_OFFSET, prop->doorbell_index);
3985 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3986 				    DOORBELL_EN, 1);
3987 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3988 				    DOORBELL_SOURCE, 0);
3989 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3990 				    DOORBELL_HIT, 0);
3991 	} else {
3992 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3993 				    DOORBELL_EN, 0);
3994 	}
3995 
3996 	mqd->cp_hqd_pq_doorbell_control = tmp;
3997 
3998 	/* disable the queue if it's active */
3999 	mqd->cp_hqd_dequeue_request = 0;
4000 	mqd->cp_hqd_pq_rptr = 0;
4001 	mqd->cp_hqd_pq_wptr_lo = 0;
4002 	mqd->cp_hqd_pq_wptr_hi = 0;
4003 
4004 	/* set the pointer to the MQD */
4005 	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
4006 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
4007 
4008 	/* set MQD vmid to 0 */
4009 	tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
4010 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4011 	mqd->cp_mqd_control = tmp;
4012 
4013 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4014 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
4015 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4016 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4017 
4018 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4019 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
4020 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4021 			    (order_base_2(prop->queue_size / 4) - 1));
4022 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4023 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4024 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
4025 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
4026 			    prop->allow_tunneling);
4027 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4028 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4029 	mqd->cp_hqd_pq_control = tmp;
4030 
4031 	/* set the wb address whether it's enabled or not */
4032 	wb_gpu_addr = prop->rptr_gpu_addr;
4033 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4034 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4035 		upper_32_bits(wb_gpu_addr) & 0xffff;
4036 
4037 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4038 	wb_gpu_addr = prop->wptr_gpu_addr;
4039 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4040 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4041 
4042 	tmp = 0;
4043 	/* enable the doorbell if requested */
4044 	if (prop->use_doorbell) {
4045 		tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
4046 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4047 				DOORBELL_OFFSET, prop->doorbell_index);
4048 
4049 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4050 				    DOORBELL_EN, 1);
4051 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4052 				    DOORBELL_SOURCE, 0);
4053 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4054 				    DOORBELL_HIT, 0);
4055 	}
4056 
4057 	mqd->cp_hqd_pq_doorbell_control = tmp;
4058 
4059 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4060 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
4061 
4062 	/* set the vmid for the queue */
4063 	mqd->cp_hqd_vmid = 0;
4064 
4065 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
4066 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
4067 	mqd->cp_hqd_persistent_state = tmp;
4068 
4069 	/* set MIN_IB_AVAIL_SIZE */
4070 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
4071 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4072 	mqd->cp_hqd_ib_control = tmp;
4073 
4074 	/* set static priority for a compute queue/ring */
4075 	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
4076 	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
4077 
4078 	mqd->cp_hqd_active = prop->hqd_active;
4079 
4080 	return 0;
4081 }
4082 
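/*
 * Program the KIQ HQD registers directly via MMIO from the MQD contents
 * and activate the queue.
 */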
4083 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
4084 {
4085 	struct amdgpu_device *adev = ring->adev;
4086 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4087 	int j;
4088 
4089 	/* deactivate the queue */
4090 	if (amdgpu_sriov_vf(adev))
4091 		WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
4092 
4093 	/* disable wptr polling */
4094 	WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4095 
4096 	/* write the EOP addr */
4097 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
4098 	       mqd->cp_hqd_eop_base_addr_lo);
4099 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
4100 	       mqd->cp_hqd_eop_base_addr_hi);
4101 
4102 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4103 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
4104 	       mqd->cp_hqd_eop_control);
4105 
4106 	/* enable doorbell? */
4107 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4108 	       mqd->cp_hqd_pq_doorbell_control);
4109 
4110 	/* disable the queue if it's active */
4111 	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
4112 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
4113 		for (j = 0; j < adev->usec_timeout; j++) {
4114 			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
4115 				break;
4116 			udelay(1);
4117 		}
4118 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
4119 		       mqd->cp_hqd_dequeue_request);
4120 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
4121 		       mqd->cp_hqd_pq_rptr);
4122 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4123 		       mqd->cp_hqd_pq_wptr_lo);
4124 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4125 		       mqd->cp_hqd_pq_wptr_hi);
4126 	}
4127 
4128 	/* set the pointer to the MQD */
4129 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
4130 	       mqd->cp_mqd_base_addr_lo);
4131 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
4132 	       mqd->cp_mqd_base_addr_hi);
4133 
4134 	/* set MQD vmid to 0 */
4135 	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
4136 	       mqd->cp_mqd_control);
4137 
4138 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4139 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
4140 	       mqd->cp_hqd_pq_base_lo);
4141 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
4142 	       mqd->cp_hqd_pq_base_hi);
4143 
4144 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4145 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
4146 	       mqd->cp_hqd_pq_control);
4147 
4148 	/* set the wb address whether it's enabled or not */
4149 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
4150 		mqd->cp_hqd_pq_rptr_report_addr_lo);
4151 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4152 		mqd->cp_hqd_pq_rptr_report_addr_hi);
4153 
4154 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4155 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
4156 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
4157 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4158 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4159 
4160 	/* enable the doorbell if requested */
4161 	if (ring->use_doorbell) {
4162 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
4163 			(adev->doorbell_index.kiq * 2) << 2);
4164 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
4165 			(adev->doorbell_index.userqueue_end * 2) << 2);
4166 	}
4167 
4168 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4169 	       mqd->cp_hqd_pq_doorbell_control);
4170 
4171 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4172 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4173 	       mqd->cp_hqd_pq_wptr_lo);
4174 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4175 	       mqd->cp_hqd_pq_wptr_hi);
4176 
4177 	/* set the vmid for the queue */
4178 	WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
4179 
4180 	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
4181 	       mqd->cp_hqd_persistent_state);
4182 
4183 	/* activate the queue */
4184 	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
4185 	       mqd->cp_hqd_active);
4186 
4187 	if (ring->use_doorbell)
4188 		WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4189 
4190 	return 0;
4191 }
4192 
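/*
 * Tell the RLC which queue is the KIQ, then either generate a fresh MQD
 * and program the HQD registers, or, on GPU reset, restore the MQD from
 * the backup copy and re-program the registers.
 */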
4193 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
4194 {
4195 	struct amdgpu_device *adev = ring->adev;
4196 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4197 
4198 	gfx_v11_0_kiq_setting(ring);
4199 
4200 	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4201 		/* reset MQD to a clean status */
4202 		if (adev->gfx.kiq[0].mqd_backup)
4203 			memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
4204 
4205 		/* reset ring buffer */
4206 		ring->wptr = 0;
4207 		amdgpu_ring_clear_ring(ring);
4208 
4209 		mutex_lock(&adev->srbm_mutex);
4210 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4211 		gfx_v11_0_kiq_init_register(ring);
4212 		soc21_grbm_select(adev, 0, 0, 0, 0);
4213 		mutex_unlock(&adev->srbm_mutex);
4214 	} else {
4215 		memset((void *)mqd, 0, sizeof(*mqd));
4216 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4217 			amdgpu_ring_clear_ring(ring);
4218 		mutex_lock(&adev->srbm_mutex);
4219 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4220 		amdgpu_ring_init_mqd(ring);
4221 		gfx_v11_0_kiq_init_register(ring);
4222 		soc21_grbm_select(adev, 0, 0, 0, 0);
4223 		mutex_unlock(&adev->srbm_mutex);
4224 
4225 		if (adev->gfx.kiq[0].mqd_backup)
4226 			memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
4227 	}
4228 
4229 	return 0;
4230 }
4231 
4232 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring)
4233 {
4234 	struct amdgpu_device *adev = ring->adev;
4235 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4236 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4237 
4238 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4239 		memset((void *)mqd, 0, sizeof(*mqd));
4240 		mutex_lock(&adev->srbm_mutex);
4241 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4242 		amdgpu_ring_init_mqd(ring);
4243 		soc21_grbm_select(adev, 0, 0, 0, 0);
4244 		mutex_unlock(&adev->srbm_mutex);
4245 
4246 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4247 			memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4248 	} else {
4249 		/* restore MQD to a clean status */
4250 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4251 			memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4252 		/* reset ring buffer */
4253 		ring->wptr = 0;
4254 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4255 		amdgpu_ring_clear_ring(ring);
4256 	}
4257 
4258 	return 0;
4259 }
4260 
4261 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4262 {
4263 	struct amdgpu_ring *ring;
4264 	int r;
4265 
4266 	ring = &adev->gfx.kiq[0].ring;
4267 
4268 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4269 	if (unlikely(r != 0))
4270 		return r;
4271 
4272 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4273 	if (unlikely(r != 0)) {
4274 		amdgpu_bo_unreserve(ring->mqd_obj);
4275 		return r;
4276 	}
4277 
4278 	gfx_v11_0_kiq_init_queue(ring);
4279 	amdgpu_bo_kunmap(ring->mqd_obj);
4280 	ring->mqd_ptr = NULL;
4281 	amdgpu_bo_unreserve(ring->mqd_obj);
4282 	ring->sched.ready = true;
4283 	return 0;
4284 }
4285 
4286 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4287 {
4288 	struct amdgpu_ring *ring = NULL;
4289 	int r = 0, i;
4290 
4291 	if (!amdgpu_async_gfx_ring)
4292 		gfx_v11_0_cp_compute_enable(adev, true);
4293 
4294 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4295 		ring = &adev->gfx.compute_ring[i];
4296 
4297 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4298 		if (unlikely(r != 0))
4299 			goto done;
4300 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4301 		if (!r) {
4302 			r = gfx_v11_0_kcq_init_queue(ring);
4303 			amdgpu_bo_kunmap(ring->mqd_obj);
4304 			ring->mqd_ptr = NULL;
4305 		}
4306 		amdgpu_bo_unreserve(ring->mqd_obj);
4307 		if (r)
4308 			goto done;
4309 	}
4310 
4311 	r = amdgpu_gfx_enable_kcq(adev, 0);
4312 done:
4313 	return r;
4314 }
4315 
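/*
 * Top-level CP bring-up: load the CP microcode when using direct loading,
 * set the doorbell ranges, resume the KIQ and the compute/gfx queues, then
 * ring-test every gfx and compute ring.
 */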
4316 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4317 {
4318 	int r, i;
4319 	struct amdgpu_ring *ring;
4320 
4321 	if (!(adev->flags & AMD_IS_APU))
4322 		gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4323 
4324 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4325 		/* legacy firmware loading */
4326 		r = gfx_v11_0_cp_gfx_load_microcode(adev);
4327 		if (r)
4328 			return r;
4329 
4330 		if (adev->gfx.rs64_enable)
4331 			r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4332 		else
4333 			r = gfx_v11_0_cp_compute_load_microcode(adev);
4334 		if (r)
4335 			return r;
4336 	}
4337 
4338 	gfx_v11_0_cp_set_doorbell_range(adev);
4339 
4340 	if (amdgpu_async_gfx_ring) {
4341 		gfx_v11_0_cp_compute_enable(adev, true);
4342 		gfx_v11_0_cp_gfx_enable(adev, true);
4343 	}
4344 
4345 	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4346 		r = amdgpu_mes_kiq_hw_init(adev);
4347 	else
4348 		r = gfx_v11_0_kiq_resume(adev);
4349 	if (r)
4350 		return r;
4351 
4352 	r = gfx_v11_0_kcq_resume(adev);
4353 	if (r)
4354 		return r;
4355 
4356 	if (!amdgpu_async_gfx_ring) {
4357 		r = gfx_v11_0_cp_gfx_resume(adev);
4358 		if (r)
4359 			return r;
4360 	} else {
4361 		r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4362 		if (r)
4363 			return r;
4364 	}
4365 
4366 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4367 		ring = &adev->gfx.gfx_ring[i];
4368 		r = amdgpu_ring_test_helper(ring);
4369 		if (r)
4370 			return r;
4371 	}
4372 
4373 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4374 		ring = &adev->gfx.compute_ring[i];
4375 		r = amdgpu_ring_test_helper(ring);
4376 		if (r)
4377 			return r;
4378 	}
4379 
4380 	return 0;
4381 }
4382 
4383 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4384 {
4385 	gfx_v11_0_cp_gfx_enable(adev, enable);
4386 	gfx_v11_0_cp_compute_enable(adev, enable);
4387 }
4388 
4389 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4390 {
4391 	int r;
4392 	bool value;
4393 
4394 	r = adev->gfxhub.funcs->gart_enable(adev);
4395 	if (r)
4396 		return r;
4397 
4398 	adev->hdp.funcs->flush_hdp(adev, NULL);
4399 
4400 	value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
4402 
4403 	adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4404 	amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
4405 
4406 	return 0;
4407 }
4408 
4409 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4410 {
4411 	u32 tmp;
4412 
4413 	/* select RS64 */
4414 	if (adev->gfx.rs64_enable) {
4415 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4416 		tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4417 		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4418 
4419 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4420 		tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4421 		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4422 	}
4423 
4424 	if (amdgpu_emu_mode == 1)
4425 		msleep(100);
4426 }
4427 
4428 static int get_gb_addr_config(struct amdgpu_device *adev)
4429 {
4430 	u32 gb_addr_config;
4431 
4432 	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4433 	if (gb_addr_config == 0)
4434 		return -EINVAL;
4435 
4436 	adev->gfx.config.gb_addr_config_fields.num_pkrs =
4437 		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4438 
4439 	adev->gfx.config.gb_addr_config = gb_addr_config;
4440 
4441 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4442 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4443 				      GB_ADDR_CONFIG, NUM_PIPES);
4444 
4445 	adev->gfx.config.max_tile_pipes =
4446 		adev->gfx.config.gb_addr_config_fields.num_pipes;
4447 
4448 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4449 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4450 				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4451 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4452 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4453 				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
4454 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4455 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4456 				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4457 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4458 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4459 				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4460 
4461 	return 0;
4462 }
4463 
4464 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4465 {
4466 	uint32_t data;
4467 
4468 	data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4469 	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4470 	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4471 
4472 	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4473 	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4474 	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4475 }
4476 
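/*
 * GFX11 hw init: run the RLC backdoor autoload or the IMU/direct loading
 * path, enable the gfxhub, program the golden registers and GFX constants,
 * resume the RLC and the CP, and read back the IMU firmware version if it
 * is not already known.
 */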
4477 static int gfx_v11_0_hw_init(void *handle)
4478 {
4479 	int r;
4480 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4481 
4482 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4483 		if (adev->gfx.imu.funcs) {
4484 			/* RLC autoload sequence 1: Program rlc ram */
4485 			if (adev->gfx.imu.funcs->program_rlc_ram)
4486 				adev->gfx.imu.funcs->program_rlc_ram(adev);
4487 		}
4488 		/* rlc autoload firmware */
4489 		r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4490 		if (r)
4491 			return r;
4492 	} else {
4493 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4494 			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4495 				if (adev->gfx.imu.funcs->load_microcode)
4496 					adev->gfx.imu.funcs->load_microcode(adev);
4497 				if (adev->gfx.imu.funcs->setup_imu)
4498 					adev->gfx.imu.funcs->setup_imu(adev);
4499 				if (adev->gfx.imu.funcs->start_imu)
4500 					adev->gfx.imu.funcs->start_imu(adev);
4501 			}
4502 
4503 			/* disable gpa mode in backdoor loading */
4504 			gfx_v11_0_disable_gpa_mode(adev);
4505 		}
4506 	}
4507 
4508 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4509 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4510 		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4511 		if (r) {
4512 			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
4513 			return r;
4514 		}
4515 	}
4516 
4517 	adev->gfx.is_poweron = true;
4518 
4519 	if (get_gb_addr_config(adev))
4520 		DRM_WARN("Invalid gb_addr_config!\n");
4521 
4522 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4523 	    adev->gfx.rs64_enable)
4524 		gfx_v11_0_config_gfx_rs64(adev);
4525 
4526 	r = gfx_v11_0_gfxhub_enable(adev);
4527 	if (r)
4528 		return r;
4529 
4530 	if (!amdgpu_emu_mode)
4531 		gfx_v11_0_init_golden_registers(adev);
4532 
4533 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4534 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4535 		/*
4536 		 * For gfx 11, RLC firmware loading relies on the SMU firmware
4537 		 * having been loaded first, so for direct loading the SMC
4538 		 * ucode has to be loaded here before the RLC.
4539 		 */
4540 		if (!(adev->flags & AMD_IS_APU)) {
4541 			r = amdgpu_pm_load_smu_firmware(adev, NULL);
4542 			if (r)
4543 				return r;
4544 		}
4545 	}
4546 
4547 	gfx_v11_0_constants_init(adev);
4548 
4549 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4550 		gfx_v11_0_select_cp_fw_arch(adev);
4551 
4552 	if (adev->nbio.funcs->gc_doorbell_init)
4553 		adev->nbio.funcs->gc_doorbell_init(adev);
4554 
4555 	r = gfx_v11_0_rlc_resume(adev);
4556 	if (r)
4557 		return r;
4558 
4559 	/*
4560 	 * init golden registers and rlc resume may override some registers,
4561 	 * reconfig them here
4562 	 */
4563 	gfx_v11_0_tcp_harvest(adev);
4564 
4565 	r = gfx_v11_0_cp_resume(adev);
4566 	if (r)
4567 		return r;
4568 
4569 	/* get IMU version from HW if it's not set */
4570 	if (!adev->gfx.imu_fw_version)
4571 		adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);
4572 
4573 	return r;
4574 }
4575 
4576 static int gfx_v11_0_hw_fini(void *handle)
4577 {
4578 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4579 
4580 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4581 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4582 
4583 	if (!adev->no_hw_access) {
4584 		if (amdgpu_async_gfx_ring) {
4585 			if (amdgpu_gfx_disable_kgq(adev, 0))
4586 				DRM_ERROR("KGQ disable failed\n");
4587 		}
4588 
4589 		if (amdgpu_gfx_disable_kcq(adev, 0))
4590 			DRM_ERROR("KCQ disable failed\n");
4591 
4592 		amdgpu_mes_kiq_hw_fini(adev);
4593 	}
4594 
4595 	if (amdgpu_sriov_vf(adev))
4596 		/* The steps that disable CPG and clear the KIQ position have
4597 		 * been removed so that the CP can perform IDLE-SAVE during the
4598 		 * switch. Those steps are needed to avoid a DMAR error on
4599 		 * gfx9, but that error has not been reproduced on gfx11.
4600 		 */
4601 		return 0;
4602 
4603 	gfx_v11_0_cp_enable(adev, false);
4604 	gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4605 
4606 	adev->gfxhub.funcs->gart_disable(adev);
4607 
4608 	adev->gfx.is_poweron = false;
4609 
4610 	return 0;
4611 }
4612 
4613 static int gfx_v11_0_suspend(void *handle)
4614 {
4615 	return gfx_v11_0_hw_fini(handle);
4616 }
4617 
4618 static int gfx_v11_0_resume(void *handle)
4619 {
4620 	return gfx_v11_0_hw_init(handle);
4621 }
4622 
4623 static bool gfx_v11_0_is_idle(void *handle)
4624 {
4625 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4626 
4627 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
4628 				GRBM_STATUS, GUI_ACTIVE))
4629 		return false;
4630 	else
4631 		return true;
4632 }
4633 
4634 static int gfx_v11_0_wait_for_idle(void *handle)
4635 {
4636 	unsigned i;
4637 	u32 tmp;
4638 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4639 
4640 	for (i = 0; i < adev->usec_timeout; i++) {
4641 		/* read MC_STATUS */
4642 		tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
4643 			GRBM_STATUS__GUI_ACTIVE_MASK;
4644 
4645 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4646 			return 0;
4647 		udelay(1);
4648 	}
4649 	return -ETIMEDOUT;
4650 }
4651 
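/*
 * Acquire (req != 0) or release (req == 0) the CP_GFX_INDEX_MUTEX hardware
 * mutex, polling until the request takes effect or the timeout expires.
 */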
4652 static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
4653 					     int req)
4654 {
4655 	u32 i, tmp, val;
4656 
4657 	for (i = 0; i < adev->usec_timeout; i++) {
4658 		/* Request with MeId=2, PipeId=0 */
4659 		tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
4660 		tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
4661 		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
4662 
4663 		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
4664 		if (req) {
4665 			if (val == tmp)
4666 				break;
4667 		} else {
4668 			tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
4669 					    REQUEST, 1);
4670 
4671 			/* unlocked or locked by firmware */
4672 			if (val != tmp)
4673 				break;
4674 		}
4675 		udelay(1);
4676 	}
4677 
4678 	if (i >= adev->usec_timeout)
4679 		return -EINVAL;
4680 
4681 	return 0;
4682 }
4683 
4684 static int gfx_v11_0_soft_reset(void *handle)
4685 {
4686 	u32 grbm_soft_reset = 0;
4687 	u32 tmp;
4688 	int r, i, j, k;
4689 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4690 
4691 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4692 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
4693 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
4694 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
4695 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
4696 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4697 
4698 	gfx_v11_0_set_safe_mode(adev, 0);
4699 
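	/* issue dequeue/reset requests to all compute and gfx queues */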
4700 	mutex_lock(&adev->srbm_mutex);
4701 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4702 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4703 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4704 				soc21_grbm_select(adev, i, k, j, 0);
4705 
4706 				WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
4707 				WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
4708 			}
4709 		}
4710 	}
4711 	for (i = 0; i < adev->gfx.me.num_me; ++i) {
4712 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
4713 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
4714 				soc21_grbm_select(adev, i, k, j, 0);
4715 
4716 				WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
4717 			}
4718 		}
4719 	}
4720 	soc21_grbm_select(adev, 0, 0, 0, 0);
4721 	mutex_unlock(&adev->srbm_mutex);
4722 
4723 	/* Try to acquire the gfx mutex before access to CP_VMID_RESET */
4724 	r = gfx_v11_0_request_gfx_index_mutex(adev, 1);
4725 	if (r) {
4726 		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
4727 		return r;
4728 	}
4729 
4730 	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
4731 
	/*
	 * Read the CP_VMID_RESET register three times to give
	 * GFX_HQD_ACTIVE enough time to reach 0.
	 */
4734 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4735 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4736 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4737 
4738 	/* release the gfx mutex */
4739 	r = gfx_v11_0_request_gfx_index_mutex(adev, 0);
4740 	if (r) {
4741 		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
4742 		return r;
4743 	}
4744 
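	/* wait for all compute and gfx HQDs to go inactive */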
4745 	for (i = 0; i < adev->usec_timeout; i++) {
4746 		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
4747 		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
4748 			break;
4749 		udelay(1);
4750 	}
4751 	if (i >= adev->usec_timeout) {
		DRM_ERROR("Failed to wait for all pipes to become idle\n");
4753 		return -EINVAL;
4754 	}
4755 
4756 	/**********  trigger soft reset  ***********/
4757 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4758 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4759 					SOFT_RESET_CP, 1);
4760 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4761 					SOFT_RESET_GFX, 1);
4762 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4763 					SOFT_RESET_CPF, 1);
4764 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4765 					SOFT_RESET_CPC, 1);
4766 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4767 					SOFT_RESET_CPG, 1);
4768 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4769 	/**********  exit soft reset  ***********/
4770 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4771 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4772 					SOFT_RESET_CP, 0);
4773 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4774 					SOFT_RESET_GFX, 0);
4775 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4776 					SOFT_RESET_CPF, 0);
4777 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4778 					SOFT_RESET_CPC, 0);
4779 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4780 					SOFT_RESET_CPG, 0);
4781 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4782 
4783 	tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
4784 	tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
4785 	WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
4786 
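	/* take the CP ME and MEC out of halt */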
4787 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
4788 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
4789 
4790 	for (i = 0; i < adev->usec_timeout; i++) {
4791 		if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
4792 			break;
4793 		udelay(1);
4794 	}
4795 	if (i >= adev->usec_timeout) {
		DRM_ERROR("Failed to wait for CP_VMID_RESET to clear\n");
4797 		return -EINVAL;
4798 	}
4799 
4800 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4801 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4802 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4803 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4804 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4805 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4806 
4807 	gfx_v11_0_unset_safe_mode(adev, 0);
4808 
4809 	return gfx_v11_0_cp_resume(adev);
4810 }
4811 
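/*
 * Run IB tests on all gfx and compute rings; if any of them fails,
 * report that a soft reset is needed.
 */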
4812 static bool gfx_v11_0_check_soft_reset(void *handle)
4813 {
4814 	int i, r;
4815 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4816 	struct amdgpu_ring *ring;
4817 	long tmo = msecs_to_jiffies(1000);
4818 
4819 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4820 		ring = &adev->gfx.gfx_ring[i];
4821 		r = amdgpu_ring_test_ib(ring, tmo);
4822 		if (r)
4823 			return true;
4824 	}
4825 
4826 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4827 		ring = &adev->gfx.compute_ring[i];
4828 		r = amdgpu_ring_test_ib(ring, tmo);
4829 		if (r)
4830 			return true;
4831 	}
4832 
4833 	return false;
4834 }
4835 
4836 static int gfx_v11_0_post_soft_reset(void *handle)
4837 {
	/*
	 * A GFX soft reset also impacts MES, so MES needs to be resumed
	 * after a GFX soft reset.
	 */
4841 	return amdgpu_mes_resume((struct amdgpu_device *)handle);
4842 }
4843 
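/*
 * Sample a 64-bit free-running counter. The high half is read before and
 * after the low half; if it changed in between, the low half is re-read so
 * that a rollover does not produce a torn value. SR-IOV VFs use the MES
 * MTIME registers (with GFXOFF disabled around the access), bare metal uses
 * the SMUIO golden TSC counter.
 */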
4844 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4845 {
4846 	uint64_t clock;
4847 	uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
4848 
4849 	if (amdgpu_sriov_vf(adev)) {
4850 		amdgpu_gfx_off_ctrl(adev, false);
4851 		mutex_lock(&adev->gfx.gpu_clock_mutex);
4852 		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
4853 		clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
4854 		clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
4855 		if (clock_counter_hi_pre != clock_counter_hi_after)
4856 			clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
4857 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
4858 		amdgpu_gfx_off_ctrl(adev, true);
4859 	} else {
4860 		preempt_disable();
4861 		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
4862 		clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
4863 		clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
4864 		if (clock_counter_hi_pre != clock_counter_hi_after)
4865 			clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
4866 		preempt_enable();
4867 	}
4868 	clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
4869 
4870 	return clock;
4871 }
4872 
4873 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4874 					   uint32_t vmid,
4875 					   uint32_t gds_base, uint32_t gds_size,
4876 					   uint32_t gws_base, uint32_t gws_size,
4877 					   uint32_t oa_base, uint32_t oa_size)
4878 {
4879 	struct amdgpu_device *adev = ring->adev;
4880 
4881 	/* GDS Base */
4882 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4883 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
4884 				    gds_base);
4885 
4886 	/* GDS Size */
4887 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4888 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
4889 				    gds_size);
4890 
4891 	/* GWS */
4892 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4893 				    SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
4894 				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4895 
4896 	/* OA */
4897 	gfx_v11_0_write_data_to_reg(ring, 0, false,
4898 				    SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
4899 				    (1 << (oa_size + oa_base)) - (1 << oa_base));
4900 }
4901 
4902 static int gfx_v11_0_early_init(void *handle)
4903 {
4904 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4905 
4906 	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
4907 
4908 	adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
4909 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4910 					  AMDGPU_MAX_COMPUTE_RINGS);
4911 
4912 	gfx_v11_0_set_kiq_pm4_funcs(adev);
4913 	gfx_v11_0_set_ring_funcs(adev);
4914 	gfx_v11_0_set_irq_funcs(adev);
4915 	gfx_v11_0_set_gds_init(adev);
4916 	gfx_v11_0_set_rlc_funcs(adev);
4917 	gfx_v11_0_set_mqd_funcs(adev);
4918 	gfx_v11_0_set_imu_funcs(adev);
4919 
4920 	gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
4921 
4922 	return gfx_v11_0_init_microcode(adev);
4923 }
4924 
4925 static int gfx_v11_0_late_init(void *handle)
4926 {
4927 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4928 	int r;
4929 
4930 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4931 	if (r)
4932 		return r;
4933 
4934 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4935 	if (r)
4936 		return r;
4937 
4938 	return 0;
4939 }
4940 
4941 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
4942 {
4943 	uint32_t rlc_cntl;
4944 
	/* report whether the RLC (F32) is currently enabled */
4946 	rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
4947 	return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
4948 }
4949 
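/*
 * Request RLC safe mode by writing CMD together with a MESSAGE of 1, then
 * poll until the RLC acknowledges by clearing the CMD field.
 */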
4950 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4951 {
4952 	uint32_t data;
4953 	unsigned i;
4954 
4955 	data = RLC_SAFE_MODE__CMD_MASK;
4956 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4957 
4958 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
4959 
4960 	/* wait for RLC_SAFE_MODE */
4961 	for (i = 0; i < adev->usec_timeout; i++) {
4962 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
4963 				   RLC_SAFE_MODE, CMD))
4964 			break;
4965 		udelay(1);
4966 	}
4967 }
4968 
4969 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4970 {
4971 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
4972 }
4973 
4974 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
4975 				      bool enable)
4976 {
4977 	uint32_t def, data;
4978 
4979 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
4980 		return;
4981 
4982 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
4983 
4984 	if (enable)
4985 		data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4986 	else
4987 		data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
4988 
4989 	if (def != data)
4990 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
4991 }
4992 
4993 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
4994 				       bool enable)
4995 {
4996 	uint32_t def, data;
4997 
4998 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
4999 		return;
5000 
5001 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5002 
5003 	if (enable)
5004 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
5005 	else
5006 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
5007 
5008 	if (def != data)
5009 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5010 }
5011 
5012 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
5013 					   bool enable)
5014 {
5015 	uint32_t def, data;
5016 
5017 	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
5018 		return;
5019 
5020 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5021 
5022 	if (enable)
5023 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
5024 	else
5025 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
5026 
5027 	if (def != data)
5028 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5029 }
5030 
5031 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5032 						       bool enable)
5033 {
5034 	uint32_t data, def;
5035 
5036 	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
5037 		return;
5038 
5039 	/* It is disabled by HW by default */
5040 	if (enable) {
5041 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5042 			/* 1 - RLC_CGTT_MGCG_OVERRIDE */
5043 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5044 
5045 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5046 				  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5047 				  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
5048 
5049 			if (def != data)
5050 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5051 		}
5052 	} else {
5053 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5054 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5055 
5056 			data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5057 				 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5058 				 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
5059 
5060 			if (def != data)
5061 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5062 		}
5063 	}
5064 }
5065 
5066 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5067 						       bool enable)
5068 {
5069 	uint32_t def, data;
5070 
5071 	if (!(adev->cg_flags &
5072 	      (AMD_CG_SUPPORT_GFX_CGCG |
5073 	      AMD_CG_SUPPORT_GFX_CGLS |
5074 	      AMD_CG_SUPPORT_GFX_3D_CGCG |
5075 	      AMD_CG_SUPPORT_GFX_3D_CGLS)))
5076 		return;
5077 
5078 	if (enable) {
5079 		def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5080 
5081 		/* unset CGCG override */
5082 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
5083 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5084 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5085 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5086 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
5087 		    adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5088 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5089 
5090 		/* update CGCG override bits */
5091 		if (def != data)
5092 			WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5093 
5094 		/* enable cgcg FSM(0x0000363F) */
5095 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5096 
5097 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5098 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
5099 			data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5100 				 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5101 		}
5102 
5103 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5104 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
5105 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5106 				 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5107 		}
5108 
5109 		if (def != data)
5110 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5111 
5112 		/* Program RLC_CGCG_CGLS_CTRL_3D */
5113 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5114 
5115 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5116 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
5117 			data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5118 				 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5119 		}
5120 
5121 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5122 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
5123 			data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5124 				 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5125 		}
5126 
5127 		if (def != data)
5128 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5129 
5130 		/* set IDLE_POLL_COUNT(0x00900100) */
5131 		def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
5132 
5133 		data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
5134 		data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5135 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5136 
5137 		if (def != data)
5138 			WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
5139 
5140 		data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
5141 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
5142 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
5143 		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
5144 		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
5145 		WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
5146 
5147 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5148 		data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5149 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5150 
		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
5152 		if (adev->sdma.num_instances > 1) {
5153 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5154 			data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5155 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5156 		}
5157 	} else {
5158 		/* Program RLC_CGCG_CGLS_CTRL */
5159 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5160 
5161 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
5162 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5163 
5164 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5165 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5166 
5167 		if (def != data)
5168 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5169 
5170 		/* Program RLC_CGCG_CGLS_CTRL_3D */
5171 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5172 
5173 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5174 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5175 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5176 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5177 
5178 		if (def != data)
5179 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5180 
5181 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5182 		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5183 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5184 
		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
5186 		if (adev->sdma.num_instances > 1) {
5187 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5188 			data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5189 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5190 		}
5191 	}
5192 }
5193 
5194 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5195 					    bool enable)
5196 {
5197 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5198 
5199 	gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
5200 
5201 	gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
5202 
5203 	gfx_v11_0_update_repeater_fgcg(adev, enable);
5204 
5205 	gfx_v11_0_update_sram_fgcg(adev, enable);
5206 
5207 	gfx_v11_0_update_perf_clk(adev, enable);
5208 
5209 	if (adev->cg_flags &
5210 	    (AMD_CG_SUPPORT_GFX_MGCG |
5211 	     AMD_CG_SUPPORT_GFX_CGLS |
5212 	     AMD_CG_SUPPORT_GFX_CGCG |
5213 	     AMD_CG_SUPPORT_GFX_3D_CGCG |
5214 	     AMD_CG_SUPPORT_GFX_3D_CGLS))
		gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
5216 
5217 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5218 
5219 	return 0;
5220 }
5221 
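/*
 * Select the VMID used for RLC SPM memory accesses. The register is updated
 * with GFXOFF disabled, and under SR-IOV one-VF mode the new value is also
 * emitted on the gfx/compute ring when it changed.
 */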
5222 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
5223 {
5224 	u32 reg, pre_data, data;
5225 
5226 	amdgpu_gfx_off_ctrl(adev, false);
5227 	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
5228 	if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
5229 		pre_data = RREG32_NO_KIQ(reg);
5230 	else
5231 		pre_data = RREG32(reg);
5232 
5233 	data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
5234 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5235 
5236 	if (pre_data != data) {
		if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
			WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
		else
			WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
5241 	}
5242 	amdgpu_gfx_off_ctrl(adev, true);
5243 
	if (ring && amdgpu_sriov_is_pp_one_vf(adev) &&
	    (pre_data != data) &&
	    ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) ||
	     (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
5249 		amdgpu_ring_emit_wreg(ring, reg, data);
5250 	}
5251 }
5252 
5253 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5254 	.is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5255 	.set_safe_mode = gfx_v11_0_set_safe_mode,
5256 	.unset_safe_mode = gfx_v11_0_unset_safe_mode,
5257 	.init = gfx_v11_0_rlc_init,
5258 	.get_csb_size = gfx_v11_0_get_csb_size,
5259 	.get_csb_buffer = gfx_v11_0_get_csb_buffer,
5260 	.resume = gfx_v11_0_rlc_resume,
5261 	.stop = gfx_v11_0_rlc_stop,
5262 	.reset = gfx_v11_0_rlc_reset,
5263 	.start = gfx_v11_0_rlc_start,
5264 	.update_spm_vmid = gfx_v11_0_update_spm_vmid,
5265 };
5266 
5267 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5268 {
5269 	u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5270 
5271 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5272 		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5273 	else
5274 		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5275 
5276 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5277 
	/* Program RLC_PG_DELAY_3 for CGPG hysteresis */
5279 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
5280 		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5281 		case IP_VERSION(11, 0, 1):
5282 		case IP_VERSION(11, 0, 4):
5283 		case IP_VERSION(11, 5, 0):
5284 		case IP_VERSION(11, 5, 1):
5285 			WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5286 			break;
5287 		default:
5288 			break;
5289 		}
5290 	}
5291 }
5292 
5293 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5294 {
5295 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5296 
5297 	gfx_v11_cntl_power_gating(adev, enable);
5298 
5299 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5300 }
5301 
5302 static int gfx_v11_0_set_powergating_state(void *handle,
5303 					   enum amd_powergating_state state)
5304 {
5305 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5306 	bool enable = (state == AMD_PG_STATE_GATE);
5307 
5308 	if (amdgpu_sriov_vf(adev))
5309 		return 0;
5310 
5311 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5312 	case IP_VERSION(11, 0, 0):
5313 	case IP_VERSION(11, 0, 2):
5314 	case IP_VERSION(11, 0, 3):
5315 		amdgpu_gfx_off_ctrl(adev, enable);
5316 		break;
5317 	case IP_VERSION(11, 0, 1):
5318 	case IP_VERSION(11, 0, 4):
5319 	case IP_VERSION(11, 5, 0):
5320 	case IP_VERSION(11, 5, 1):
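		/*
		 * Keep GFXOFF disabled while the PG controls are reprogrammed:
		 * when ungating, disable GFXOFF first; when gating, re-enable
		 * it only after gfx_v11_cntl_pg() has run.
		 */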
5321 		if (!enable)
5322 			amdgpu_gfx_off_ctrl(adev, false);
5323 
5324 		gfx_v11_cntl_pg(adev, enable);
5325 
5326 		if (enable)
5327 			amdgpu_gfx_off_ctrl(adev, true);
5328 
5329 		break;
5330 	default:
5331 		break;
5332 	}
5333 
5334 	return 0;
5335 }
5336 
5337 static int gfx_v11_0_set_clockgating_state(void *handle,
5338 					  enum amd_clockgating_state state)
5339 {
5340 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5341 
5342 	if (amdgpu_sriov_vf(adev))
		return 0;
5344 
5345 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5346 	case IP_VERSION(11, 0, 0):
5347 	case IP_VERSION(11, 0, 1):
5348 	case IP_VERSION(11, 0, 2):
5349 	case IP_VERSION(11, 0, 3):
5350 	case IP_VERSION(11, 0, 4):
5351 	case IP_VERSION(11, 5, 0):
5352 	case IP_VERSION(11, 5, 1):
		gfx_v11_0_update_gfx_clock_gating(adev,
						  state == AMD_CG_STATE_GATE);
		break;
5356 	default:
		break;
5358 	}
5359 
5360 	return 0;
5361 }
5362 
5363 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
5364 {
5365 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5366 	int data;
5367 
5368 	/* AMD_CG_SUPPORT_GFX_MGCG */
5369 	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5370 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5371 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5372 
5373 	/* AMD_CG_SUPPORT_REPEATER_FGCG */
5374 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5375 		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5376 
5377 	/* AMD_CG_SUPPORT_GFX_FGCG */
5378 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5379 		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
5380 
5381 	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
5382 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5383 		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5384 
5385 	/* AMD_CG_SUPPORT_GFX_CGCG */
5386 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5387 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5388 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5389 
5390 	/* AMD_CG_SUPPORT_GFX_CGLS */
5391 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5392 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5393 
5394 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5395 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5396 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5397 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5398 
5399 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5400 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5401 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5402 }
5403 
5404 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5405 {
	/* gfx11 hardware uses a 32-bit rptr */
5407 	return *(uint32_t *)ring->rptr_cpu_addr;
5408 }
5409 
5410 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5411 {
5412 	struct amdgpu_device *adev = ring->adev;
5413 	u64 wptr;
5414 
5415 	/* XXX check if swapping is necessary on BE */
5416 	if (ring->use_doorbell) {
5417 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5418 	} else {
5419 		wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5420 		wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5421 	}
5422 
5423 	return wptr;
5424 }
5425 
5426 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5427 {
5428 	struct amdgpu_device *adev = ring->adev;
5429 
5430 	if (ring->use_doorbell) {
5431 		/* XXX check if swapping is necessary on BE */
5432 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5433 			     ring->wptr);
5434 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5435 	} else {
5436 		WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5437 			     lower_32_bits(ring->wptr));
5438 		WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5439 			     upper_32_bits(ring->wptr));
5440 	}
5441 }
5442 
5443 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5444 {
	/* gfx11 hardware uses a 32-bit rptr */
5446 	return *(uint32_t *)ring->rptr_cpu_addr;
5447 }
5448 
5449 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5450 {
5451 	u64 wptr;
5452 
5453 	/* XXX check if swapping is necessary on BE */
5454 	if (ring->use_doorbell)
5455 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5456 	else
5457 		BUG();
5458 	return wptr;
5459 }
5460 
5461 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5462 {
5463 	struct amdgpu_device *adev = ring->adev;
5464 
5465 	/* XXX check if swapping is necessary on BE */
5466 	if (ring->use_doorbell) {
5467 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5468 			     ring->wptr);
5469 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5470 	} else {
5471 		BUG(); /* only DOORBELL method supported on gfx11 now */
5472 	}
5473 }
5474 
5475 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5476 {
5477 	struct amdgpu_device *adev = ring->adev;
5478 	u32 ref_and_mask, reg_mem_engine;
5479 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5480 
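	/*
	 * Pick the HDP flush done bit for this queue: compute rings use the
	 * per-MEC/pipe CP2/CP6 bits with the ME engine, gfx rings use CP0
	 * with the PFP engine.
	 */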
5481 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5482 		switch (ring->me) {
5483 		case 1:
5484 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5485 			break;
5486 		case 2:
5487 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5488 			break;
5489 		default:
5490 			return;
5491 		}
5492 		reg_mem_engine = 0;
5493 	} else {
5494 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5495 		reg_mem_engine = 1; /* pfp */
5496 	}
5497 
5498 	gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5499 			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5500 			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5501 			       ref_and_mask, ref_and_mask, 0x20);
5502 }
5503 
5504 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5505 				       struct amdgpu_job *job,
5506 				       struct amdgpu_ib *ib,
5507 				       uint32_t flags)
5508 {
5509 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5510 	u32 header, control = 0;
5511 
5512 	BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
5513 
5514 	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5515 
5516 	control |= ib->length_dw | (vmid << 24);
5517 
5518 	if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5519 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5520 
5521 		if (flags & AMDGPU_IB_PREEMPTED)
5522 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5523 
5524 		if (vmid)
5525 			gfx_v11_0_ring_emit_de_meta(ring,
5526 				    (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
5527 	}
5528 
5529 	if (ring->is_mes_queue)
5530 		/* inherit vmid from mqd */
5531 		control |= 0x400000;
5532 
5533 	amdgpu_ring_write(ring, header);
5534 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5535 	amdgpu_ring_write(ring,
5536 #ifdef __BIG_ENDIAN
5537 		(2 << 0) |
5538 #endif
5539 		lower_32_bits(ib->gpu_addr));
5540 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5541 	amdgpu_ring_write(ring, control);
5542 }
5543 
5544 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5545 					   struct amdgpu_job *job,
5546 					   struct amdgpu_ib *ib,
5547 					   uint32_t flags)
5548 {
5549 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5550 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5551 
5552 	if (ring->is_mes_queue)
5553 		/* inherit vmid from mqd */
5554 		control |= 0x40000000;
5555 
	/* Currently, there is a high possibility of a wave ID mismatch
5557 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5558 	 * different wave IDs than the GDS expects. This situation happens
5559 	 * randomly when at least 5 compute pipes use GDS ordered append.
5560 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5561 	 * Those are probably bugs somewhere else in the kernel driver.
5562 	 *
5563 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5564 	 * GDS to 0 for this ring (me/pipe).
5565 	 */
5566 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5567 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5568 		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5569 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5570 	}
5571 
5572 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5573 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5574 	amdgpu_ring_write(ring,
5575 #ifdef __BIG_ENDIAN
5576 				(2 << 0) |
5577 #endif
5578 				lower_32_bits(ib->gpu_addr));
5579 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5580 	amdgpu_ring_write(ring, control);
5581 }
5582 
5583 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5584 				     u64 seq, unsigned flags)
5585 {
5586 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5587 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5588 
5589 	/* RELEASE_MEM - flush caches, send int */
5590 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5591 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5592 				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
5593 				 PACKET3_RELEASE_MEM_GCR_GL2_INV |
5594 				 PACKET3_RELEASE_MEM_GCR_GL2_US |
5595 				 PACKET3_RELEASE_MEM_GCR_GL1_INV |
5596 				 PACKET3_RELEASE_MEM_GCR_GLV_INV |
5597 				 PACKET3_RELEASE_MEM_GCR_GLM_INV |
5598 				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
5599 				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5600 				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5601 				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
5602 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5603 				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5604 
5605 	/*
	 * The address should be Qword aligned for a 64-bit write and Dword
	 * aligned if only the low 32 bits of data are sent (data high is discarded).
5608 	 */
5609 	if (write64bit)
5610 		BUG_ON(addr & 0x7);
5611 	else
5612 		BUG_ON(addr & 0x3);
5613 	amdgpu_ring_write(ring, lower_32_bits(addr));
5614 	amdgpu_ring_write(ring, upper_32_bits(addr));
5615 	amdgpu_ring_write(ring, lower_32_bits(seq));
5616 	amdgpu_ring_write(ring, upper_32_bits(seq));
5617 	amdgpu_ring_write(ring, ring->is_mes_queue ?
5618 			 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
5619 }
5620 
5621 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5622 {
5623 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5624 	uint32_t seq = ring->fence_drv.sync_seq;
5625 	uint64_t addr = ring->fence_drv.gpu_addr;
5626 
5627 	gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5628 			       upper_32_bits(addr), seq, 0xffffffff, 4);
5629 }
5630 
5631 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5632 				   uint16_t pasid, uint32_t flush_type,
5633 				   bool all_hub, uint8_t dst_sel)
5634 {
5635 	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5636 	amdgpu_ring_write(ring,
5637 			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5638 			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5639 			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5640 			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5641 }
5642 
5643 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5644 					 unsigned vmid, uint64_t pd_addr)
5645 {
5646 	if (ring->is_mes_queue)
5647 		gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5648 	else
5649 		amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5650 
5651 	/* compute doesn't have PFP */
5652 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5653 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5654 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5655 		amdgpu_ring_write(ring, 0x0);
5656 	}
5657 
5658 	/* Make sure that we can't skip the SET_Q_MODE packets when the VM
5659 	 * changed in any way.
5660 	 */
5661 	ring->set_q_mode_offs = 0;
5662 	ring->set_q_mode_ptr = NULL;
5663 }
5664 
5665 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5666 					  u64 seq, unsigned int flags)
5667 {
5668 	struct amdgpu_device *adev = ring->adev;
5669 
5670 	/* we only allocate 32bit for each seq wb address */
5671 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5672 
5673 	/* write fence seq to the "addr" */
5674 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5675 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5676 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5677 	amdgpu_ring_write(ring, lower_32_bits(addr));
5678 	amdgpu_ring_write(ring, upper_32_bits(addr));
5679 	amdgpu_ring_write(ring, lower_32_bits(seq));
5680 
5681 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5682 		/* set register to trigger INT */
5683 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5684 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5685 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5686 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
5687 		amdgpu_ring_write(ring, 0);
5688 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5689 	}
5690 }
5691 
5692 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
5693 					 uint32_t flags)
5694 {
5695 	uint32_t dw2 = 0;
5696 
	dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
5698 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5699 		/* set load_global_config & load_global_uconfig */
5700 		dw2 |= 0x8001;
5701 		/* set load_cs_sh_regs */
5702 		dw2 |= 0x01000000;
5703 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5704 		dw2 |= 0x10002;
5705 	}
5706 
5707 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5708 	amdgpu_ring_write(ring, dw2);
5709 	amdgpu_ring_write(ring, 0);
5710 }
5711 
5712 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5713 						   uint64_t addr)
5714 {
5715 	unsigned ret;
5716 
5717 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5718 	amdgpu_ring_write(ring, lower_32_bits(addr));
5719 	amdgpu_ring_write(ring, upper_32_bits(addr));
5720 	/* discard following DWs if *cond_exec_gpu_addr==0 */
5721 	amdgpu_ring_write(ring, 0);
5722 	ret = ring->wptr & ring->buf_mask;
5723 	/* patch dummy value later */
5724 	amdgpu_ring_write(ring, 0);
5725 
5726 	return ret;
5727 }
5728 
5729 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
5730 					   u64 shadow_va, u64 csa_va,
5731 					   u64 gds_va, bool init_shadow,
5732 					   int vmid)
5733 {
5734 	struct amdgpu_device *adev = ring->adev;
5735 	unsigned int offs, end;
5736 
5737 	if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
5738 		return;
5739 
5740 	/*
5741 	 * The logic here isn't easy to understand because we need to keep state
	 * across multiple executions of the function as well as between the
	 * CPU and GPU. The general idea is that the newly written GPU command
	 * has a condition on the previous one and is only executed if really
5745 	 * necessary.
5746 	 */
5747 
5748 	/*
	 * The dw in the NOP controls whether the next SET_Q_MODE packet should
	 * be executed or not. Reserve 64 bits just to be on the safe side.
5751 	 */
5752 	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
5753 	offs = ring->wptr & ring->buf_mask;
5754 
5755 	/*
5756 	 * We start with skipping the prefix SET_Q_MODE and always executing
5757 	 * the postfix SET_Q_MODE packet. This is changed below with a
	 * WRITE_DATA command when the postfix is executed.
5759 	 */
5760 	amdgpu_ring_write(ring, shadow_va ? 1 : 0);
5761 	amdgpu_ring_write(ring, 0);
5762 
5763 	if (ring->set_q_mode_offs) {
5764 		uint64_t addr;
5765 
5766 		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
5767 		addr += ring->set_q_mode_offs << 2;
5768 		end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
5769 	}
5770 
5771 	/*
5772 	 * When the postfix SET_Q_MODE packet executes we need to make sure that the
5773 	 * next prefix SET_Q_MODE packet executes as well.
5774 	 */
5775 	if (!shadow_va) {
5776 		uint64_t addr;
5777 
5778 		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
5779 		addr += offs << 2;
5780 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5781 		amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5782 		amdgpu_ring_write(ring, lower_32_bits(addr));
5783 		amdgpu_ring_write(ring, upper_32_bits(addr));
5784 		amdgpu_ring_write(ring, 0x1);
5785 	}
5786 
5787 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
5788 	amdgpu_ring_write(ring, lower_32_bits(shadow_va));
5789 	amdgpu_ring_write(ring, upper_32_bits(shadow_va));
5790 	amdgpu_ring_write(ring, lower_32_bits(gds_va));
5791 	amdgpu_ring_write(ring, upper_32_bits(gds_va));
5792 	amdgpu_ring_write(ring, lower_32_bits(csa_va));
5793 	amdgpu_ring_write(ring, upper_32_bits(csa_va));
5794 	amdgpu_ring_write(ring, shadow_va ?
5795 			  PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
5796 	amdgpu_ring_write(ring, init_shadow ?
5797 			  PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
5798 
5799 	if (ring->set_q_mode_offs)
5800 		amdgpu_ring_patch_cond_exec(ring, end);
5801 
5802 	if (shadow_va) {
5803 		uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;
5804 
5805 		/*
5806 		 * If the tokens match try to skip the last postfix SET_Q_MODE
5807 		 * packet to avoid saving/restoring the state all the time.
5808 		 */
5809 		if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
5810 			*ring->set_q_mode_ptr = 0;
5811 
5812 		ring->set_q_mode_token = token;
5813 	} else {
5814 		ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
5815 	}
5816 
5817 	ring->set_q_mode_offs = offs;
5818 }
5819 
5820 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
5821 {
5822 	int i, r = 0;
5823 	struct amdgpu_device *adev = ring->adev;
5824 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5825 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5826 	unsigned long flags;
5827 
5828 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5829 		return -EINVAL;
5830 
5831 	spin_lock_irqsave(&kiq->ring_lock, flags);
5832 
5833 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5834 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
5835 		return -ENOMEM;
5836 	}
5837 
5838 	/* assert preemption condition */
5839 	amdgpu_ring_set_preempt_cond_exec(ring, false);
5840 
5841 	/* assert IB preemption, emit the trailing fence */
5842 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5843 				   ring->trail_fence_gpu_addr,
5844 				   ++ring->trail_seq);
5845 	amdgpu_ring_commit(kiq_ring);
5846 
5847 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
5848 
5849 	/* poll the trailing fence */
5850 	for (i = 0; i < adev->usec_timeout; i++) {
5851 		if (ring->trail_seq ==
5852 		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
5853 			break;
5854 		udelay(1);
5855 	}
5856 
5857 	if (i >= adev->usec_timeout) {
5858 		r = -EINVAL;
5859 		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
5860 	}
5861 
5862 	/* deassert preemption condition */
5863 	amdgpu_ring_set_preempt_cond_exec(ring, true);
5864 	return r;
5865 }
5866 
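/*
 * Write the DE metadata payload (including the GDS backup address) into the
 * CSA, or into the MES context for MES queues. On resume the previously
 * saved payload is replayed from the CPU copy instead of a fresh one.
 */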
5867 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
5868 {
5869 	struct amdgpu_device *adev = ring->adev;
5870 	struct v10_de_ib_state de_payload = {0};
5871 	uint64_t offset, gds_addr, de_payload_gpu_addr;
5872 	void *de_payload_cpu_addr;
5873 	int cnt;
5874 
5875 	if (ring->is_mes_queue) {
5876 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5877 				  gfx[0].gfx_meta_data) +
5878 			offsetof(struct v10_gfx_meta_data, de_payload);
5879 		de_payload_gpu_addr =
5880 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5881 		de_payload_cpu_addr =
5882 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5883 
5884 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5885 				  gfx[0].gds_backup) +
5886 			offsetof(struct v10_gfx_meta_data, de_payload);
5887 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5888 	} else {
5889 		offset = offsetof(struct v10_gfx_meta_data, de_payload);
5890 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5891 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5892 
5893 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5894 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5895 				 PAGE_SIZE);
5896 	}
5897 
5898 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5899 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5900 
5901 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5902 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5903 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5904 				 WRITE_DATA_DST_SEL(8) |
5905 				 WR_CONFIRM) |
5906 				 WRITE_DATA_CACHE_POLICY(0));
5907 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5908 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5909 
5910 	if (resume)
5911 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5912 					   sizeof(de_payload) >> 2);
5913 	else
5914 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5915 					   sizeof(de_payload) >> 2);
5916 }
5917 
5918 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5919 				    bool secure)
5920 {
5921 	uint32_t v = secure ? FRAME_TMZ : 0;
5922 
5923 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5924 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5925 }
5926 
5927 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5928 				     uint32_t reg_val_offs)
5929 {
5930 	struct amdgpu_device *adev = ring->adev;
5931 
5932 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5933 	amdgpu_ring_write(ring, 0 |	/* src: register*/
5934 				(5 << 8) |	/* dst: memory */
5935 				(1 << 20));	/* write confirm */
5936 	amdgpu_ring_write(ring, reg);
5937 	amdgpu_ring_write(ring, 0);
5938 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5939 				reg_val_offs * 4));
5940 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5941 				reg_val_offs * 4));
5942 }
5943 
5944 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5945 				   uint32_t val)
5946 {
5947 	uint32_t cmd = 0;
5948 
5949 	switch (ring->funcs->type) {
5950 	case AMDGPU_RING_TYPE_GFX:
5951 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5952 		break;
5953 	case AMDGPU_RING_TYPE_KIQ:
5954 		cmd = (1 << 16); /* no inc addr */
5955 		break;
5956 	default:
5957 		cmd = WR_CONFIRM;
5958 		break;
5959 	}
5960 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5961 	amdgpu_ring_write(ring, cmd);
5962 	amdgpu_ring_write(ring, reg);
5963 	amdgpu_ring_write(ring, 0);
5964 	amdgpu_ring_write(ring, val);
5965 }
5966 
5967 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5968 					uint32_t val, uint32_t mask)
5969 {
5970 	gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5971 }
5972 
5973 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5974 						   uint32_t reg0, uint32_t reg1,
5975 						   uint32_t ref, uint32_t mask)
5976 {
5977 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5978 
5979 	gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5980 			       ref, mask, 0x20);
5981 }
5982 
5983 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
5984 					 unsigned vmid)
5985 {
5986 	struct amdgpu_device *adev = ring->adev;
5987 	uint32_t value = 0;
5988 
5989 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5990 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5991 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5992 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5993 	WREG32_SOC15(GC, 0, regSQ_CMD, value);
5994 }
5995 
5996 static void
5997 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5998 				      uint32_t me, uint32_t pipe,
5999 				      enum amdgpu_interrupt_state state)
6000 {
6001 	uint32_t cp_int_cntl, cp_int_cntl_reg;
6002 
6003 	if (!me) {
6004 		switch (pipe) {
6005 		case 0:
6006 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
6007 			break;
6008 		case 1:
6009 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
6010 			break;
6011 		default:
6012 			DRM_DEBUG("invalid pipe %d\n", pipe);
6013 			return;
6014 		}
6015 	} else {
6016 		DRM_DEBUG("invalid me %d\n", me);
6017 		return;
6018 	}
6019 
6020 	switch (state) {
6021 	case AMDGPU_IRQ_STATE_DISABLE:
6022 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6023 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6024 					    TIME_STAMP_INT_ENABLE, 0);
6025 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6026 					    GENERIC0_INT_ENABLE, 0);
6027 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6028 		break;
6029 	case AMDGPU_IRQ_STATE_ENABLE:
6030 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6031 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6032 					    TIME_STAMP_INT_ENABLE, 1);
6033 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6034 					    GENERIC0_INT_ENABLE, 1);
6035 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6036 		break;
6037 	default:
6038 		break;
6039 	}
6040 }
6041 
6042 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6043 						     int me, int pipe,
6044 						     enum amdgpu_interrupt_state state)
6045 {
6046 	u32 mec_int_cntl, mec_int_cntl_reg;
6047 
6048 	/*
6049 	 * amdgpu controls only the first MEC. That's why this function only
6050 	 * handles the setting of interrupts for this specific MEC. All other
6051 	 * pipes' interrupts are set by amdkfd.
6052 	 */
6053 
6054 	if (me == 1) {
6055 		switch (pipe) {
6056 		case 0:
6057 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6058 			break;
6059 		case 1:
6060 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
6061 			break;
6062 		case 2:
6063 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
6064 			break;
6065 		case 3:
6066 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
6067 			break;
6068 		default:
6069 			DRM_DEBUG("invalid pipe %d\n", pipe);
6070 			return;
6071 		}
6072 	} else {
6073 		DRM_DEBUG("invalid me %d\n", me);
6074 		return;
6075 	}
6076 
6077 	switch (state) {
6078 	case AMDGPU_IRQ_STATE_DISABLE:
6079 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6080 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6081 					     TIME_STAMP_INT_ENABLE, 0);
6082 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6083 					     GENERIC0_INT_ENABLE, 0);
6084 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6085 		break;
6086 	case AMDGPU_IRQ_STATE_ENABLE:
6087 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6088 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6089 					     TIME_STAMP_INT_ENABLE, 1);
6090 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6091 					     GENERIC0_INT_ENABLE, 1);
6092 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6093 		break;
6094 	default:
6095 		break;
6096 	}
6097 }
6098 
6099 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6100 					    struct amdgpu_irq_src *src,
6101 					    unsigned type,
6102 					    enum amdgpu_interrupt_state state)
6103 {
6104 	switch (type) {
6105 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6106 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
6107 		break;
6108 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
6109 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
6110 		break;
6111 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6112 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6113 		break;
6114 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6115 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6116 		break;
6117 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6118 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6119 		break;
6120 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6121 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6122 		break;
6123 	default:
6124 		break;
6125 	}
6126 	return 0;
6127 }
6128 
6129 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
6130 			     struct amdgpu_irq_src *source,
6131 			     struct amdgpu_iv_entry *entry)
6132 {
6133 	int i;
6134 	u8 me_id, pipe_id, queue_id;
6135 	struct amdgpu_ring *ring;
6136 	uint32_t mes_queue_id = entry->src_data[0];
6137 
6138 	DRM_DEBUG("IH: CP EOP\n");
6139 
6140 	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
6141 		struct amdgpu_mes_queue *queue;
6142 
6143 		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
6144 
6145 		spin_lock(&adev->mes.queue_id_lock);
6146 		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
6147 		if (queue) {
6148 			DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
6149 			amdgpu_fence_process(queue->ring);
6150 		}
6151 		spin_unlock(&adev->mes.queue_id_lock);
6152 	} else {
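		/* decode me/pipe/queue from the ring_id of the IV entry */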
6153 		me_id = (entry->ring_id & 0x0c) >> 2;
6154 		pipe_id = (entry->ring_id & 0x03) >> 0;
6155 		queue_id = (entry->ring_id & 0x70) >> 4;
6156 
6157 		switch (me_id) {
6158 		case 0:
6159 			if (pipe_id == 0)
6160 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6161 			else
6162 				amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
6163 			break;
6164 		case 1:
6165 		case 2:
6166 			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6167 				ring = &adev->gfx.compute_ring[i];
6168 				/* Per-queue interrupt is supported for MEC starting from VI.
6169 				 * The interrupt can only be enabled/disabled per pipe instead
6170 				 * of per queue.
6171 				 */
6172 				if ((ring->me == me_id) &&
6173 				    (ring->pipe == pipe_id) &&
6174 				    (ring->queue == queue_id))
6175 					amdgpu_fence_process(ring);
6176 			}
6177 			break;
6178 		}
6179 	}
6180 
6181 	return 0;
6182 }
6183 
6184 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6185 					      struct amdgpu_irq_src *source,
6186 					      unsigned type,
6187 					      enum amdgpu_interrupt_state state)
6188 {
6189 	switch (state) {
6190 	case AMDGPU_IRQ_STATE_DISABLE:
6191 	case AMDGPU_IRQ_STATE_ENABLE:
6192 		WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
6193 			       PRIV_REG_INT_ENABLE,
6194 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6195 		break;
6196 	default:
6197 		break;
6198 	}
6199 
6200 	return 0;
6201 }
6202 
6203 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6204 					       struct amdgpu_irq_src *source,
6205 					       unsigned type,
6206 					       enum amdgpu_interrupt_state state)
6207 {
6208 	switch (state) {
6209 	case AMDGPU_IRQ_STATE_DISABLE:
6210 	case AMDGPU_IRQ_STATE_ENABLE:
6211 		WREG32_FIELD15_PREREG(GC, 0, CP_INT_CNTL_RING0,
6212 			       PRIV_INSTR_INT_ENABLE,
6213 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6214 		break;
6215 	default:
6216 		break;
6217 	}
6218 
6219 	return 0;
6220 }
6221 
6222 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
6223 					struct amdgpu_iv_entry *entry)
6224 {
6225 	u8 me_id, pipe_id, queue_id;
6226 	struct amdgpu_ring *ring;
6227 	int i;
6228 
6229 	me_id = (entry->ring_id & 0x0c) >> 2;
6230 	pipe_id = (entry->ring_id & 0x03) >> 0;
6231 	queue_id = (entry->ring_id & 0x70) >> 4;
6232 
6233 	switch (me_id) {
6234 	case 0:
6235 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
6236 			ring = &adev->gfx.gfx_ring[i];
			/* we only enable 1 gfx queue per pipe for now */
6238 			if (ring->me == me_id && ring->pipe == pipe_id)
6239 				drm_sched_fault(&ring->sched);
6240 		}
6241 		break;
6242 	case 1:
6243 	case 2:
6244 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6245 			ring = &adev->gfx.compute_ring[i];
6246 			if (ring->me == me_id && ring->pipe == pipe_id &&
6247 			    ring->queue == queue_id)
6248 				drm_sched_fault(&ring->sched);
6249 		}
6250 		break;
6251 	default:
6252 		BUG();
6253 		break;
6254 	}
6255 }
6256 
6257 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
6258 				  struct amdgpu_irq_src *source,
6259 				  struct amdgpu_iv_entry *entry)
6260 {
6261 	DRM_ERROR("Illegal register access in command stream\n");
6262 	gfx_v11_0_handle_priv_fault(adev, entry);
6263 	return 0;
6264 }
6265 
6266 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
6267 				   struct amdgpu_irq_src *source,
6268 				   struct amdgpu_iv_entry *entry)
6269 {
6270 	DRM_ERROR("Illegal instruction in command stream\n");
6271 	gfx_v11_0_handle_priv_fault(adev, entry);
6272 	return 0;
6273 }
6274 
6275 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
6276 				  struct amdgpu_irq_src *source,
6277 				  struct amdgpu_iv_entry *entry)
6278 {
6279 	if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq)
6280 		return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);
6281 
6282 	return 0;
6283 }
6284 
6285 #if 0
6286 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6287 					     struct amdgpu_irq_src *src,
6288 					     unsigned int type,
6289 					     enum amdgpu_interrupt_state state)
6290 {
6291 	uint32_t tmp, target;
6292 	struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
6293 
6294 	target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6295 	target += ring->pipe;
6296 
6297 	switch (type) {
6298 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6299 		if (state == AMDGPU_IRQ_STATE_DISABLE) {
6300 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6301 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6302 					    GENERIC2_INT_ENABLE, 0);
6303 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6304 
6305 			tmp = RREG32_SOC15_IP(GC, target);
6306 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6307 					    GENERIC2_INT_ENABLE, 0);
6308 			WREG32_SOC15_IP(GC, target, tmp);
6309 		} else {
6310 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6311 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6312 					    GENERIC2_INT_ENABLE, 1);
6313 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6314 
6315 			tmp = RREG32_SOC15_IP(GC, target);
6316 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6317 					    GENERIC2_INT_ENABLE, 1);
6318 			WREG32_SOC15_IP(GC, target, tmp);
6319 		}
6320 		break;
6321 	default:
6322 		BUG(); /* kiq only supports GENERIC2_INT now */
6323 		break;
6324 	}
6325 	return 0;
6326 }
6327 #endif
6328 
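/*
 * Emit an ACQUIRE_MEM packet covering the full address range that writes
 * back and invalidates the GL2/GLM caches and invalidates GL1, GLV, GLK
 * and GLI, so that subsequent work sees coherent memory.
 */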
6329 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6330 {
6331 	const unsigned int gcr_cntl =
6332 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6333 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6334 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6335 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6336 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6337 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6338 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6339 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6340 
6341 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6342 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6343 	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6344 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6345 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6346 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6347 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6348 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6349 	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6350 }
6351 
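/*
 * Pretty-print the register values captured by gfx_v11_ip_dump(): the GC
 * core registers, followed by the CP registers of every compute queue and
 * the registers of every gfx queue.
 */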
6352 static void gfx_v11_ip_print(void *handle, struct drm_printer *p)
6353 {
6354 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6355 	uint32_t i, j, k, reg, index = 0;
6356 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
6357 
6358 	if (!adev->gfx.ip_dump_core)
6359 		return;
6360 
6361 	for (i = 0; i < reg_count; i++)
6362 		drm_printf(p, "%-50s \t 0x%08x\n",
6363 			   gc_reg_list_11_0[i].reg_name,
6364 			   adev->gfx.ip_dump_core[i]);
6365 
6366 	/* print compute queue registers for all instances */
6367 	if (!adev->gfx.ip_dump_cp_queues)
6368 		return;
6369 
6370 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
6371 	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
6372 		   adev->gfx.mec.num_mec,
6373 		   adev->gfx.mec.num_pipe_per_mec,
6374 		   adev->gfx.mec.num_queue_per_pipe);
6375 
6376 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6377 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6378 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
6379 				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
6380 				for (reg = 0; reg < reg_count; reg++) {
6381 					drm_printf(p, "%-50s \t 0x%08x\n",
6382 						   gc_cp_reg_list_11[reg].reg_name,
6383 						   adev->gfx.ip_dump_cp_queues[index + reg]);
6384 				}
6385 				index += reg_count;
6386 			}
6387 		}
6388 	}
6389 
6390 	/* print gfx queue registers for all instances */
6391 	if (!adev->gfx.ip_dump_gfx_queues)
6392 		return;
6393 
6394 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
6395 	drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
6396 		   adev->gfx.me.num_me,
6397 		   adev->gfx.me.num_pipe_per_me,
6398 		   adev->gfx.me.num_queue_per_pipe);
6399 
6400 	for (i = 0; i < adev->gfx.me.num_me; i++) {
6401 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6402 			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
6403 				drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
6404 				for (reg = 0; reg < reg_count; reg++) {
6405 					drm_printf(p, "%-50s \t 0x%08x\n",
6406 						   gc_gfx_queue_reg_list_11[reg].reg_name,
6407 						   adev->gfx.ip_dump_gfx_queues[index + reg]);
6408 				}
6409 				index += reg_count;
6410 			}
6411 		}
6412 	}
6413 }
6414 
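/*
 * Capture the GC core registers and the per-queue CP/GFX registers for a
 * later gfx_v11_ip_print().  GFXOFF is disabled around the reads, and each
 * mec/pipe/queue (ME0 is gfx, so compute starts at ME1) is selected via
 * soc21_grbm_select() under srbm_mutex.
 */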
6415 static void gfx_v11_ip_dump(void *handle)
6416 {
6417 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6418 	uint32_t i, j, k, reg, index = 0;
6419 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
6420 
6421 	if (!adev->gfx.ip_dump_core)
6422 		return;
6423 
6424 	amdgpu_gfx_off_ctrl(adev, false);
6425 	for (i = 0; i < reg_count; i++)
6426 		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
6427 	amdgpu_gfx_off_ctrl(adev, true);
6428 
6429 	/* dump compute queue registers for all instances */
6430 	if (!adev->gfx.ip_dump_cp_queues)
6431 		return;
6432 
6433 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
6434 	amdgpu_gfx_off_ctrl(adev, false);
6435 	mutex_lock(&adev->srbm_mutex);
6436 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6437 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6438 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
6439 				/* ME0 is for GFX so start from 1 for CP */
6440 				soc21_grbm_select(adev, 1 + i, j, k, 0);
6441 				for (reg = 0; reg < reg_count; reg++) {
6442 					adev->gfx.ip_dump_cp_queues[index + reg] =
6443 						RREG32(SOC15_REG_ENTRY_OFFSET(
6444 							gc_cp_reg_list_11[reg]));
6445 				}
6446 				index += reg_count;
6447 			}
6448 		}
6449 	}
6450 	soc21_grbm_select(adev, 0, 0, 0, 0);
6451 	mutex_unlock(&adev->srbm_mutex);
6452 	amdgpu_gfx_off_ctrl(adev, true);
6453 
6454 	/* dump gfx queue registers for all instances */
6455 	if (!adev->gfx.ip_dump_gfx_queues)
6456 		return;
6457 
6458 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
6459 	amdgpu_gfx_off_ctrl(adev, false);
6460 	mutex_lock(&adev->srbm_mutex);
6461 	for (i = 0; i < adev->gfx.me.num_me; i++) {
6462 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6463 			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
6464 				soc21_grbm_select(adev, i, j, k, 0);
6465 
6466 				for (reg = 0; reg < reg_count; reg++) {
6467 					adev->gfx.ip_dump_gfx_queues[index + reg] =
6468 						RREG32(SOC15_REG_ENTRY_OFFSET(
6469 							gc_gfx_queue_reg_list_11[reg]));
6470 				}
6471 				index += reg_count;
6472 			}
6473 		}
6474 	}
6475 	soc21_grbm_select(adev, 0, 0, 0, 0);
6476 	mutex_unlock(&adev->srbm_mutex);
6477 	amdgpu_gfx_off_ctrl(adev, true);
6478 }
6479 
6480 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
6481 	.name = "gfx_v11_0",
6482 	.early_init = gfx_v11_0_early_init,
6483 	.late_init = gfx_v11_0_late_init,
6484 	.sw_init = gfx_v11_0_sw_init,
6485 	.sw_fini = gfx_v11_0_sw_fini,
6486 	.hw_init = gfx_v11_0_hw_init,
6487 	.hw_fini = gfx_v11_0_hw_fini,
6488 	.suspend = gfx_v11_0_suspend,
6489 	.resume = gfx_v11_0_resume,
6490 	.is_idle = gfx_v11_0_is_idle,
6491 	.wait_for_idle = gfx_v11_0_wait_for_idle,
6492 	.soft_reset = gfx_v11_0_soft_reset,
6493 	.check_soft_reset = gfx_v11_0_check_soft_reset,
6494 	.post_soft_reset = gfx_v11_0_post_soft_reset,
6495 	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
6496 	.set_powergating_state = gfx_v11_0_set_powergating_state,
6497 	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
6498 	.dump_ip_state = gfx_v11_ip_dump,
6499 	.print_ip_state = gfx_v11_ip_print,
6500 };
6501 
6502 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
6503 	.type = AMDGPU_RING_TYPE_GFX,
6504 	.align_mask = 0xff,
6505 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6506 	.support_64bit_ptrs = true,
6507 	.secure_submission_supported = true,
6508 	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
6509 	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
6510 	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
6511 	.emit_frame_size = /* 247 dwords maximum in total if 16 IBs */
6512 		5 + /* update_spm_vmid */
6513 		5 + /* COND_EXEC */
6514 		22 + /* SET_Q_PREEMPTION_MODE */
6515 		7 + /* PIPELINE_SYNC */
6516 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6517 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6518 		4 + /* VM_FLUSH */
6519 		8 + /* FENCE for VM_FLUSH */
6520 		20 + /* GDS switch */
6521 		5 + /* COND_EXEC */
6522 		7 + /* HDP_flush */
6523 		4 + /* VGT_flush */
6524 		31 + /* DE_META */
6525 		3 + /* CNTX_CTRL */
6526 		5 + /* HDP_INVL */
6527 		22 + /* SET_Q_PREEMPTION_MODE */
6528 		8 + 8 + /* FENCE x2 */
6529 		8, /* gfx_v11_0_emit_mem_sync */
6530 	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
6531 	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
6532 	.emit_fence = gfx_v11_0_ring_emit_fence,
6533 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6534 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6535 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6536 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6537 	.test_ring = gfx_v11_0_ring_test_ring,
6538 	.test_ib = gfx_v11_0_ring_test_ib,
6539 	.insert_nop = amdgpu_ring_insert_nop,
6540 	.pad_ib = amdgpu_ring_generic_pad_ib,
6541 	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
6542 	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
6543 	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
6544 	.preempt_ib = gfx_v11_0_ring_preempt_ib,
6545 	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
6546 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6547 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6548 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6549 	.soft_recovery = gfx_v11_0_ring_soft_recovery,
6550 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6551 };
6552 
6553 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
6554 	.type = AMDGPU_RING_TYPE_COMPUTE,
6555 	.align_mask = 0xff,
6556 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6557 	.support_64bit_ptrs = true,
6558 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
6559 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
6560 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
6561 	.emit_frame_size =
6562 		5 + /* update_spm_vmid */
6563 		20 + /* gfx_v11_0_ring_emit_gds_switch */
6564 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
6565 		5 + /* hdp invalidate */
6566 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6567 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6568 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6569 		2 + /* gfx_v11_0_ring_emit_vm_flush */
6570 		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
6571 		8, /* gfx_v11_0_emit_mem_sync */
6572 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
6573 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
6574 	.emit_fence = gfx_v11_0_ring_emit_fence,
6575 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6576 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6577 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6578 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6579 	.test_ring = gfx_v11_0_ring_test_ring,
6580 	.test_ib = gfx_v11_0_ring_test_ib,
6581 	.insert_nop = amdgpu_ring_insert_nop,
6582 	.pad_ib = amdgpu_ring_generic_pad_ib,
6583 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6584 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6585 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6586 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6587 };
6588 
6589 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
6590 	.type = AMDGPU_RING_TYPE_KIQ,
6591 	.align_mask = 0xff,
6592 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6593 	.support_64bit_ptrs = true,
6594 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
6595 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
6596 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
6597 	.emit_frame_size =
6598 		20 + /* gfx_v11_0_ring_emit_gds_switch */
6599 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
6600 		5 + /* hdp invalidate */
6601 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6602 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6603 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6604 		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6605 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
6606 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
6607 	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
6608 	.test_ring = gfx_v11_0_ring_test_ring,
6609 	.test_ib = gfx_v11_0_ring_test_ib,
6610 	.insert_nop = amdgpu_ring_insert_nop,
6611 	.pad_ib = amdgpu_ring_generic_pad_ib,
6612 	.emit_rreg = gfx_v11_0_ring_emit_rreg,
6613 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6614 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6615 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6616 };
6617 
6618 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
6619 {
6620 	int i;
6621 
6622 	adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;
6623 
6624 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6625 		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
6626 
6627 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6628 		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
6629 }
6630 
6631 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
6632 	.set = gfx_v11_0_set_eop_interrupt_state,
6633 	.process = gfx_v11_0_eop_irq,
6634 };
6635 
6636 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
6637 	.set = gfx_v11_0_set_priv_reg_fault_state,
6638 	.process = gfx_v11_0_priv_reg_irq,
6639 };
6640 
6641 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
6642 	.set = gfx_v11_0_set_priv_inst_fault_state,
6643 	.process = gfx_v11_0_priv_inst_irq,
6644 };
6645 
6646 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
6647 	.process = gfx_v11_0_rlc_gc_fed_irq,
6648 };
6649 
6650 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
6651 {
6652 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6653 	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
6654 
6655 	adev->gfx.priv_reg_irq.num_types = 1;
6656 	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
6657 
6658 	adev->gfx.priv_inst_irq.num_types = 1;
6659 	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
6660 
6661 	adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
6662 	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
6663 
6664 }
6665 
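/* APUs run the IMU in mission mode, dGPUs in debug mode. */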
6666 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
6667 {
6668 	if (adev->flags & AMD_IS_APU)
6669 		adev->gfx.imu.mode = MISSION_MODE;
6670 	else
6671 		adev->gfx.imu.mode = DEBUG_MODE;
6672 
6673 	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
6674 }
6675 
6676 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
6677 {
6678 	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
6679 }
6680 
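/*
 * GDS defaults: 4KB of GDS, 64 GWS and 16 OA entries; the max compute
 * wave id is derived from the total CU count (32 waves per CU).
 */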
6681 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
6682 {
6683 	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
6684 			    adev->gfx.config.max_sh_per_se *
6685 			    adev->gfx.config.max_shader_engines;
6686 
6687 	adev->gds.gds_size = 0x1000;
6688 	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
6689 	adev->gds.gws_size = 64;
6690 	adev->gds.oa_size = 16;
6691 }
6692 
6693 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
6694 {
6695 	/* set gfx eng mqd */
6696 	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
6697 		sizeof(struct v11_gfx_mqd);
6698 	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
6699 		gfx_v11_0_gfx_mqd_init;
6700 	/* set compute eng mqd */
6701 	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
6702 		sizeof(struct v11_compute_mqd);
6703 	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
6704 		gfx_v11_0_compute_mqd_init;
6705 }
6706 
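/*
 * Program the user-requested inactive WGP bitmap for the currently
 * selected shader array into GC_USER_SHADER_ARRAY_CONFIG.
 */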
6707 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
6708 							  u32 bitmap)
6709 {
6710 	u32 data;
6711 
6712 	if (!bitmap)
6713 		return;
6714 
6715 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6716 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6717 
6718 	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
6719 }
6720 
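/*
 * Return the active WGP bitmap for the currently selected shader array:
 * the union of the fused-off (CC_GC_SHADER_ARRAY_CONFIG) and user-disabled
 * (GC_USER_SHADER_ARRAY_CONFIG) WGPs, inverted and masked to the number of
 * WGPs per SH (max_cu_per_sh / 2, as each WGP holds two CUs).
 */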
6721 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
6722 {
6723 	u32 data, wgp_bitmask;
6724 	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
6725 	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
6726 
6727 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
6728 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
6729 
6730 	wgp_bitmask =
6731 		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
6732 
6733 	return (~data) & wgp_bitmask;
6734 }
6735 
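/* Expand the active WGP bitmap into a CU bitmap: two CUs per active WGP. */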
6736 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
6737 {
6738 	u32 wgp_idx, wgp_active_bitmap;
6739 	u32 cu_bitmap_per_wgp, cu_active_bitmap;
6740 
6741 	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
6742 	cu_active_bitmap = 0;
6743 
6744 	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
6745 		/* if a WGP is enabled, its 2 CUs are enabled as well */
6746 		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
6747 		if (wgp_active_bitmap & (1 << wgp_idx))
6748 			cu_active_bitmap |= cu_bitmap_per_wgp;
6749 	}
6750 
6751 	return cu_active_bitmap;
6752 }
6753 
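/*
 * Walk all shader engines/arrays, apply the parsed CU disable masks and
 * fill @cu_info with the per-SA CU bitmaps and the total active CU count.
 * See the comment below for how SEs 4-7 are folded into the 4x4 bitmap.
 */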
6754 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
6755 				 struct amdgpu_cu_info *cu_info)
6756 {
6757 	int i, j, k, counter, active_cu_number = 0;
6758 	u32 mask, bitmap;
6759 	unsigned disable_masks[8 * 2];
6760 
6761 	if (!adev || !cu_info)
6762 		return -EINVAL;
6763 
6764 	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
6765 
6766 	mutex_lock(&adev->grbm_idx_mutex);
6767 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6768 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6769 			bitmap = i * adev->gfx.config.max_sh_per_se + j;
6770 			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
6771 				continue;
6772 			mask = 1;
6773 			counter = 0;
6774 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
6775 			if (i < 8 && j < 2)
6776 				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
6777 					adev, disable_masks[i * 2 + j]);
6778 			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
6779 
6780 			/**
6781 			 * GFX11 can support more than 4 SEs, while the bitmap
6782 			 * in the cu_info struct is 4x4 and the ioctl interface struct
6783 			 * drm_amdgpu_info_device must remain stable.
6784 			 * So the last two columns of the bitmap are used to store the
6785 			 * cu mask for SEs 4 to 7; the layout of the bitmap is as below:
6786 			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
6787 			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
6788 			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
6789 			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
6790 			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
6791 			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
6792 			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
6793 			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
6794 			 */
6795 			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
6796 
6797 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6798 				if (bitmap & mask)
6799 					counter++;
6800 
6801 				mask <<= 1;
6802 			}
6803 			active_cu_number += counter;
6804 		}
6805 	}
6806 	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
6807 	mutex_unlock(&adev->grbm_idx_mutex);
6808 
6809 	cu_info->number = active_cu_number;
6810 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6811 
6812 	return 0;
6813 }
6814 
6815 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
6816 {
6817 	.type = AMD_IP_BLOCK_TYPE_GFX,
6818 	.major = 11,
6819 	.minor = 0,
6820 	.rev = 0,
6821 	.funcs = &gfx_v11_0_ip_funcs,
6822 };
6823