xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c (revision 2c1ed907520c50326b8f604907a8478b27881a2e)
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "amdgpu_atomfirmware.h"
33 #include "imu_v11_0.h"
34 #include "soc21.h"
35 #include "nvd.h"
36 
37 #include "gc/gc_11_0_0_offset.h"
38 #include "gc/gc_11_0_0_sh_mask.h"
39 #include "smuio/smuio_13_0_6_offset.h"
40 #include "smuio/smuio_13_0_6_sh_mask.h"
41 #include "navi10_enum.h"
42 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
43 
44 #include "soc15.h"
45 #include "soc15d.h"
46 #include "clearstate_gfx11.h"
47 #include "v11_structs.h"
48 #include "gfx_v11_0.h"
49 #include "gfx_v11_0_cleaner_shader.h"
50 #include "gfx_v11_0_3.h"
51 #include "nbio_v4_3.h"
52 #include "mes_v11_0.h"
53 
54 #define GFX11_NUM_GFX_RINGS		1
55 #define GFX11_MEC_HPD_SIZE	2048
56 
57 #define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L
58 #define RLC_PG_DELAY_3_DEFAULT_GC_11_0_1	0x1388
59 
60 #define regCGTT_WD_CLK_CTRL		0x5086
61 #define regCGTT_WD_CLK_CTRL_BASE_IDX	1
62 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1	0x4e7e
63 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX	1
64 #define regPC_CONFIG_CNTL_1		0x194d
65 #define regPC_CONFIG_CNTL_1_BASE_IDX	1
66 
67 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin");
69 MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin");
70 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin");
71 MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin");
72 MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin");
73 MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin");
74 MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin");
75 MODULE_FIRMWARE("amdgpu/gc_11_0_1_mec.bin");
76 MODULE_FIRMWARE("amdgpu/gc_11_0_1_rlc.bin");
77 MODULE_FIRMWARE("amdgpu/gc_11_0_2_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/gc_11_0_2_me.bin");
79 MODULE_FIRMWARE("amdgpu/gc_11_0_2_mec.bin");
80 MODULE_FIRMWARE("amdgpu/gc_11_0_2_rlc.bin");
81 MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
83 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
84 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
85 MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
86 MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
87 MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
88 MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
89 MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin");
90 MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin");
91 MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin");
92 MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin");
93 MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin");
94 MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
95 MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
96 MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin");
98 MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin");
99 MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin");
100 MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin");
101 
102 static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
103 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
104 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
105 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
106 	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
107 	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
108 	SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
109 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
110 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
111 	SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
112 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
113 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
114 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
115 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
116 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
117 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
118 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
119 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
120 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
121 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
122 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
123 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
124 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
125 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
126 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
127 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
128 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
129 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
130 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
131 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
132 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
133 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
134 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
135 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
136 	SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
137 	SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
138 	SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
139 	SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
140 	SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
141 	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
142 	SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
143 	SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
144 	SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
145 	SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
146 	SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
147 	SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
148 	SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
149 	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
150 	SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
151 	SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
152 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
153 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
154 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
155 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
156 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
157 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
158 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
159 	SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
160 	/* cp header registers */
161 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
162 	SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
163 	SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
164 	SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
165 	/* SE status registers */
166 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
167 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
168 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
169 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
170 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
171 	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
172 };
173 
174 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
175 	/* compute registers */
176 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
177 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
178 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
179 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
180 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
181 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
182 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
183 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
184 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
185 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
186 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
187 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
188 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
189 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
190 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
191 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
192 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
193 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
194 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
195 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
196 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
197 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
198 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
199 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
200 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
201 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
202 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
203 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
204 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
205 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
206 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
207 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
208 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
209 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
210 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
211 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
212 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
213 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
214 	SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
215 };
216 
217 static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
218 	/* gfx queue registers */
219 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
220 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
221 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
222 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
223 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
224 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
225 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
226 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
227 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
228 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
229 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
230 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
231 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
232 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
233 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
234 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
235 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
236 	SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
237 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
238 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
239 	SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
240 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
241 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
242 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
243 	SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
244 };
245 
246 static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
247 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
248 };
249 
250 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
251 {
252 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_GS_NGG_CLK_CTRL, 0x9fff8fff, 0x00000010),
253 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCGTT_WD_CLK_CTRL, 0xffff8fff, 0x00000010),
254 	SOC15_REG_GOLDEN_VALUE(GC, 0, regCPF_GCR_CNTL, 0x0007ffff, 0x0000c200),
255 	SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xffff001b, 0x00f01988),
256 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf0ffffff, 0x00880007),
257 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_ENHANCE_3, 0xfffffffd, 0x00000008),
258 	SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_SC_VRS_SURFACE_CNTL_1, 0xfff891ff, 0x55480100),
259 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
260 	SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
261 };
262 
263 #define DEFAULT_SH_MEM_CONFIG \
264 	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
265 	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
266 	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
267 
268 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev);
269 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev);
270 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev);
271 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev);
272 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev);
273 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev);
274 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev);
275 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
276                                  struct amdgpu_cu_info *cu_info);
277 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
278 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
279 				   u32 sh_num, u32 instance, int xcc_id);
280 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
281 
282 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
283 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
284 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
285 				     uint32_t val);
286 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
287 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
288 					   uint16_t pasid, uint32_t flush_type,
289 					   bool all_hub, uint8_t dst_sel);
290 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
291 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
292 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
293 				      bool enable);
294 
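/*
 * KIQ packet helpers: each gfx11_kiq_*() function below emits a single PM4
 * packet on the KIQ ring.  The dword counts they produce must match the
 * *_size fields advertised in gfx_v11_0_kiq_pm4_funcs (e.g. SET_RESOURCES
 * is 8 dwords: the PACKET3 header plus 7 payload dwords, including the
 * cleaner shader address).
 */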
295 static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
296 {
297 	struct amdgpu_device *adev = kiq_ring->adev;
298 	u64 shader_mc_addr;
299 
300 	/* Cleaner shader MC address */
301 	shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
302 
303 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
304 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
305 			  PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */
306 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
307 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
308 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
309 	amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
310 	amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
311 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
312 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
313 }
314 
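/*
 * Map a ring onto the hardware via the KIQ.  The ME and engine_sel fields
 * are derived from the ring type: compute queues live on ME1, gfx queues
 * use engine_sel 4 and MES queues use engine_sel 5.
 */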
315 static void gfx11_kiq_map_queues(struct amdgpu_ring *kiq_ring,
316 				 struct amdgpu_ring *ring)
317 {
318 	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
319 	uint64_t wptr_addr = ring->wptr_gpu_addr;
320 	uint32_t me = 0, eng_sel = 0;
321 
322 	switch (ring->funcs->type) {
323 	case AMDGPU_RING_TYPE_COMPUTE:
324 		me = 1;
325 		eng_sel = 0;
326 		break;
327 	case AMDGPU_RING_TYPE_GFX:
328 		me = 0;
329 		eng_sel = 4;
330 		break;
331 	case AMDGPU_RING_TYPE_MES:
332 		me = 2;
333 		eng_sel = 5;
334 		break;
335 	default:
336 		WARN_ON(1);
337 	}
338 
339 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
340 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
341 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
342 			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
343 			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
344 			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
345 			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
346 			  PACKET3_MAP_QUEUES_ME((me)) |
347 			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
348 			  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
349 			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
350 			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
351 	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
352 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
353 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
354 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
355 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
356 }
357 
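/*
 * Unmap (or preempt) a queue through the KIQ.  If MES is enabled and the
 * KIQ ring is not ready, the request is routed through
 * amdgpu_mes_unmap_legacy_queue() instead of an UNMAP_QUEUES packet.
 */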
358 static void gfx11_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
359 				   struct amdgpu_ring *ring,
360 				   enum amdgpu_unmap_queues_action action,
361 				   u64 gpu_addr, u64 seq)
362 {
363 	struct amdgpu_device *adev = kiq_ring->adev;
364 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
365 
366 	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
367 		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
368 		return;
369 	}
370 
371 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
372 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
373 			  PACKET3_UNMAP_QUEUES_ACTION(action) |
374 			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
375 			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
376 			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
377 	amdgpu_ring_write(kiq_ring,
378 		  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
379 
380 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
381 		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
382 		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
383 		amdgpu_ring_write(kiq_ring, seq);
384 	} else {
385 		amdgpu_ring_write(kiq_ring, 0);
386 		amdgpu_ring_write(kiq_ring, 0);
387 		amdgpu_ring_write(kiq_ring, 0);
388 	}
389 }
390 
391 static void gfx11_kiq_query_status(struct amdgpu_ring *kiq_ring,
392 				   struct amdgpu_ring *ring,
393 				   u64 addr,
394 				   u64 seq)
395 {
396 	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
397 
398 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
399 	amdgpu_ring_write(kiq_ring,
400 			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
401 			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
402 			  PACKET3_QUERY_STATUS_COMMAND(2));
403 	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
404 			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
405 			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
406 	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
407 	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
408 	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
409 	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
410 }
411 
412 static void gfx11_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
413 				uint16_t pasid, uint32_t flush_type,
414 				bool all_hub)
415 {
416 	gfx_v11_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
417 }
418 
419 static const struct kiq_pm4_funcs gfx_v11_0_kiq_pm4_funcs = {
420 	.kiq_set_resources = gfx11_kiq_set_resources,
421 	.kiq_map_queues = gfx11_kiq_map_queues,
422 	.kiq_unmap_queues = gfx11_kiq_unmap_queues,
423 	.kiq_query_status = gfx11_kiq_query_status,
424 	.kiq_invalidate_tlbs = gfx11_kiq_invalidate_tlbs,
425 	.set_resources_size = 8,
426 	.map_queues_size = 7,
427 	.unmap_queues_size = 6,
428 	.query_status_size = 7,
429 	.invalidate_tlbs_size = 2,
430 };
431 
432 static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
433 {
434 	adev->gfx.kiq[0].pmf = &gfx_v11_0_kiq_pm4_funcs;
435 }
436 
437 static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
438 {
439 	if (amdgpu_sriov_vf(adev))
440 		return;
441 
442 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
443 	case IP_VERSION(11, 0, 1):
444 	case IP_VERSION(11, 0, 4):
445 		soc15_program_register_sequence(adev,
446 						golden_settings_gc_11_0_1,
447 						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
448 		break;
449 	default:
450 		break;
451 	}
452 	soc15_program_register_sequence(adev,
453 					golden_settings_gc_11_0,
454 					(const u32)ARRAY_SIZE(golden_settings_gc_11_0));
455 
456 }
457 
458 static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
459 				       bool wc, uint32_t reg, uint32_t val)
460 {
461 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
462 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
463 			  WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
464 	amdgpu_ring_write(ring, reg);
465 	amdgpu_ring_write(ring, 0);
466 	amdgpu_ring_write(ring, val);
467 }
468 
469 static void gfx_v11_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
470 				  int mem_space, int opt, uint32_t addr0,
471 				  uint32_t addr1, uint32_t ref, uint32_t mask,
472 				  uint32_t inv)
473 {
474 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
475 	amdgpu_ring_write(ring,
476 			  /* memory (1) or register (0) */
477 			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
478 			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
479 			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
480 			   WAIT_REG_MEM_ENGINE(eng_sel)));
481 
482 	if (mem_space)
483 		BUG_ON(addr0 & 0x3); /* Dword align */
484 	amdgpu_ring_write(ring, addr0);
485 	amdgpu_ring_write(ring, addr1);
486 	amdgpu_ring_write(ring, ref);
487 	amdgpu_ring_write(ring, mask);
488 	amdgpu_ring_write(ring, inv); /* poll interval */
489 }
490 
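/*
 * Pad the ring with @num_nop NOP dwords.  A single NOP is just the packet
 * header; larger pads emit one PACKET3_NOP header sized so the CP skips the
 * following filler dwords (up to 0x3ffe per packet), which are then written
 * by amdgpu_ring_insert_nop().
 */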
491 static void gfx_v11_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
492 {
493 	/* Header itself is a NOP packet */
494 	if (num_nop == 1) {
495 		amdgpu_ring_write(ring, ring->funcs->nop);
496 		return;
497 	}
498 
499 	/* Max HW optimization is 0x3ffe dwords per NOP packet, followed by the remaining NOPs one at a time */
500 	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
501 
502 	/* Header is at index 0, followed by num_nop - 1 NOP packets */
503 	amdgpu_ring_insert_nop(ring, num_nop - 1);
504 }
505 
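/*
 * Basic ring sanity test: write 0xDEADBEEF to SCRATCH_REG0 through the ring
 * (via gfx_v11_0_ring_emit_wreg() on the KIQ, SET_UCONFIG_REG otherwise) and
 * poll the register until the value appears or adev->usec_timeout expires.
 */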
506 static int gfx_v11_0_ring_test_ring(struct amdgpu_ring *ring)
507 {
508 	struct amdgpu_device *adev = ring->adev;
509 	uint32_t scratch = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
510 	uint32_t tmp = 0;
511 	unsigned i;
512 	int r;
513 
514 	WREG32(scratch, 0xCAFEDEAD);
515 	r = amdgpu_ring_alloc(ring, 5);
516 	if (r) {
517 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
518 			  ring->idx, r);
519 		return r;
520 	}
521 
522 	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
523 		gfx_v11_0_ring_emit_wreg(ring, scratch, 0xDEADBEEF);
524 	} else {
525 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
526 		amdgpu_ring_write(ring, scratch -
527 				  PACKET3_SET_UCONFIG_REG_START);
528 		amdgpu_ring_write(ring, 0xDEADBEEF);
529 	}
530 	amdgpu_ring_commit(ring);
531 
532 	for (i = 0; i < adev->usec_timeout; i++) {
533 		tmp = RREG32(scratch);
534 		if (tmp == 0xDEADBEEF)
535 			break;
536 		if (amdgpu_emu_mode == 1)
537 			msleep(1);
538 		else
539 			udelay(1);
540 	}
541 
542 	if (i >= adev->usec_timeout)
543 		r = -ETIMEDOUT;
544 	return r;
545 }
546 
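/*
 * IB sanity test: submit a small indirect buffer containing a WRITE_DATA
 * packet that stores 0xDEADBEEF to a writeback slot (or MES context
 * offset), then wait for the fence and check the value in memory.
 */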
547 static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
548 {
549 	struct amdgpu_device *adev = ring->adev;
550 	struct amdgpu_ib ib;
551 	struct dma_fence *f = NULL;
552 	unsigned index;
553 	uint64_t gpu_addr;
554 	volatile uint32_t *cpu_ptr;
555 	long r;
556 
557 	/* MES KIQ fw doesn't support indirect buffers for now */
558 	if (adev->enable_mes_kiq &&
559 	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
560 		return 0;
561 
562 	memset(&ib, 0, sizeof(ib));
563 
564 	if (ring->is_mes_queue) {
565 		uint32_t padding, offset;
566 
567 		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
568 		padding = amdgpu_mes_ctx_get_offs(ring,
569 						  AMDGPU_MES_CTX_PADDING_OFFS);
570 
571 		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
572 		ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
573 
574 		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
575 		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
576 		*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
577 	} else {
578 		r = amdgpu_device_wb_get(adev, &index);
579 		if (r)
580 			return r;
581 
582 		gpu_addr = adev->wb.gpu_addr + (index * 4);
583 		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
584 		cpu_ptr = &adev->wb.wb[index];
585 
586 		r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
587 		if (r) {
588 			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
589 			goto err1;
590 		}
591 	}
592 
593 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
594 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
595 	ib.ptr[2] = lower_32_bits(gpu_addr);
596 	ib.ptr[3] = upper_32_bits(gpu_addr);
597 	ib.ptr[4] = 0xDEADBEEF;
598 	ib.length_dw = 5;
599 
600 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
601 	if (r)
602 		goto err2;
603 
604 	r = dma_fence_wait_timeout(f, false, timeout);
605 	if (r == 0) {
606 		r = -ETIMEDOUT;
607 		goto err2;
608 	} else if (r < 0) {
609 		goto err2;
610 	}
611 
612 	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
613 		r = 0;
614 	else
615 		r = -EINVAL;
616 err2:
617 	if (!ring->is_mes_queue)
618 		amdgpu_ib_free(&ib, NULL);
619 	dma_fence_put(f);
620 err1:
621 	if (!ring->is_mes_queue)
622 		amdgpu_device_wb_free(adev, index);
623 	return r;
624 }
625 
626 static void gfx_v11_0_free_microcode(struct amdgpu_device *adev)
627 {
628 	amdgpu_ucode_release(&adev->gfx.pfp_fw);
629 	amdgpu_ucode_release(&adev->gfx.me_fw);
630 	amdgpu_ucode_release(&adev->gfx.rlc_fw);
631 	amdgpu_ucode_release(&adev->gfx.mec_fw);
632 
633 	kfree(adev->gfx.rlc.register_list_format);
634 }
635 
636 static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
637 {
638 	const struct psp_firmware_header_v1_0 *toc_hdr;
639 	int err = 0;
640 
641 	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
642 				   AMDGPU_UCODE_REQUIRED,
643 				   "amdgpu/%s_toc.bin", ucode_prefix);
644 	if (err)
645 		goto out;
646 
647 	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
648 	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
649 	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
650 	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
651 	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
652 				le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
653 	return 0;
654 out:
655 	amdgpu_ucode_release(&adev->psp.toc_fw);
656 	return err;
657 }
658 
659 static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
660 {
661 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
662 	case IP_VERSION(11, 0, 0):
663 	case IP_VERSION(11, 0, 2):
664 	case IP_VERSION(11, 0, 3):
665 		if ((adev->gfx.me_fw_version >= 1505) &&
666 		    (adev->gfx.pfp_fw_version >= 1600) &&
667 		    (adev->gfx.mec_fw_version >= 512)) {
668 			if (amdgpu_sriov_vf(adev))
669 				adev->gfx.cp_gfx_shadow = true;
670 			else
671 				adev->gfx.cp_gfx_shadow = false;
672 		}
673 		break;
674 	default:
675 		adev->gfx.cp_gfx_shadow = false;
676 		break;
677 	}
678 }
679 
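/*
 * Request and register the CP (PFP/ME/MEC), RLC and optional TOC firmware.
 * RS64 mode is selected from the PFP firmware header version (2.0 or
 * newer); bare-metal GC 11.0.0 parts with PCI revision 0xCE get the
 * alternate gc_11_0_0_rlc_1.bin image.
 */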
680 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
681 {
682 	char ucode_prefix[25];
683 	int err;
684 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
685 	uint16_t version_major;
686 	uint16_t version_minor;
687 
688 	DRM_DEBUG("\n");
689 
690 	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
691 	err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
692 				   AMDGPU_UCODE_REQUIRED,
693 				   "amdgpu/%s_pfp.bin", ucode_prefix);
694 	if (err)
695 		goto out;
696 	/* check pfp fw hdr version to decide whether to enable rs64 for gfx11 */
697 	adev->gfx.rs64_enable = amdgpu_ucode_hdr_version(
698 				(union amdgpu_firmware_header *)
699 				adev->gfx.pfp_fw->data, 2, 0);
700 	if (adev->gfx.rs64_enable) {
701 		dev_info(adev->dev, "CP RS64 enable\n");
702 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP);
703 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK);
704 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK);
705 	} else {
706 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
707 	}
708 
709 	err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
710 				   AMDGPU_UCODE_REQUIRED,
711 				   "amdgpu/%s_me.bin", ucode_prefix);
712 	if (err)
713 		goto out;
714 	if (adev->gfx.rs64_enable) {
715 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME);
716 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK);
717 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK);
718 	} else {
719 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
720 	}
721 
722 	if (!amdgpu_sriov_vf(adev)) {
723 		if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) &&
724 		    adev->pdev->revision == 0xCE)
725 			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
726 						   AMDGPU_UCODE_REQUIRED,
727 						   "amdgpu/gc_11_0_0_rlc_1.bin");
728 		else
729 			err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
730 						   AMDGPU_UCODE_REQUIRED,
731 						   "amdgpu/%s_rlc.bin", ucode_prefix);
732 		if (err)
733 			goto out;
734 		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
735 		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
736 		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
737 		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
738 		if (err)
739 			goto out;
740 	}
741 
742 	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
743 				   AMDGPU_UCODE_REQUIRED,
744 				   "amdgpu/%s_mec.bin", ucode_prefix);
745 	if (err)
746 		goto out;
747 	if (adev->gfx.rs64_enable) {
748 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
749 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
750 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
751 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
752 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
753 	} else {
754 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
755 		amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
756 	}
757 
758 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
759 		err = gfx_v11_0_init_toc_microcode(adev, ucode_prefix);
760 
761 	/* only one MEC for gfx 11.0.0. */
762 	adev->gfx.mec2_fw = NULL;
763 
764 	gfx_v11_0_check_fw_cp_gfx_shadow(adev);
765 
766 	if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) {
767 		err = adev->gfx.imu.funcs->init_microcode(adev);
768 		if (err)
769 			DRM_ERROR("Failed to init imu firmware!\n");
770 		return err;
771 	}
772 
773 out:
774 	if (err) {
775 		amdgpu_ucode_release(&adev->gfx.pfp_fw);
776 		amdgpu_ucode_release(&adev->gfx.me_fw);
777 		amdgpu_ucode_release(&adev->gfx.rlc_fw);
778 		amdgpu_ucode_release(&adev->gfx.mec_fw);
779 	}
780 
781 	return err;
782 }
783 
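/*
 * Clear-state buffer size in dwords: 2 for PREAMBLE begin, 3 for context
 * control, 2 + reg_count per SECT_CONTEXT extent, 3 for the
 * PA_SC_TILE_STEERING_OVERRIDE write, 2 for PREAMBLE end and 2 for the
 * final CLEAR_STATE packet.
 */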
784 static u32 gfx_v11_0_get_csb_size(struct amdgpu_device *adev)
785 {
786 	u32 count = 0;
787 	const struct cs_section_def *sect = NULL;
788 	const struct cs_extent_def *ext = NULL;
789 
790 	/* begin clear state */
791 	count += 2;
792 	/* context control state */
793 	count += 3;
794 
795 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
796 		for (ext = sect->section; ext->extent != NULL; ++ext) {
797 			if (sect->id == SECT_CONTEXT)
798 				count += 2 + ext->reg_count;
799 			else
800 				return 0;
801 		}
802 	}
803 
804 	/* set PA_SC_TILE_STEERING_OVERRIDE */
805 	count += 3;
806 	/* end clear state */
807 	count += 2;
808 	/* clear state */
809 	count += 2;
810 
811 	return count;
812 }
813 
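/* Fill the clear-state buffer with the layout sized by gfx_v11_0_get_csb_size(). */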
814 static void gfx_v11_0_get_csb_buffer(struct amdgpu_device *adev,
815 				    volatile u32 *buffer)
816 {
817 	u32 count = 0, i;
818 	const struct cs_section_def *sect = NULL;
819 	const struct cs_extent_def *ext = NULL;
820 	int ctx_reg_offset;
821 
822 	if (adev->gfx.rlc.cs_data == NULL)
823 		return;
824 	if (buffer == NULL)
825 		return;
826 
827 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
828 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
829 
830 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
831 	buffer[count++] = cpu_to_le32(0x80000000);
832 	buffer[count++] = cpu_to_le32(0x80000000);
833 
834 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
835 		for (ext = sect->section; ext->extent != NULL; ++ext) {
836 			if (sect->id == SECT_CONTEXT) {
837 				buffer[count++] =
838 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
839 				buffer[count++] = cpu_to_le32(ext->reg_index -
840 						PACKET3_SET_CONTEXT_REG_START);
841 				for (i = 0; i < ext->reg_count; i++)
842 					buffer[count++] = cpu_to_le32(ext->extent[i]);
843 			} else {
844 				return;
845 			}
846 		}
847 	}
848 
849 	ctx_reg_offset =
850 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
851 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
852 	buffer[count++] = cpu_to_le32(ctx_reg_offset);
853 	buffer[count++] = cpu_to_le32(adev->gfx.config.pa_sc_tile_steering_override);
854 
855 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
856 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
857 
858 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
859 	buffer[count++] = cpu_to_le32(0);
860 }
861 
862 static void gfx_v11_0_rlc_fini(struct amdgpu_device *adev)
863 {
864 	/* clear state block */
865 	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
866 			&adev->gfx.rlc.clear_state_gpu_addr,
867 			(void **)&adev->gfx.rlc.cs_ptr);
868 
869 	/* jump table block */
870 	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
871 			&adev->gfx.rlc.cp_table_gpu_addr,
872 			(void **)&adev->gfx.rlc.cp_table_ptr);
873 }
874 
875 static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
876 {
877 	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
878 
879 	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
880 	reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
881 	reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1);
882 	reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2);
883 	reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG3);
884 	reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL);
885 	reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX);
886 	reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, regRLC_SPARE_INT_0);
887 	adev->gfx.rlc.rlcg_reg_access_supported = true;
888 }
889 
890 static int gfx_v11_0_rlc_init(struct amdgpu_device *adev)
891 {
892 	const struct cs_section_def *cs_data;
893 	int r;
894 
895 	adev->gfx.rlc.cs_data = gfx11_cs_data;
896 
897 	cs_data = adev->gfx.rlc.cs_data;
898 
899 	if (cs_data) {
900 		/* init clear state block */
901 		r = amdgpu_gfx_rlc_init_csb(adev);
902 		if (r)
903 			return r;
904 	}
905 
906 	/* init spm vmid with 0xf */
907 	if (adev->gfx.rlc.funcs->update_spm_vmid)
908 		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);
909 
910 	return 0;
911 }
912 
913 static void gfx_v11_0_mec_fini(struct amdgpu_device *adev)
914 {
915 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
916 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
917 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
918 }
919 
920 static void gfx_v11_0_me_init(struct amdgpu_device *adev)
921 {
922 	bitmap_zero(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
923 
924 	amdgpu_gfx_graphics_queue_acquire(adev);
925 }
926 
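/*
 * Allocate the MEC HPD EOP backing store in GTT: one GFX11_MEC_HPD_SIZE
 * (2 KiB) slot per enabled compute ring, zero-initialized.
 */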
927 static int gfx_v11_0_mec_init(struct amdgpu_device *adev)
928 {
929 	int r;
930 	u32 *hpd;
931 	size_t mec_hpd_size;
932 
933 	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
934 
935 	/* take ownership of the relevant compute queues */
936 	amdgpu_gfx_compute_queue_acquire(adev);
937 	mec_hpd_size = adev->gfx.num_compute_rings * GFX11_MEC_HPD_SIZE;
938 
939 	if (mec_hpd_size) {
940 		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
941 					      AMDGPU_GEM_DOMAIN_GTT,
942 					      &adev->gfx.mec.hpd_eop_obj,
943 					      &adev->gfx.mec.hpd_eop_gpu_addr,
944 					      (void **)&hpd);
945 		if (r) {
946 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
947 			gfx_v11_0_mec_fini(adev);
948 			return r;
949 		}
950 
951 		memset(hpd, 0, mec_hpd_size);
952 
953 		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
954 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
955 	}
956 
957 	return 0;
958 }
959 
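/*
 * Wave debug helpers: SQ_IND_INDEX selects the wave (and optionally a lane
 * with auto-increment) and SQ_IND_DATA returns the indexed status register
 * or GPR.  Callers typically select the target SE/SH via select_se_sh()
 * beforehand.
 */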
960 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t wave, uint32_t address)
961 {
962 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
963 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
964 		(address << SQ_IND_INDEX__INDEX__SHIFT));
965 	return RREG32_SOC15(GC, 0, regSQ_IND_DATA);
966 }
967 
968 static void wave_read_regs(struct amdgpu_device *adev, uint32_t wave,
969 			   uint32_t thread, uint32_t regno,
970 			   uint32_t num, uint32_t *out)
971 {
972 	WREG32_SOC15(GC, 0, regSQ_IND_INDEX,
973 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
974 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
975 		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
976 		(SQ_IND_INDEX__AUTO_INCR_MASK));
977 	while (num--)
978 		*(out++) = RREG32_SOC15(GC, 0, regSQ_IND_DATA);
979 }
980 
981 static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
982 {
983 	/* in gfx11 the SIMD_ID is specified as part of the INSTANCE
984 	 * field when performing a select_se_sh so it should be
985 	 * zero here */
986 	WARN_ON(simd != 0);
987 
988 	/* type 3 wave data */
989 	dst[(*no_fields)++] = 3;
990 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_STATUS);
991 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_LO);
992 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_PC_HI);
993 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_LO);
994 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_EXEC_HI);
995 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID1);
996 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_HW_ID2);
997 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_GPR_ALLOC);
998 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_LDS_ALLOC);
999 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_TRAPSTS);
1000 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS);
1001 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
1002 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
1003 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
1004 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
1005 }
1006 
1007 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1008 				     uint32_t wave, uint32_t start,
1009 				     uint32_t size, uint32_t *dst)
1010 {
1011 	WARN_ON(simd != 0);
1012 
1013 	wave_read_regs(
1014 		adev, wave, 0, start + SQIND_WAVE_SGPRS_OFFSET, size,
1015 		dst);
1016 }
1017 
1018 static void gfx_v11_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1019 				      uint32_t wave, uint32_t thread,
1020 				      uint32_t start, uint32_t size,
1021 				      uint32_t *dst)
1022 {
1023 	wave_read_regs(
1024 		adev, wave, thread,
1025 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1026 }
1027 
1028 static void gfx_v11_0_select_me_pipe_q(struct amdgpu_device *adev,
1029 					u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1030 {
1031 	soc21_grbm_select(adev, me, pipe, q, vm);
1032 }
1033 
1034 /* all sizes are in bytes */
1035 #define MQD_SHADOW_BASE_SIZE      73728
1036 #define MQD_SHADOW_BASE_ALIGNMENT 256
1037 #define MQD_FWWORKAREA_SIZE       484
1038 #define MQD_FWWORKAREA_ALIGNMENT  256
1039 
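/*
 * Report the per-queue shadow and FW work-area sizes used for gfx
 * preemption.  Only valid when the CP firmware advertises shadow support
 * (see gfx_v11_0_check_fw_cp_gfx_shadow()); otherwise returns -ENOTSUPP.
 */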
1040 static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
1041 					 struct amdgpu_gfx_shadow_info *shadow_info)
1042 {
1043 	if (adev->gfx.cp_gfx_shadow) {
1044 		shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
1045 		shadow_info->shadow_alignment = MQD_SHADOW_BASE_ALIGNMENT;
1046 		shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
1047 		shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
1048 		return 0;
1049 	} else {
1050 		memset(shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
1051 		return -ENOTSUPP;
1052 	}
1053 }
1054 
1055 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
1056 	.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
1057 	.select_se_sh = &gfx_v11_0_select_se_sh,
1058 	.read_wave_data = &gfx_v11_0_read_wave_data,
1059 	.read_wave_sgprs = &gfx_v11_0_read_wave_sgprs,
1060 	.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
1061 	.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
1062 	.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
1063 	.get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
1064 };
1065 
1066 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
1067 {
1068 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1069 	case IP_VERSION(11, 0, 0):
1070 	case IP_VERSION(11, 0, 2):
1071 		adev->gfx.config.max_hw_contexts = 8;
1072 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1073 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1074 		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
1075 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1076 		break;
1077 	case IP_VERSION(11, 0, 3):
1078 		adev->gfx.ras = &gfx_v11_0_3_ras;
1079 		adev->gfx.config.max_hw_contexts = 8;
1080 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1081 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1082 		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
1083 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1084 		break;
1085 	case IP_VERSION(11, 0, 1):
1086 	case IP_VERSION(11, 0, 4):
1087 	case IP_VERSION(11, 5, 0):
1088 	case IP_VERSION(11, 5, 1):
1089 	case IP_VERSION(11, 5, 2):
1090 		adev->gfx.config.max_hw_contexts = 8;
1091 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1092 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1093 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1094 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x300;
1095 		break;
1096 	default:
1097 		BUG();
1098 		break;
1099 	}
1100 
1101 	return 0;
1102 }
1103 
1104 static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
1105 				   int me, int pipe, int queue)
1106 {
1107 	struct amdgpu_ring *ring;
1108 	unsigned int irq_type;
1109 	unsigned int hw_prio;
1110 
1111 	ring = &adev->gfx.gfx_ring[ring_id];
1112 
1113 	ring->me = me;
1114 	ring->pipe = pipe;
1115 	ring->queue = queue;
1116 
1117 	ring->ring_obj = NULL;
1118 	ring->use_doorbell = true;
1119 
1120 	if (!ring_id)
1121 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1122 	else
1123 		ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1;
1124 	ring->vm_hub = AMDGPU_GFXHUB(0);
1125 	sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1126 
1127 	irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
1128 	hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
1129 		AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
1130 	return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1131 				hw_prio, NULL);
1132 }
1133 
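/*
 * Compute rings map onto MEC pipes: the hardware ME index is mec + 1, the
 * doorbell comes from the mec_ring0 range and each ring gets its own
 * GFX11_MEC_HPD_SIZE slice of the EOP buffer allocated in mec_init.
 */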
1134 static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1135 				       int mec, int pipe, int queue)
1136 {
1137 	int r;
1138 	unsigned irq_type;
1139 	struct amdgpu_ring *ring;
1140 	unsigned int hw_prio;
1141 
1142 	ring = &adev->gfx.compute_ring[ring_id];
1143 
1144 	/* mec0 is me1 */
1145 	ring->me = mec + 1;
1146 	ring->pipe = pipe;
1147 	ring->queue = queue;
1148 
1149 	ring->ring_obj = NULL;
1150 	ring->use_doorbell = true;
1151 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1152 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1153 				+ (ring_id * GFX11_MEC_HPD_SIZE);
1154 	ring->vm_hub = AMDGPU_GFXHUB(0);
1155 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1156 
1157 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1158 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1159 		+ ring->pipe;
1160 	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1161 			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
1162 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1163 	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
1164 			     hw_prio, NULL);
1165 	if (r)
1166 		return r;
1167 
1168 	return 0;
1169 }
1170 
1171 static struct {
1172 	SOC21_FIRMWARE_ID	id;
1173 	unsigned int		offset;
1174 	unsigned int		size;
1175 } rlc_autoload_info[SOC21_FIRMWARE_ID_MAX];
1176 
1177 static void gfx_v11_0_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
1178 {
1179 	RLC_TABLE_OF_CONTENT *ucode = rlc_toc;
1180 
1181 	while (ucode && (ucode->id > SOC21_FIRMWARE_ID_INVALID) &&
1182 			(ucode->id < SOC21_FIRMWARE_ID_MAX)) {
1183 		rlc_autoload_info[ucode->id].id = ucode->id;
1184 		rlc_autoload_info[ucode->id].offset = ucode->offset * 4;
1185 		rlc_autoload_info[ucode->id].size = ucode->size * 4;
1186 
1187 		ucode++;
1188 	}
1189 }
1190 
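/*
 * The autoload buffer must hold every firmware listed in the RLC table of
 * contents; if the summed sizes fall short of the last entry's (aligned)
 * offset, use that entry's end offset instead.
 */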
1191 static uint32_t gfx_v11_0_calc_toc_total_size(struct amdgpu_device *adev)
1192 {
1193 	uint32_t total_size = 0;
1194 	SOC21_FIRMWARE_ID id;
1195 
1196 	gfx_v11_0_parse_rlc_toc(adev, adev->psp.toc.start_addr);
1197 
1198 	for (id = SOC21_FIRMWARE_ID_RLC_G_UCODE; id < SOC21_FIRMWARE_ID_MAX; id++)
1199 		total_size += rlc_autoload_info[id].size;
1200 
1201 	/* In case the offset in rlc toc ucode is aligned */
1202 	if (total_size < rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset)
1203 		total_size = rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].offset +
1204 			rlc_autoload_info[SOC21_FIRMWARE_ID_MAX-1].size;
1205 
1206 	return total_size;
1207 }
1208 
1209 static int gfx_v11_0_rlc_autoload_buffer_init(struct amdgpu_device *adev)
1210 {
1211 	int r;
1212 	uint32_t total_size;
1213 
1214 	total_size = gfx_v11_0_calc_toc_total_size(adev);
1215 
1216 	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
1217 				      AMDGPU_GEM_DOMAIN_VRAM |
1218 				      AMDGPU_GEM_DOMAIN_GTT,
1219 				      &adev->gfx.rlc.rlc_autoload_bo,
1220 				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
1221 				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1222 
1223 	if (r) {
1224 		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
1225 		return r;
1226 	}
1227 
1228 	return 0;
1229 }
1230 
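/*
 * Copy one firmware image into its TOC-assigned slot in the autoload
 * buffer, truncating or zero-padding to the slot size, and flag it in the
 * 64-bit autoload mask (the RS64 PFP/ME entries are not flagged here).
 */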
1231 static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
1232 					      SOC21_FIRMWARE_ID id,
1233 					      const void *fw_data,
1234 					      uint32_t fw_size,
1235 					      uint32_t *fw_autoload_mask)
1236 {
1237 	uint32_t toc_offset;
1238 	uint32_t toc_fw_size;
1239 	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
1240 
1241 	if (id <= SOC21_FIRMWARE_ID_INVALID || id >= SOC21_FIRMWARE_ID_MAX)
1242 		return;
1243 
1244 	toc_offset = rlc_autoload_info[id].offset;
1245 	toc_fw_size = rlc_autoload_info[id].size;
1246 
1247 	if (fw_size == 0)
1248 		fw_size = toc_fw_size;
1249 
1250 	if (fw_size > toc_fw_size)
1251 		fw_size = toc_fw_size;
1252 
1253 	memcpy(ptr + toc_offset, fw_data, fw_size);
1254 
1255 	if (fw_size < toc_fw_size)
1256 		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
1257 
1258 	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
1259 		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
1260 }
1261 
1262 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
1263 							uint32_t *fw_autoload_mask)
1264 {
1265 	void *data;
1266 	uint32_t size;
1267 	uint64_t *toc_ptr;
1268 
1269 	*(uint64_t *)fw_autoload_mask |= 0x1;
1270 
1271 	DRM_DEBUG("rlc autoload enabled fw: 0x%llx\n", *(uint64_t *)fw_autoload_mask);
1272 
1273 	data = adev->psp.toc.start_addr;
1274 	size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_TOC].size;
1275 
1276 	toc_ptr = (uint64_t *)data + size / 8 - 1;
1277 	*toc_ptr = *(uint64_t *)fw_autoload_mask;
1278 
1279 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_TOC,
1280 					data, size, fw_autoload_mask);
1281 }
1282 
1283 static void gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev,
1284 							uint32_t *fw_autoload_mask)
1285 {
1286 	const __le32 *fw_data;
1287 	uint32_t fw_size;
1288 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1289 	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
1290 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
1291 	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
1292 	uint16_t version_major, version_minor;
1293 
1294 	if (adev->gfx.rs64_enable) {
1295 		/* pfp ucode */
1296 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1297 			adev->gfx.pfp_fw->data;
1298 		/* instruction */
1299 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1300 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1301 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1302 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP,
1303 						fw_data, fw_size, fw_autoload_mask);
1304 		/* data */
1305 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1306 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1307 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1308 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK,
1309 						fw_data, fw_size, fw_autoload_mask);
1310 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_PFP_P1_STACK,
1311 						fw_data, fw_size, fw_autoload_mask);
1312 		/* me ucode */
1313 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1314 			adev->gfx.me_fw->data;
1315 		/* instruction */
1316 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1317 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1318 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1319 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME,
1320 						fw_data, fw_size, fw_autoload_mask);
1321 		/* data */
1322 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1323 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1324 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1325 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P0_STACK,
1326 						fw_data, fw_size, fw_autoload_mask);
1327 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_ME_P1_STACK,
1328 						fw_data, fw_size, fw_autoload_mask);
1329 		/* mec ucode */
1330 		cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
1331 			adev->gfx.mec_fw->data;
1332 		/* instruction */
1333 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1334 			le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
1335 		fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
1336 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC,
1337 						fw_data, fw_size, fw_autoload_mask);
1338 		/* data */
1339 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1340 			le32_to_cpu(cpv2_hdr->data_offset_bytes));
1341 		fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
1342 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK,
1343 						fw_data, fw_size, fw_autoload_mask);
1344 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P1_STACK,
1345 						fw_data, fw_size, fw_autoload_mask);
1346 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P2_STACK,
1347 						fw_data, fw_size, fw_autoload_mask);
1348 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RS64_MEC_P3_STACK,
1349 						fw_data, fw_size, fw_autoload_mask);
1350 	} else {
1351 		/* pfp ucode */
1352 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1353 			adev->gfx.pfp_fw->data;
1354 		fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
1355 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1356 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1357 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_PFP,
1358 						fw_data, fw_size, fw_autoload_mask);
1359 
1360 		/* me ucode */
1361 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1362 			adev->gfx.me_fw->data;
1363 		fw_data = (const __le32 *)(adev->gfx.me_fw->data +
1364 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1365 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1366 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_ME,
1367 						fw_data, fw_size, fw_autoload_mask);
1368 
1369 		/* mec ucode */
1370 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1371 			adev->gfx.mec_fw->data;
1372 		fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1373 				le32_to_cpu(cp_hdr->header.ucode_array_offset_bytes));
1374 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1375 			cp_hdr->jt_size * 4;
1376 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_CP_MEC,
1377 						fw_data, fw_size, fw_autoload_mask);
1378 	}
1379 
1380 	/* rlc ucode */
1381 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
1382 		adev->gfx.rlc_fw->data;
1383 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1384 			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
1385 	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
1386 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLC_G_UCODE,
1387 					fw_data, fw_size, fw_autoload_mask);
1388 
1389 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1390 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1391 	if (version_major == 2) {
1392 		if (version_minor >= 2) {
1393 			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1394 
1395 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1396 					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1397 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1398 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_UCODE,
1399 					fw_data, fw_size, fw_autoload_mask);
1400 
1401 			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1402 					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1403 			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1404 			gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev, SOC21_FIRMWARE_ID_RLX6_DRAM_BOOT,
1405 					fw_data, fw_size, fw_autoload_mask);
1406 		}
1407 	}
1408 }
1409 
1410 static void gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev,
1411 							uint32_t *fw_autoload_mask)
1412 {
1413 	const __le32 *fw_data;
1414 	uint32_t fw_size;
1415 	const struct sdma_firmware_header_v2_0 *sdma_hdr;
1416 
1417 	sdma_hdr = (const struct sdma_firmware_header_v2_0 *)
1418 		adev->sdma.instance[0].fw->data;
1419 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1420 			le32_to_cpu(sdma_hdr->header.ucode_array_offset_bytes));
1421 	fw_size = le32_to_cpu(sdma_hdr->ctx_ucode_size_bytes);
1422 
1423 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1424 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH0, fw_data, fw_size, fw_autoload_mask);
1425 
1426 	fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1427 			le32_to_cpu(sdma_hdr->ctl_ucode_offset));
1428 	fw_size = le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes);
1429 
1430 	gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1431 			SOC21_FIRMWARE_ID_SDMA_UCODE_TH1, fw_data, fw_size, fw_autoload_mask);
1432 }
1433 
1434 static void gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev,
1435 							uint32_t *fw_autoload_mask)
1436 {
1437 	const __le32 *fw_data;
1438 	unsigned fw_size;
1439 	const struct mes_firmware_header_v1_0 *mes_hdr;
1440 	int pipe, ucode_id, data_id;
1441 
1442 	for (pipe = 0; pipe < 2; pipe++) {
1443 		if (pipe == 0) {
1444 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P0;
1445 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P0_STACK;
1446 		} else {
1447 			ucode_id = SOC21_FIRMWARE_ID_RS64_MES_P1;
1448 			data_id  = SOC21_FIRMWARE_ID_RS64_MES_P1_STACK;
1449 		}
1450 
1451 		mes_hdr = (const struct mes_firmware_header_v1_0 *)
1452 			adev->mes.fw[pipe]->data;
1453 
1454 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1455 				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1456 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1457 
1458 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1459 				ucode_id, fw_data, fw_size, fw_autoload_mask);
1460 
1461 		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1462 				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1463 		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1464 
1465 		gfx_v11_0_rlc_backdoor_autoload_copy_ucode(adev,
1466 				data_id, fw_data, fw_size, fw_autoload_mask);
1467 	}
1468 }
1469 
1470 static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1471 {
1472 	uint32_t rlc_g_offset, rlc_g_size;
1473 	uint64_t gpu_addr;
1474 	uint32_t autoload_fw_id[2];
1475 
1476 	memset(autoload_fw_id, 0, sizeof(uint32_t) * 2);
1477 
1478 	/* RLC autoload sequence 2: copy ucode */
1479 	gfx_v11_0_rlc_backdoor_autoload_copy_sdma_ucode(adev, autoload_fw_id);
1480 	gfx_v11_0_rlc_backdoor_autoload_copy_gfx_ucode(adev, autoload_fw_id);
1481 	gfx_v11_0_rlc_backdoor_autoload_copy_mes_ucode(adev, autoload_fw_id);
1482 	gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(adev, autoload_fw_id);
1483 
1484 	rlc_g_offset = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].offset;
1485 	rlc_g_size = rlc_autoload_info[SOC21_FIRMWARE_ID_RLC_G_UCODE].size;
1486 	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset;
1487 
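	/* Point the GFX_IMU_RLC_BOOTLOADER registers at the RLC_G ucode
	 * that was copied into the autoload buffer above.
	 */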
1488 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_HI, upper_32_bits(gpu_addr));
1489 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_ADDR_LO, lower_32_bits(gpu_addr));
1490 
1491 	WREG32_SOC15(GC, 0, regGFX_IMU_RLC_BOOTLOADER_SIZE, rlc_g_size);
1492 
1493 	/* RLC autoload sequence 3: load IMU fw */
1494 	if (adev->gfx.imu.funcs->load_microcode)
1495 		adev->gfx.imu.funcs->load_microcode(adev);
1496 	/* RLC autoload sequence 4 init IMU fw */
1497 	if (adev->gfx.imu.funcs->setup_imu)
1498 		adev->gfx.imu.funcs->setup_imu(adev);
1499 	if (adev->gfx.imu.funcs->start_imu)
1500 		adev->gfx.imu.funcs->start_imu(adev);
1501 
1502 	/* RLC autoload sequence 5 disable gpa mode */
1503 	gfx_v11_0_disable_gpa_mode(adev);
1504 
1505 	return 0;
1506 }
1507 
1508 static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
1509 {
1510 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
1511 	uint32_t *ptr;
1512 	uint32_t inst;
1513 
1514 	ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
1515 	if (!ptr) {
1516 		DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
1517 		adev->gfx.ip_dump_core = NULL;
1518 	} else {
1519 		adev->gfx.ip_dump_core = ptr;
1520 	}
1521 
1522 	/* Allocate memory for compute queue registers for all the instances */
1523 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
1524 	inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
1525 		adev->gfx.mec.num_queue_per_pipe;
1526 
1527 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1528 	if (!ptr) {
1529 		DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
1530 		adev->gfx.ip_dump_compute_queues = NULL;
1531 	} else {
1532 		adev->gfx.ip_dump_compute_queues = ptr;
1533 	}
1534 
1535 	/* Allocate memory for gfx queue registers for all the instances */
1536 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
1537 	inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
1538 		adev->gfx.me.num_queue_per_pipe;
1539 
1540 	ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
1541 	if (!ptr) {
1542 		DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
1543 		adev->gfx.ip_dump_gfx_queues = NULL;
1544 	} else {
1545 		adev->gfx.ip_dump_gfx_queues = ptr;
1546 	}
1547 }
1548 
1549 static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
1550 {
1551 	int i, j, k, r, ring_id = 0;
1552 	int xcc_id = 0;
1553 	struct amdgpu_device *adev = ip_block->adev;
1554 
1555 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1556 	case IP_VERSION(11, 0, 0):
1557 	case IP_VERSION(11, 0, 2):
1558 	case IP_VERSION(11, 0, 3):
1559 		adev->gfx.me.num_me = 1;
1560 		adev->gfx.me.num_pipe_per_me = 1;
1561 		adev->gfx.me.num_queue_per_pipe = 1;
1562 		adev->gfx.mec.num_mec = 2;
1563 		adev->gfx.mec.num_pipe_per_mec = 4;
1564 		adev->gfx.mec.num_queue_per_pipe = 4;
1565 		break;
1566 	case IP_VERSION(11, 0, 1):
1567 	case IP_VERSION(11, 0, 4):
1568 	case IP_VERSION(11, 5, 0):
1569 	case IP_VERSION(11, 5, 1):
1570 	case IP_VERSION(11, 5, 2):
1571 		adev->gfx.me.num_me = 1;
1572 		adev->gfx.me.num_pipe_per_me = 1;
1573 		adev->gfx.me.num_queue_per_pipe = 1;
1574 		adev->gfx.mec.num_mec = 1;
1575 		adev->gfx.mec.num_pipe_per_mec = 4;
1576 		adev->gfx.mec.num_queue_per_pipe = 4;
1577 		break;
1578 	default:
1579 		adev->gfx.me.num_me = 1;
1580 		adev->gfx.me.num_pipe_per_me = 1;
1581 		adev->gfx.me.num_queue_per_pipe = 1;
1582 		adev->gfx.mec.num_mec = 1;
1583 		adev->gfx.mec.num_pipe_per_mec = 4;
1584 		adev->gfx.mec.num_queue_per_pipe = 8;
1585 		break;
1586 	}
1587 
1588 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1589 	case IP_VERSION(11, 0, 0):
1590 	case IP_VERSION(11, 0, 2):
1591 	case IP_VERSION(11, 0, 3):
1592 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
1593 		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
1594 		if (adev->gfx.me_fw_version >= 2280 &&
1595 		    adev->gfx.pfp_fw_version >= 2370 &&
1596 		    adev->gfx.mec_fw_version >= 2450 &&
1597 		    adev->mes.fw_version[0] >= 99) {
1598 			adev->gfx.enable_cleaner_shader = true;
1599 			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
1600 			if (r) {
1601 				adev->gfx.enable_cleaner_shader = false;
1602 				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
1603 			}
1604 		}
1605 		break;
1606 	default:
1607 		adev->gfx.enable_cleaner_shader = false;
1608 		break;
1609 	}
1610 
1611 	/* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */
1612 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) &&
1613 	    amdgpu_sriov_is_pp_one_vf(adev))
1614 		adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG;
1615 
1616 	/* EOP Event */
1617 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1618 			      GFX_11_0_0__SRCID__CP_EOP_INTERRUPT,
1619 			      &adev->gfx.eop_irq);
1620 	if (r)
1621 		return r;
1622 
1623 	/* Bad opcode Event */
1624 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1625 			      GFX_11_0_0__SRCID__CP_BAD_OPCODE_ERROR,
1626 			      &adev->gfx.bad_op_irq);
1627 	if (r)
1628 		return r;
1629 
1630 	/* Privileged reg */
1631 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1632 			      GFX_11_0_0__SRCID__CP_PRIV_REG_FAULT,
1633 			      &adev->gfx.priv_reg_irq);
1634 	if (r)
1635 		return r;
1636 
1637 	/* Privileged inst */
1638 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GRBM_CP,
1639 			      GFX_11_0_0__SRCID__CP_PRIV_INSTR_FAULT,
1640 			      &adev->gfx.priv_inst_irq);
1641 	if (r)
1642 		return r;
1643 
1644 	/* FED error */
1645 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
1646 				  GFX_11_0_0__SRCID__RLC_GC_FED_INTERRUPT,
1647 				  &adev->gfx.rlc_gc_fed_irq);
1648 	if (r)
1649 		return r;
1650 
1651 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1652 
1653 	gfx_v11_0_me_init(adev);
1654 
1655 	r = gfx_v11_0_rlc_init(adev);
1656 	if (r) {
1657 		DRM_ERROR("Failed to init rlc BOs!\n");
1658 		return r;
1659 	}
1660 
1661 	r = gfx_v11_0_mec_init(adev);
1662 	if (r) {
1663 		DRM_ERROR("Failed to init MEC BOs!\n");
1664 		return r;
1665 	}
1666 
1667 	/* set up the gfx ring */
1668 	for (i = 0; i < adev->gfx.me.num_me; i++) {
1669 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
1670 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
1671 				if (!amdgpu_gfx_is_me_queue_enabled(adev, i, k, j))
1672 					continue;
1673 
1674 				r = gfx_v11_0_gfx_ring_init(adev, ring_id,
1675 							    i, k, j);
1676 				if (r)
1677 					return r;
1678 				ring_id++;
1679 			}
1680 		}
1681 	}
1682 
1683 	ring_id = 0;
1684 	/* set up the compute queues - allocate horizontally across pipes */
1685 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1686 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1687 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1688 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
1689 								     k, j))
1690 					continue;
1691 
1692 				r = gfx_v11_0_compute_ring_init(adev, ring_id,
1693 								i, k, j);
1694 				if (r)
1695 					return r;
1696 
1697 				ring_id++;
1698 			}
1699 		}
1700 	}
1701 
1702 	adev->gfx.gfx_supported_reset =
1703 		amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
1704 	adev->gfx.compute_supported_reset =
1705 		amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
1706 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1707 	case IP_VERSION(11, 0, 0):
1708 	case IP_VERSION(11, 0, 2):
1709 	case IP_VERSION(11, 0, 3):
1710 		if ((adev->gfx.me_fw_version >= 2280) &&
1711 		    (adev->gfx.mec_fw_version >= 2410)) {
1712 			adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1713 			adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1714 		}
1715 		break;
1716 	default:
1717 		break;
1718 	}
1719 
1720 	if (!adev->enable_mes_kiq) {
1721 		r = amdgpu_gfx_kiq_init(adev, GFX11_MEC_HPD_SIZE, 0);
1722 		if (r) {
1723 			DRM_ERROR("Failed to init KIQ BOs!\n");
1724 			return r;
1725 		}
1726 
1727 		r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
1728 		if (r)
1729 			return r;
1730 	}
1731 
1732 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v11_compute_mqd), 0);
1733 	if (r)
1734 		return r;
1735 
1736 	/* allocate visible FB for rlc auto-loading fw */
1737 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1738 		r = gfx_v11_0_rlc_autoload_buffer_init(adev);
1739 		if (r)
1740 			return r;
1741 	}
1742 
1743 	r = gfx_v11_0_gpu_early_init(adev);
1744 	if (r)
1745 		return r;
1746 
1747 	if (amdgpu_gfx_ras_sw_init(adev)) {
1748 		dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
1749 		return -EINVAL;
1750 	}
1751 
1752 	gfx_v11_0_alloc_ip_dump(adev);
1753 
1754 	r = amdgpu_gfx_sysfs_init(adev);
1755 	if (r)
1756 		return r;
1757 
1758 	return 0;
1759 }
1760 
1761 static void gfx_v11_0_pfp_fini(struct amdgpu_device *adev)
1762 {
1763 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_obj,
1764 			      &adev->gfx.pfp.pfp_fw_gpu_addr,
1765 			      (void **)&adev->gfx.pfp.pfp_fw_ptr);
1766 
1767 	amdgpu_bo_free_kernel(&adev->gfx.pfp.pfp_fw_data_obj,
1768 			      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
1769 			      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
1770 }
1771 
1772 static void gfx_v11_0_me_fini(struct amdgpu_device *adev)
1773 {
1774 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
1775 			      &adev->gfx.me.me_fw_gpu_addr,
1776 			      (void **)&adev->gfx.me.me_fw_ptr);
1777 
1778 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_data_obj,
1779 			       &adev->gfx.me.me_fw_data_gpu_addr,
1780 			       (void **)&adev->gfx.me.me_fw_data_ptr);
1781 }
1782 
1783 static void gfx_v11_0_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1784 {
1785 	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1786 			&adev->gfx.rlc.rlc_autoload_gpu_addr,
1787 			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
1788 }
1789 
1790 static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
1791 {
1792 	int i;
1793 	struct amdgpu_device *adev = ip_block->adev;
1794 
1795 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1796 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1797 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1798 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1799 
1800 	amdgpu_gfx_mqd_sw_fini(adev, 0);
1801 
1802 	if (!adev->enable_mes_kiq) {
1803 		amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
1804 		amdgpu_gfx_kiq_fini(adev, 0);
1805 	}
1806 
1807 	amdgpu_gfx_cleaner_shader_sw_fini(adev);
1808 
1809 	gfx_v11_0_pfp_fini(adev);
1810 	gfx_v11_0_me_fini(adev);
1811 	gfx_v11_0_rlc_fini(adev);
1812 	gfx_v11_0_mec_fini(adev);
1813 
1814 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1815 		gfx_v11_0_rlc_autoload_buffer_fini(adev);
1816 
1817 	gfx_v11_0_free_microcode(adev);
1818 
1819 	amdgpu_gfx_sysfs_fini(adev);
1820 
1821 	kfree(adev->gfx.ip_dump_core);
1822 	kfree(adev->gfx.ip_dump_compute_queues);
1823 	kfree(adev->gfx.ip_dump_gfx_queues);
1824 
1825 	return 0;
1826 }
1827 
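/* Select which SE/SA/instance subsequent register accesses target via
 * GRBM_GFX_INDEX; 0xffffffff in any field enables broadcast writes for it.
 */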
1828 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1829 				   u32 sh_num, u32 instance, int xcc_id)
1830 {
1831 	u32 data;
1832 
1833 	if (instance == 0xffffffff)
1834 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1835 				     INSTANCE_BROADCAST_WRITES, 1);
1836 	else
1837 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1838 				     instance);
1839 
1840 	if (se_num == 0xffffffff)
1841 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1842 				     1);
1843 	else
1844 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1845 
1846 	if (sh_num == 0xffffffff)
1847 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1848 				     1);
1849 	else
1850 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1851 
1852 	WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, data);
1853 }
1854 
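/* Return the bitmap of active shader arrays: all SAs minus those disabled
 * in CC_GC_SA_UNIT_DISABLE and GC_USER_SA_UNIT_DISABLE.
 */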
1855 static u32 gfx_v11_0_get_sa_active_bitmap(struct amdgpu_device *adev)
1856 {
1857 	u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
1858 
1859 	gc_disabled_sa_mask = RREG32_SOC15(GC, 0, regCC_GC_SA_UNIT_DISABLE);
1860 	gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
1861 					   CC_GC_SA_UNIT_DISABLE,
1862 					   SA_DISABLE);
1863 	gc_user_disabled_sa_mask = RREG32_SOC15(GC, 0, regGC_USER_SA_UNIT_DISABLE);
1864 	gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
1865 						 GC_USER_SA_UNIT_DISABLE,
1866 						 SA_DISABLE);
1867 	sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
1868 					    adev->gfx.config.max_shader_engines);
1869 
1870 	return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
1871 }
1872 
1873 static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1874 {
1875 	u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
1876 	u32 rb_mask;
1877 
1878 	gc_disabled_rb_mask = RREG32_SOC15(GC, 0, regCC_RB_BACKEND_DISABLE);
1879 	gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
1880 					    CC_RB_BACKEND_DISABLE,
1881 					    BACKEND_DISABLE);
1882 	gc_user_disabled_rb_mask = RREG32_SOC15(GC, 0, regGC_USER_RB_BACKEND_DISABLE);
1883 	gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
1884 						 GC_USER_RB_BACKEND_DISABLE,
1885 						 BACKEND_DISABLE);
1886 	rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
1887 					    adev->gfx.config.max_shader_engines);
1888 
1889 	return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
1890 }
1891 
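/* Compute the enabled render backend mask by expanding the active SA
 * bitmap into per-SA RB slots and intersecting it with the globally
 * active RB bitmap.
 */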
1892 static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
1893 {
1894 	u32 rb_bitmap_per_sa;
1895 	u32 rb_bitmap_width_per_sa;
1896 	u32 max_sa;
1897 	u32 active_sa_bitmap;
1898 	u32 global_active_rb_bitmap;
1899 	u32 active_rb_bitmap = 0;
1900 	u32 i;
1901 
1902 	/* query sa bitmap from SA_UNIT_DISABLE registers */
1903 	active_sa_bitmap = gfx_v11_0_get_sa_active_bitmap(adev);
1904 	/* query rb bitmap from RB_BACKEND_DISABLE registers */
1905 	global_active_rb_bitmap = gfx_v11_0_get_rb_active_bitmap(adev);
1906 
1907 	/* generate active rb bitmap according to active sa bitmap */
1908 	max_sa = adev->gfx.config.max_shader_engines *
1909 		 adev->gfx.config.max_sh_per_se;
1910 	rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
1911 				 adev->gfx.config.max_sh_per_se;
1912 	rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa);
1913 
1914 	for (i = 0; i < max_sa; i++) {
1915 		if (active_sa_bitmap & (1 << i))
1916 			active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa));
1917 	}
1918 
1919 	active_rb_bitmap &= global_active_rb_bitmap;
1920 	adev->gfx.config.backend_enable_mask = active_rb_bitmap;
1921 	adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
1922 }
1923 
1924 #define DEFAULT_SH_MEM_BASES	(0x6000)
1925 #define LDS_APP_BASE           0x1
1926 #define SCRATCH_APP_BASE       0x2
1927 
1928 static void gfx_v11_0_init_compute_vmid(struct amdgpu_device *adev)
1929 {
1930 	int i;
1931 	uint32_t sh_mem_bases;
1932 	uint32_t data;
1933 
1934 	/*
1935 	 * Configure apertures:
1936 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1937 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1938 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1939 	 */
1940 	sh_mem_bases = (LDS_APP_BASE << SH_MEM_BASES__SHARED_BASE__SHIFT) |
1941 			SCRATCH_APP_BASE;
1942 
1943 	mutex_lock(&adev->srbm_mutex);
1944 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1945 		soc21_grbm_select(adev, 0, 0, 0, i);
1946 		/* CP and shaders */
1947 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1948 		WREG32_SOC15(GC, 0, regSH_MEM_BASES, sh_mem_bases);
1949 
1950 		/* Enable trap for each kfd vmid. */
1951 		data = RREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL);
1952 		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
1953 		WREG32_SOC15(GC, 0, regSPI_GDBG_PER_VMID_CNTL, data);
1954 	}
1955 	soc21_grbm_select(adev, 0, 0, 0, 0);
1956 	mutex_unlock(&adev->srbm_mutex);
1957 
1958 	/*
1959 	 * Initialize all compute VMIDs to have no GDS, GWS, or OA
1960 	 * access. These should be enabled by FW for target VMIDs.
1961 	 */
1962 	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1963 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * i, 0);
1964 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * i, 0);
1965 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, i, 0);
1966 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, i, 0);
1967 	}
1968 }
1969 
1970 static void gfx_v11_0_init_gds_vmid(struct amdgpu_device *adev)
1971 {
1972 	int vmid;
1973 
1974 	/*
1975 	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
1976 	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
1977 	 * the driver can enable them for graphics. VMID0 should maintain
1978 	 * access so that HWS firmware can save/restore entries.
1979 	 */
1980 	for (vmid = 1; vmid < 16; vmid++) {
1981 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_BASE, 2 * vmid, 0);
1982 		WREG32_SOC15_OFFSET(GC, 0, regGDS_VMID0_SIZE, 2 * vmid, 0);
1983 		WREG32_SOC15_OFFSET(GC, 0, regGDS_GWS_VMID0, vmid, 0);
1984 		WREG32_SOC15_OFFSET(GC, 0, regGDS_OA_VMID0, vmid, 0);
1985 	}
1986 }
1987 
1988 static void gfx_v11_0_tcp_harvest(struct amdgpu_device *adev)
1989 {
1990 	/* TODO: harvest feature to be added later. */
1991 }
1992 
1993 static void gfx_v11_0_get_tcc_info(struct amdgpu_device *adev)
1994 {
1995 	/* TCCs are global (not instanced). */
1996 	uint32_t tcc_disable = RREG32_SOC15(GC, 0, regCGTS_TCC_DISABLE) |
1997 			       RREG32_SOC15(GC, 0, regCGTS_USER_TCC_DISABLE);
1998 
1999 	adev->gfx.config.tcc_disabled_mask =
2000 		REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, TCC_DISABLE) |
2001 		(REG_GET_FIELD(tcc_disable, CGTS_TCC_DISABLE, HI_TCC_DISABLE) << 16);
2002 }
2003 
2004 static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
2005 {
2006 	u32 tmp;
2007 	int i;
2008 
2009 	if (!amdgpu_sriov_vf(adev))
2010 		WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2011 
2012 	gfx_v11_0_setup_rb(adev);
2013 	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
2014 	gfx_v11_0_get_tcc_info(adev);
2015 	adev->gfx.config.pa_sc_tile_steering_override = 0;
2016 
2017 	/* Set whether texture coordinate truncation is conformant. */
2018 	tmp = RREG32_SOC15(GC, 0, regTA_CNTL2);
2019 	adev->gfx.config.ta_cntl2_truncate_coord_mode =
2020 		REG_GET_FIELD(tmp, TA_CNTL2, TRUNCATE_COORD_MODE);
2021 
2022 	/* XXX SH_MEM regs */
2023 	/* where to put LDS, scratch, GPUVM in FSA64 space */
2024 	mutex_lock(&adev->srbm_mutex);
2025 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2026 		soc21_grbm_select(adev, 0, 0, 0, i);
2027 		/* CP and shaders */
2028 		WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
2029 		if (i != 0) {
2030 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2031 				(adev->gmc.private_aperture_start >> 48));
2032 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2033 				(adev->gmc.shared_aperture_start >> 48));
2034 			WREG32_SOC15(GC, 0, regSH_MEM_BASES, tmp);
2035 		}
2036 	}
2037 	soc21_grbm_select(adev, 0, 0, 0, 0);
2038 
2039 	mutex_unlock(&adev->srbm_mutex);
2040 
2041 	gfx_v11_0_init_compute_vmid(adev);
2042 	gfx_v11_0_init_gds_vmid(adev);
2043 }
2044 
2045 static u32 gfx_v11_0_get_cpg_int_cntl(struct amdgpu_device *adev,
2046 				      int me, int pipe)
2047 {
2048 	if (me != 0)
2049 		return 0;
2050 
2051 	switch (pipe) {
2052 	case 0:
2053 		return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
2054 	case 1:
2055 		return SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
2056 	default:
2057 		return 0;
2058 	}
2059 }
2060 
2061 static u32 gfx_v11_0_get_cpc_int_cntl(struct amdgpu_device *adev,
2062 				      int me, int pipe)
2063 {
2064 	/*
2065 	 * amdgpu controls only the first MEC. That's why this function only
2066 	 * handles the setting of interrupts for this specific MEC. All other
2067 	 * pipes' interrupts are set by amdkfd.
2068 	 */
2069 	if (me != 1)
2070 		return 0;
2071 
2072 	switch (pipe) {
2073 	case 0:
2074 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
2075 	case 1:
2076 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
2077 	case 2:
2078 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
2079 	case 3:
2080 		return SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
2081 	default:
2082 		return 0;
2083 	}
2084 }
2085 
2086 static void gfx_v11_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2087 					       bool enable)
2088 {
2089 	u32 tmp, cp_int_cntl_reg;
2090 	int i, j;
2091 
2092 	if (amdgpu_sriov_vf(adev))
2093 		return;
2094 
2095 	for (i = 0; i < adev->gfx.me.num_me; i++) {
2096 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
2097 			cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
2098 
2099 			if (cp_int_cntl_reg) {
2100 				tmp = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
2101 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
2102 						    enable ? 1 : 0);
2103 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
2104 						    enable ? 1 : 0);
2105 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
2106 						    enable ? 1 : 0);
2107 				tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
2108 						    enable ? 1 : 0);
2109 				WREG32_SOC15_IP(GC, cp_int_cntl_reg, tmp);
2110 			}
2111 		}
2112 	}
2113 }
2114 
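/* Generate the clear state buffer contents and program its GPU address
 * and length into the RLC CSIB registers.
 */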
2115 static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
2116 {
2117 	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2118 
2119 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_HI,
2120 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2121 	WREG32_SOC15(GC, 0, regRLC_CSIB_ADDR_LO,
2122 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2123 	WREG32_SOC15(GC, 0, regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
2124 
2125 	return 0;
2126 }
2127 
2128 static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
2129 {
2130 	u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
2131 
2132 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2133 	WREG32_SOC15(GC, 0, regRLC_CNTL, tmp);
2134 }
2135 
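/* Pulse SOFT_RESET_RLC in GRBM_SOFT_RESET, waiting 50us after asserting
 * the reset and again after releasing it.
 */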
2136 static void gfx_v11_0_rlc_reset(struct amdgpu_device *adev)
2137 {
2138 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2139 	udelay(50);
2140 	WREG32_FIELD15_PREREG(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2141 	udelay(50);
2142 }
2143 
2144 static void gfx_v11_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
2145 					     bool enable)
2146 {
2147 	uint32_t rlc_pg_cntl;
2148 
2149 	rlc_pg_cntl = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
2150 
2151 	if (!enable) {
2152 		/* RLC_PG_CNTL[23] = 0 (default)
2153 		 * RLC will wait for handshake acks with SMU
2154 		 * GFXOFF will be enabled
2155 		 * RLC_PG_CNTL[23] = 1
2156 		 * RLC will not issue any message to SMU
2157 		 * hence no handshake between SMU & RLC
2158 		 * GFXOFF will be disabled
2159 		 */
2160 		rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
2161 	} else
2162 		rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
2163 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, rlc_pg_cntl);
2164 }
2165 
2166 static void gfx_v11_0_rlc_start(struct amdgpu_device *adev)
2167 {
2168 	/* TODO: enable the RLC & SMU handshake once the SMU
2169 	 * and GFXOFF features work as expected */
2170 	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
2171 		gfx_v11_0_rlc_smu_handshake_cntl(adev, false);
2172 
2173 	WREG32_FIELD15_PREREG(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2174 	udelay(50);
2175 }
2176 
2177 static void gfx_v11_0_rlc_enable_srm(struct amdgpu_device *adev)
2178 {
2179 	uint32_t tmp;
2180 
2181 	/* enable Save Restore Machine */
2182 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL));
2183 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2184 	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
2185 	WREG32(SOC15_REG_OFFSET(GC, 0, regRLC_SRM_CNTL), tmp);
2186 }
2187 
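/* Write the RLC_G ucode into the RLC GPM ucode RAM one dword at a time
 * through the RLC_GPM_UCODE_ADDR/DATA register pair.
 */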
2188 static void gfx_v11_0_load_rlcg_microcode(struct amdgpu_device *adev)
2189 {
2190 	const struct rlc_firmware_header_v2_0 *hdr;
2191 	const __le32 *fw_data;
2192 	unsigned i, fw_size;
2193 
2194 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2195 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2196 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2197 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2198 
2199 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR,
2200 		     RLCG_UCODE_LOADING_START_ADDRESS);
2201 
2202 	for (i = 0; i < fw_size; i++)
2203 		WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_DATA,
2204 			     le32_to_cpup(fw_data++));
2205 
2206 	WREG32_SOC15(GC, 0, regRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2207 }
2208 
2209 static void gfx_v11_0_load_rlc_iram_dram_microcode(struct amdgpu_device *adev)
2210 {
2211 	const struct rlc_firmware_header_v2_2 *hdr;
2212 	const __le32 *fw_data;
2213 	unsigned i, fw_size;
2214 	u32 tmp;
2215 
2216 	hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
2217 
2218 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2219 			le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
2220 	fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
2221 
2222 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, 0);
2223 
2224 	for (i = 0; i < fw_size; i++) {
2225 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2226 			msleep(1);
2227 		WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_DATA,
2228 				le32_to_cpup(fw_data++));
2229 	}
2230 
2231 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
2232 
2233 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2234 			le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
2235 	fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
2236 
2237 	WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_ADDR, 0);
2238 	for (i = 0; i < fw_size; i++) {
2239 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2240 			msleep(1);
2241 		WREG32_SOC15(GC, 0, regRLC_LX6_DRAM_DATA,
2242 				le32_to_cpup(fw_data++));
2243 	}
2244 
2245 	WREG32_SOC15(GC, 0, regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
2246 
2247 	tmp = RREG32_SOC15(GC, 0, regRLC_LX6_CNTL);
2248 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
2249 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
2250 	WREG32_SOC15(GC, 0, regRLC_LX6_CNTL, tmp);
2251 }
2252 
2253 static void gfx_v11_0_load_rlcp_rlcv_microcode(struct amdgpu_device *adev)
2254 {
2255 	const struct rlc_firmware_header_v2_3 *hdr;
2256 	const __le32 *fw_data;
2257 	unsigned i, fw_size;
2258 	u32 tmp;
2259 
2260 	hdr = (const struct rlc_firmware_header_v2_3 *)adev->gfx.rlc_fw->data;
2261 
2262 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2263 			le32_to_cpu(hdr->rlcp_ucode_offset_bytes));
2264 	fw_size = le32_to_cpu(hdr->rlcp_ucode_size_bytes) / 4;
2265 
2266 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, 0);
2267 
2268 	for (i = 0; i < fw_size; i++) {
2269 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2270 			msleep(1);
2271 		WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_DATA,
2272 				le32_to_cpup(fw_data++));
2273 	}
2274 
2275 	WREG32_SOC15(GC, 0, regRLC_PACE_UCODE_ADDR, adev->gfx.rlc_fw_version);
2276 
2277 	tmp = RREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE);
2278 	tmp = REG_SET_FIELD(tmp, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
2279 	WREG32_SOC15(GC, 0, regRLC_GPM_THREAD_ENABLE, tmp);
2280 
2281 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2282 			le32_to_cpu(hdr->rlcv_ucode_offset_bytes));
2283 	fw_size = le32_to_cpu(hdr->rlcv_ucode_size_bytes) / 4;
2284 
2285 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, 0);
2286 
2287 	for (i = 0; i < fw_size; i++) {
2288 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
2289 			msleep(1);
2290 		WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_DATA,
2291 				le32_to_cpup(fw_data++));
2292 	}
2293 
2294 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_UCODE_ADDR, adev->gfx.rlc_fw_version);
2295 
2296 	tmp = RREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL);
2297 	tmp = REG_SET_FIELD(tmp, RLC_GPU_IOV_F32_CNTL, ENABLE, 1);
2298 	WREG32_SOC15(GC, 0, regRLC_GPU_IOV_F32_CNTL, tmp);
2299 }
2300 
2301 static int gfx_v11_0_rlc_load_microcode(struct amdgpu_device *adev)
2302 {
2303 	const struct rlc_firmware_header_v2_0 *hdr;
2304 	uint16_t version_major;
2305 	uint16_t version_minor;
2306 
2307 	if (!adev->gfx.rlc_fw)
2308 		return -EINVAL;
2309 
2310 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2311 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2312 
2313 	version_major = le16_to_cpu(hdr->header.header_version_major);
2314 	version_minor = le16_to_cpu(hdr->header.header_version_minor);
2315 
2316 	if (version_major == 2) {
2317 		gfx_v11_0_load_rlcg_microcode(adev);
2318 		if (amdgpu_dpm == 1) {
2319 			if (version_minor >= 2)
2320 				gfx_v11_0_load_rlc_iram_dram_microcode(adev);
2321 			if (version_minor == 3)
2322 				gfx_v11_0_load_rlcp_rlcv_microcode(adev);
2323 		}
2324 
2325 		return 0;
2326 	}
2327 
2328 	return -EINVAL;
2329 }
2330 
2331 static int gfx_v11_0_rlc_resume(struct amdgpu_device *adev)
2332 {
2333 	int r;
2334 
2335 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
2336 		gfx_v11_0_init_csb(adev);
2337 
2338 		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
2339 			gfx_v11_0_rlc_enable_srm(adev);
2340 	} else {
2341 		if (amdgpu_sriov_vf(adev)) {
2342 			gfx_v11_0_init_csb(adev);
2343 			return 0;
2344 		}
2345 
2346 		adev->gfx.rlc.funcs->stop(adev);
2347 
2348 		/* disable CG */
2349 		WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, 0);
2350 
2351 		/* disable PG */
2352 		WREG32_SOC15(GC, 0, regRLC_PG_CNTL, 0);
2353 
2354 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2355 			/* legacy rlc firmware loading */
2356 			r = gfx_v11_0_rlc_load_microcode(adev);
2357 			if (r)
2358 				return r;
2359 		}
2360 
2361 		gfx_v11_0_init_csb(adev);
2362 
2363 		adev->gfx.rlc.funcs->start(adev);
2364 	}
2365 	return 0;
2366 }
2367 
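/* Invalidate the ME L1 instruction cache and program its base registers
 * to point at the ME ucode located at @addr.
 */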
2368 static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
2369 {
2370 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2371 	uint32_t tmp;
2372 	int i;
2373 
2374 	/* Trigger an invalidation of the L1 instruction caches */
2375 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2376 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2377 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2378 
2379 	/* Wait for invalidation complete */
2380 	for (i = 0; i < usec_timeout; i++) {
2381 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2382 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2383 					INVALIDATE_CACHE_COMPLETE))
2384 			break;
2385 		udelay(1);
2386 	}
2387 
2388 	if (i >= usec_timeout) {
2389 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2390 		return -EINVAL;
2391 	}
2392 
2393 	if (amdgpu_emu_mode == 1)
2394 		adev->hdp.funcs->flush_hdp(adev, NULL);
2395 
2396 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2397 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2398 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2399 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2400 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2401 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2402 
2403 	/* Program me ucode address into instruction cache address register */
2404 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2405 			lower_32_bits(addr) & 0xFFFFF000);
2406 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2407 			upper_32_bits(addr));
2408 
2409 	return 0;
2410 }
2411 
2412 static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
2413 {
2414 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2415 	uint32_t tmp;
2416 	int i;
2417 
2418 	/* Trigger an invalidation of the L1 instruction caches */
2419 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2420 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2421 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2422 
2423 	/* Wait for invalidation complete */
2424 	for (i = 0; i < usec_timeout; i++) {
2425 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2426 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2427 					INVALIDATE_CACHE_COMPLETE))
2428 			break;
2429 		udelay(1);
2430 	}
2431 
2432 	if (i >= usec_timeout) {
2433 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2434 		return -EINVAL;
2435 	}
2436 
2437 	if (amdgpu_emu_mode == 1)
2438 		adev->hdp.funcs->flush_hdp(adev, NULL);
2439 
2440 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2441 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2442 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2443 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2444 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2445 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2446 
2447 	/* Program pfp ucode address into instruction cache address register */
2448 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2449 			lower_32_bits(addr) & 0xFFFFF000);
2450 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2451 			upper_32_bits(addr));
2452 
2453 	return 0;
2454 }
2455 
2456 static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
2457 {
2458 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2459 	uint32_t tmp;
2460 	int i;
2461 
2462 	/* Trigger an invalidation of the L1 instruction caches */
2463 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2464 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2465 
2466 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2467 
2468 	/* Wait for invalidation complete */
2469 	for (i = 0; i < usec_timeout; i++) {
2470 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2471 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2472 					INVALIDATE_CACHE_COMPLETE))
2473 			break;
2474 		udelay(1);
2475 	}
2476 
2477 	if (i >= usec_timeout) {
2478 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2479 		return -EINVAL;
2480 	}
2481 
2482 	if (amdgpu_emu_mode == 1)
2483 		adev->hdp.funcs->flush_hdp(adev, NULL);
2484 
2485 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2486 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2487 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2488 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, ADDRESS_CLAMP, 1);
2489 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2490 
2491 	/* Program mec1 ucode address into instruction cache address register */
2492 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO,
2493 			lower_32_bits(addr) & 0xFFFFF000);
2494 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2495 			upper_32_bits(addr));
2496 
2497 	return 0;
2498 }
2499 
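/* RS64 PFP setup: program and prime the instruction cache at @addr, then,
 * for each pipe, set the ucode start address, pulse the pipe reset so it
 * takes effect, and program the data cache base at @addr2.
 */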
2500 static int gfx_v11_0_config_pfp_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2501 {
2502 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2503 	uint32_t tmp;
2504 	unsigned i, pipe_id;
2505 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2506 
2507 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2508 		adev->gfx.pfp_fw->data;
2509 
2510 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
2511 		lower_32_bits(addr));
2512 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
2513 		upper_32_bits(addr));
2514 
2515 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
2516 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
2517 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
2518 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
2519 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
2520 
2521 	/*
2522 	 * Programming any of the CP_PFP_IC_BASE registers
2523 	 * forces invalidation of the ME L1 I$. Wait for the
2524 	 * invalidation to complete.
2525 	 */
2526 	for (i = 0; i < usec_timeout; i++) {
2527 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2528 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2529 			INVALIDATE_CACHE_COMPLETE))
2530 			break;
2531 		udelay(1);
2532 	}
2533 
2534 	if (i >= usec_timeout) {
2535 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2536 		return -EINVAL;
2537 	}
2538 
2539 	/* Prime the L1 instruction caches */
2540 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2541 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
2542 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
2543 	/* Wait for the cache to be primed */
2544 	for (i = 0; i < usec_timeout; i++) {
2545 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
2546 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
2547 			ICACHE_PRIMED))
2548 			break;
2549 		udelay(1);
2550 	}
2551 
2552 	if (i >= usec_timeout) {
2553 		dev_err(adev->dev, "failed to prime instruction cache\n");
2554 		return -EINVAL;
2555 	}
2556 
2557 	mutex_lock(&adev->srbm_mutex);
2558 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2559 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2560 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2561 			(pfp_hdr->ucode_start_addr_hi << 30) |
2562 			(pfp_hdr->ucode_start_addr_lo >> 2));
2563 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2564 			pfp_hdr->ucode_start_addr_hi >> 2);
2565 
2566 		/*
2567 		 * Program CP_ME_CNTL to reset the given pipe so that the
2568 		 * new CP_PFP_PRGRM_CNTR_START value takes effect.
2569 		 */
2570 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2571 		if (pipe_id == 0)
2572 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2573 					PFP_PIPE0_RESET, 1);
2574 		else
2575 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2576 					PFP_PIPE1_RESET, 1);
2577 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2578 
2579 		/* Clear the pfp pipe reset bit. */
2580 		if (pipe_id == 0)
2581 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2582 					PFP_PIPE0_RESET, 0);
2583 		else
2584 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2585 					PFP_PIPE1_RESET, 0);
2586 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2587 
2588 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
2589 			lower_32_bits(addr2));
2590 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
2591 			upper_32_bits(addr2));
2592 	}
2593 	soc21_grbm_select(adev, 0, 0, 0, 0);
2594 	mutex_unlock(&adev->srbm_mutex);
2595 
2596 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2597 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2598 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2599 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2600 
2601 	/* Invalidate the data caches */
2602 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2603 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2604 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2605 
2606 	for (i = 0; i < usec_timeout; i++) {
2607 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2608 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2609 			INVALIDATE_DCACHE_COMPLETE))
2610 			break;
2611 		udelay(1);
2612 	}
2613 
2614 	if (i >= usec_timeout) {
2615 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2616 		return -EINVAL;
2617 	}
2618 
2619 	return 0;
2620 }
2621 
2622 static int gfx_v11_0_config_me_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2623 {
2624 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2625 	uint32_t tmp;
2626 	unsigned i, pipe_id;
2627 	const struct gfx_firmware_header_v2_0 *me_hdr;
2628 
2629 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2630 		adev->gfx.me_fw->data;
2631 
2632 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
2633 		lower_32_bits(addr));
2634 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
2635 		upper_32_bits(addr));
2636 
2637 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
2638 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
2639 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
2640 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
2641 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
2642 
2643 	/*
2644 	 * Programming any of the CP_ME_IC_BASE registers
2645 	 * forces invalidation of the ME L1 I$. Wait for the
2646 	 * invalidation to complete.
2647 	 */
2648 	for (i = 0; i < usec_timeout; i++) {
2649 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2650 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2651 			INVALIDATE_CACHE_COMPLETE))
2652 			break;
2653 		udelay(1);
2654 	}
2655 
2656 	if (i >= usec_timeout) {
2657 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2658 		return -EINVAL;
2659 	}
2660 
2661 	/* Prime the instruction caches */
2662 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2663 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
2664 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
2665 
2666 	/* Wait for the instruction cache to be primed */
2667 	for (i = 0; i < usec_timeout; i++) {
2668 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
2669 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
2670 			ICACHE_PRIMED))
2671 			break;
2672 		udelay(1);
2673 	}
2674 
2675 	if (i >= usec_timeout) {
2676 		dev_err(adev->dev, "failed to prime instruction cache\n");
2677 		return -EINVAL;
2678 	}
2679 
2680 	mutex_lock(&adev->srbm_mutex);
2681 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
2682 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2683 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2684 			(me_hdr->ucode_start_addr_hi << 30) |
2685 			(me_hdr->ucode_start_addr_lo >> 2));
2686 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2687 			me_hdr->ucode_start_addr_hi >> 2);
2688 
2689 		/*
2690 		 * Program CP_ME_CNTL to reset the given pipe so that the
2691 		 * new CP_ME_PRGRM_CNTR_START value takes effect.
2692 		 */
2693 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2694 		if (pipe_id == 0)
2695 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2696 					ME_PIPE0_RESET, 1);
2697 		else
2698 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2699 					ME_PIPE1_RESET, 1);
2700 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2701 
2702 		/* Clear the me pipe reset bit. */
2703 		if (pipe_id == 0)
2704 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2705 					ME_PIPE0_RESET, 0);
2706 		else
2707 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
2708 					ME_PIPE1_RESET, 0);
2709 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2710 
2711 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
2712 			lower_32_bits(addr2));
2713 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
2714 			upper_32_bits(addr2));
2715 	}
2716 	soc21_grbm_select(adev, 0, 0, 0, 0);
2717 	mutex_unlock(&adev->srbm_mutex);
2718 
2719 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
2720 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
2721 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
2722 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
2723 
2724 	/* Invalidate the data caches */
2725 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2726 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2727 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
2728 
2729 	for (i = 0; i < usec_timeout; i++) {
2730 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
2731 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
2732 			INVALIDATE_DCACHE_COMPLETE))
2733 			break;
2734 		udelay(1);
2735 	}
2736 
2737 	if (i >= usec_timeout) {
2738 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
2739 		return -EINVAL;
2740 	}
2741 
2742 	return 0;
2743 }
2744 
2745 static int gfx_v11_0_config_mec_cache_rs64(struct amdgpu_device *adev, uint64_t addr, uint64_t addr2)
2746 {
2747 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
2748 	uint32_t tmp;
2749 	unsigned i;
2750 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2751 
2752 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2753 		adev->gfx.mec_fw->data;
2754 
2755 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
2756 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2757 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2758 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2759 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
2760 
2761 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
2762 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2763 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2764 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
2765 
2766 	mutex_lock(&adev->srbm_mutex);
2767 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2768 		soc21_grbm_select(adev, 1, i, 0, 0);
2769 
2770 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, addr2);
2771 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
2772 		     upper_32_bits(addr2));
2773 
2774 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2775 					mec_hdr->ucode_start_addr_lo >> 2 |
2776 					mec_hdr->ucode_start_addr_hi << 30);
2777 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2778 					mec_hdr->ucode_start_addr_hi >> 2);
2779 
2780 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, addr);
2781 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
2782 		     upper_32_bits(addr));
2783 	}
2784 	mutex_unlock(&adev->srbm_mutex);
2785 	soc21_grbm_select(adev, 0, 0, 0, 0);
2786 
2787 	/* Trigger an invalidation of the MEC data cache */
2788 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2789 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2790 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
2791 
2792 	/* Wait for invalidation complete */
2793 	for (i = 0; i < usec_timeout; i++) {
2794 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
2795 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2796 				       INVALIDATE_DCACHE_COMPLETE))
2797 			break;
2798 		udelay(1);
2799 	}
2800 
2801 	if (i >= usec_timeout) {
2802 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2803 		return -EINVAL;
2804 	}
2805 
2806 	/* Trigger an invalidation of the L1 instruction caches */
2807 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2808 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2809 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
2810 
2811 	/* Wait for invalidation complete */
2812 	for (i = 0; i < usec_timeout; i++) {
2813 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
2814 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2815 				       INVALIDATE_CACHE_COMPLETE))
2816 			break;
2817 		udelay(1);
2818 	}
2819 
2820 	if (i >= usec_timeout) {
2821 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2822 		return -EINVAL;
2823 	}
2824 
2825 	return 0;
2826 }
2827 
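/* Program the RS64 PFP/ME/MEC ucode start addresses for every pipe and
 * pulse each pipe reset so the new program counters take effect.
 */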
2828 static void gfx_v11_0_config_gfx_rs64(struct amdgpu_device *adev)
2829 {
2830 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
2831 	const struct gfx_firmware_header_v2_0 *me_hdr;
2832 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2833 	uint32_t pipe_id, tmp;
2834 
2835 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
2836 		adev->gfx.mec_fw->data;
2837 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
2838 		adev->gfx.me_fw->data;
2839 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
2840 		adev->gfx.pfp_fw->data;
2841 
2842 	/* config pfp program start addr */
2843 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2844 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2845 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
2846 			(pfp_hdr->ucode_start_addr_hi << 30) |
2847 			(pfp_hdr->ucode_start_addr_lo >> 2));
2848 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
2849 			pfp_hdr->ucode_start_addr_hi >> 2);
2850 	}
2851 	soc21_grbm_select(adev, 0, 0, 0, 0);
2852 
2853 	/* reset pfp pipe */
2854 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2855 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 1);
2856 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 1);
2857 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2858 
2859 	/* clear pfp pipe reset */
2860 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, 0);
2861 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, 0);
2862 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2863 
2864 	/* config me program start addr */
2865 	for (pipe_id = 0; pipe_id < 2; pipe_id++) {
2866 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
2867 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
2868 			(me_hdr->ucode_start_addr_hi << 30) |
2869 			(me_hdr->ucode_start_addr_lo >> 2));
2870 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
2871 			me_hdr->ucode_start_addr_hi >> 2);
2872 	}
2873 	soc21_grbm_select(adev, 0, 0, 0, 0);
2874 
2875 	/* reset me pipe */
2876 	tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2877 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 1);
2878 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 1);
2879 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2880 
2881 	/* clear me pipe reset */
2882 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, 0);
2883 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, 0);
2884 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
2885 
2886 	/* config mec program start addr */
2887 	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
2888 		soc21_grbm_select(adev, 1, pipe_id, 0, 0);
2889 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
2890 					mec_hdr->ucode_start_addr_lo >> 2 |
2891 					mec_hdr->ucode_start_addr_hi << 30);
2892 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
2893 					mec_hdr->ucode_start_addr_hi >> 2);
2894 	}
2895 	soc21_grbm_select(adev, 0, 0, 0, 0);
2896 
2897 	/* reset mec pipe */
2898 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
2899 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
2900 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
2901 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
2902 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
2903 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2904 
2905 	/* clear mec pipe reset */
2906 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
2907 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
2908 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
2909 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
2910 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, tmp);
2911 }
2912 
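/*
 * Poll CP_STAT and the RLC bootload status register (the GC 11.0.1-style
 * offset on the 11.0.1/11.0.4/11.5.x variants) until the RLC reports
 * BOOTLOAD_COMPLETE, or return -ETIMEDOUT.  For RLC backdoor autoload,
 * then point the CP instruction/stack caches at the firmware images inside
 * the autoload buffer (RS64 or legacy layout).
 */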
2913 static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
2914 {
2915 	uint32_t cp_status;
2916 	uint32_t bootload_status;
2917 	int i, r;
2918 	uint64_t addr, addr2;
2919 
2920 	for (i = 0; i < adev->usec_timeout; i++) {
2921 		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
2922 
2923 		if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
2924 			    IP_VERSION(11, 0, 1) ||
2925 		    amdgpu_ip_version(adev, GC_HWIP, 0) ==
2926 			    IP_VERSION(11, 0, 4) ||
2927 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) ||
2928 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) ||
2929 		    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2))
2930 			bootload_status = RREG32_SOC15(GC, 0,
2931 					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
2932 		else
2933 			bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS);
2934 
2935 		if ((cp_status == 0) &&
2936 		    (REG_GET_FIELD(bootload_status,
2937 			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
2938 			break;
2939 		}
2940 		udelay(1);
2941 	}
2942 
2943 	if (i >= adev->usec_timeout) {
2944 		dev_err(adev->dev, "rlc autoload: gc ucode autoload timeout\n");
2945 		return -ETIMEDOUT;
2946 	}
2947 
2948 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2949 		if (adev->gfx.rs64_enable) {
2950 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2951 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME].offset;
2952 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2953 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_ME_P0_STACK].offset;
2954 			r = gfx_v11_0_config_me_cache_rs64(adev, addr, addr2);
2955 			if (r)
2956 				return r;
2957 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2958 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP].offset;
2959 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2960 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_PFP_P0_STACK].offset;
2961 			r = gfx_v11_0_config_pfp_cache_rs64(adev, addr, addr2);
2962 			if (r)
2963 				return r;
2964 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2965 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC].offset;
2966 			addr2 = adev->gfx.rlc.rlc_autoload_gpu_addr +
2967 				rlc_autoload_info[SOC21_FIRMWARE_ID_RS64_MEC_P0_STACK].offset;
2968 			r = gfx_v11_0_config_mec_cache_rs64(adev, addr, addr2);
2969 			if (r)
2970 				return r;
2971 		} else {
2972 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2973 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_ME].offset;
2974 			r = gfx_v11_0_config_me_cache(adev, addr);
2975 			if (r)
2976 				return r;
2977 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2978 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_PFP].offset;
2979 			r = gfx_v11_0_config_pfp_cache(adev, addr);
2980 			if (r)
2981 				return r;
2982 			addr = adev->gfx.rlc.rlc_autoload_gpu_addr +
2983 				rlc_autoload_info[SOC21_FIRMWARE_ID_CP_MEC].offset;
2984 			r = gfx_v11_0_config_mec_cache(adev, addr);
2985 			if (r)
2986 				return r;
2987 		}
2988 	}
2989 
2990 	return 0;
2991 }
2992 
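/*
 * Halt or unhalt the gfx CP (PFP + ME) via CP_ME_CNTL and wait for CP_STAT
 * to go idle; logs an error on timeout but still returns 0.
 */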
2993 static int gfx_v11_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2994 {
2995 	int i;
2996 	u32 tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
2997 
2998 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2999 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3000 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3001 
3002 	for (i = 0; i < adev->usec_timeout; i++) {
3003 		if (RREG32_SOC15(GC, 0, regCP_STAT) == 0)
3004 			break;
3005 		udelay(1);
3006 	}
3007 
3008 	if (i >= adev->usec_timeout)
3009 		DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt");
3010 
3011 	return 0;
3012 }
3013 
3014 static int gfx_v11_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
3015 {
3016 	int r;
3017 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
3018 	const __le32 *fw_data;
3019 	unsigned i, fw_size;
3020 
3021 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3022 		adev->gfx.pfp_fw->data;
3023 
3024 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3025 
3026 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
3027 		le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3028 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes);
3029 
3030 	r = amdgpu_bo_create_reserved(adev, pfp_hdr->header.ucode_size_bytes,
3031 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3032 				      &adev->gfx.pfp.pfp_fw_obj,
3033 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
3034 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
3035 	if (r) {
3036 		dev_err(adev->dev, "(%d) failed to create pfp fw bo\n", r);
3037 		gfx_v11_0_pfp_fini(adev);
3038 		return r;
3039 	}
3040 
3041 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_data, fw_size);
3042 
3043 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
3044 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
3045 
3046 	gfx_v11_0_config_pfp_cache(adev, adev->gfx.pfp.pfp_fw_gpu_addr);
3047 
3048 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, 0);
3049 
3050 	for (i = 0; i < pfp_hdr->jt_size; i++)
3051 		WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_DATA,
3052 			     le32_to_cpup(fw_data + pfp_hdr->jt_offset + i));
3053 
3054 	WREG32_SOC15(GC, 0, regCP_HYP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3055 
3056 	return 0;
3057 }
3058 
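/*
 * Front-door load of the RS64 PFP firmware: copy the instruction and data
 * images into 64KB-aligned BOs, program the instruction cache base,
 * wait for the implied invalidation and prime the I$, set each pipe's boot
 * vector and data cache base while pulsing the pipe reset, then invalidate
 * the RS64 data cache.
 */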
3059 static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
3060 {
3061 	int r;
3062 	const struct gfx_firmware_header_v2_0 *pfp_hdr;
3063 	const __le32 *fw_ucode, *fw_data;
3064 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
3065 	uint32_t tmp;
3066 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
3067 
3068 	pfp_hdr = (const struct gfx_firmware_header_v2_0 *)
3069 		adev->gfx.pfp_fw->data;
3070 
3071 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3072 
3073 	/* instruction */
3074 	fw_ucode = (const __le32 *)(adev->gfx.pfp_fw->data +
3075 		le32_to_cpu(pfp_hdr->ucode_offset_bytes));
3076 	fw_ucode_size = le32_to_cpu(pfp_hdr->ucode_size_bytes);
3077 	/* data */
3078 	fw_data = (const __le32 *)(adev->gfx.pfp_fw->data +
3079 		le32_to_cpu(pfp_hdr->data_offset_bytes));
3080 	fw_data_size = le32_to_cpu(pfp_hdr->data_size_bytes);
3081 
3082 	/* 64kb align */
3083 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3084 				      64 * 1024,
3085 				      AMDGPU_GEM_DOMAIN_VRAM |
3086 				      AMDGPU_GEM_DOMAIN_GTT,
3087 				      &adev->gfx.pfp.pfp_fw_obj,
3088 				      &adev->gfx.pfp.pfp_fw_gpu_addr,
3089 				      (void **)&adev->gfx.pfp.pfp_fw_ptr);
3090 	if (r) {
3091 		dev_err(adev->dev, "(%d) failed to create pfp ucode fw bo\n", r);
3092 		gfx_v11_0_pfp_fini(adev);
3093 		return r;
3094 	}
3095 
3096 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3097 				      64 * 1024,
3098 				      AMDGPU_GEM_DOMAIN_VRAM |
3099 				      AMDGPU_GEM_DOMAIN_GTT,
3100 				      &adev->gfx.pfp.pfp_fw_data_obj,
3101 				      &adev->gfx.pfp.pfp_fw_data_gpu_addr,
3102 				      (void **)&adev->gfx.pfp.pfp_fw_data_ptr);
3103 	if (r) {
3104 		dev_err(adev->dev, "(%d) failed to create pfp data fw bo\n", r);
3105 		gfx_v11_0_pfp_fini(adev);
3106 		return r;
3107 	}
3108 
3109 	memcpy(adev->gfx.pfp.pfp_fw_ptr, fw_ucode, fw_ucode_size);
3110 	memcpy(adev->gfx.pfp.pfp_fw_data_ptr, fw_data, fw_data_size);
3111 
3112 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_obj);
3113 	amdgpu_bo_kunmap(adev->gfx.pfp.pfp_fw_data_obj);
3114 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_obj);
3115 	amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
3116 
3117 	if (amdgpu_emu_mode == 1)
3118 		adev->hdp.funcs->flush_hdp(adev, NULL);
3119 
3120 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
3121 		lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
3122 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_HI,
3123 		upper_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
3124 
3125 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
3126 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
3127 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, CACHE_POLICY, 0);
3128 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, EXE_DISABLE, 0);
3129 	WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL, tmp);
3130 
3131 	/*
3132 	 * Programming any of the CP_PFP_IC_BASE registers
3133 	 * forces invalidation of the PFP L1 I$. Wait for the
3134 	 * invalidation to complete.
3135 	 */
3136 	for (i = 0; i < usec_timeout; i++) {
3137 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3138 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
3139 			INVALIDATE_CACHE_COMPLETE))
3140 			break;
3141 		udelay(1);
3142 	}
3143 
3144 	if (i >= usec_timeout) {
3145 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3146 		return -EINVAL;
3147 	}
3148 
3149 	/* Prime the L1 instruction caches */
3150 	tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3151 	tmp = REG_SET_FIELD(tmp, CP_PFP_IC_OP_CNTL, PRIME_ICACHE, 1);
3152 	WREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL, tmp);
3153 	/* Wait for the cache to be primed */
3154 	for (i = 0; i < usec_timeout; i++) {
3155 		tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_OP_CNTL);
3156 		if (1 == REG_GET_FIELD(tmp, CP_PFP_IC_OP_CNTL,
3157 			ICACHE_PRIMED))
3158 			break;
3159 		udelay(1);
3160 	}
3161 
3162 	if (i >= usec_timeout) {
3163 		dev_err(adev->dev, "failed to prime instruction cache\n");
3164 		return -EINVAL;
3165 	}
3166 
3167 	mutex_lock(&adev->srbm_mutex);
3168 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3169 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3170 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START,
3171 			(pfp_hdr->ucode_start_addr_hi << 30) |
3172 			(pfp_hdr->ucode_start_addr_lo >> 2));
3173 		WREG32_SOC15(GC, 0, regCP_PFP_PRGRM_CNTR_START_HI,
3174 			pfp_hdr->ucode_start_addr_hi >> 2);
3175 
3176 		/*
3177 		 * Program CP_ME_CNTL to reset the given pipe so that
3178 		 * CP_PFP_PRGRM_CNTR_START takes effect.
3179 		 */
3180 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3181 		if (pipe_id == 0)
3182 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3183 					PFP_PIPE0_RESET, 1);
3184 		else
3185 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3186 					PFP_PIPE1_RESET, 1);
3187 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3188 
3189 		/* Clear the pfp pipe reset bit. */
3190 		if (pipe_id == 0)
3191 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3192 					PFP_PIPE0_RESET, 0);
3193 		else
3194 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3195 					PFP_PIPE1_RESET, 0);
3196 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3197 
3198 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_LO,
3199 			lower_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
3200 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE0_HI,
3201 			upper_32_bits(adev->gfx.pfp.pfp_fw_data_gpu_addr));
3202 	}
3203 	soc21_grbm_select(adev, 0, 0, 0, 0);
3204 	mutex_unlock(&adev->srbm_mutex);
3205 
3206 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3207 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3208 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3209 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3210 
3211 	/* Invalidate the data caches */
3212 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3213 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3214 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3215 
3216 	for (i = 0; i < usec_timeout; i++) {
3217 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3218 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3219 			INVALIDATE_DCACHE_COMPLETE))
3220 			break;
3221 		udelay(1);
3222 	}
3223 
3224 	if (i >= usec_timeout) {
3225 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3226 		return -EINVAL;
3227 	}
3228 
3229 	return 0;
3230 }
3231 
3232 static int gfx_v11_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
3233 {
3234 	int r;
3235 	const struct gfx_firmware_header_v1_0 *me_hdr;
3236 	const __le32 *fw_data;
3237 	unsigned i, fw_size;
3238 
3239 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
3240 		adev->gfx.me_fw->data;
3241 
3242 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3243 
3244 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
3245 		le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3246 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes);
3247 
3248 	r = amdgpu_bo_create_reserved(adev, me_hdr->header.ucode_size_bytes,
3249 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3250 				      &adev->gfx.me.me_fw_obj,
3251 				      &adev->gfx.me.me_fw_gpu_addr,
3252 				      (void **)&adev->gfx.me.me_fw_ptr);
3253 	if (r) {
3254 		dev_err(adev->dev, "(%d) failed to create me fw bo\n", r);
3255 		gfx_v11_0_me_fini(adev);
3256 		return r;
3257 	}
3258 
3259 	memcpy(adev->gfx.me.me_fw_ptr, fw_data, fw_size);
3260 
3261 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3262 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3263 
3264 	gfx_v11_0_config_me_cache(adev, adev->gfx.me.me_fw_gpu_addr);
3265 
3266 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, 0);
3267 
3268 	for (i = 0; i < me_hdr->jt_size; i++)
3269 		WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_DATA,
3270 			     le32_to_cpup(fw_data + me_hdr->jt_offset + i));
3271 
3272 	WREG32_SOC15(GC, 0, regCP_HYP_ME_UCODE_ADDR, adev->gfx.me_fw_version);
3273 
3274 	return 0;
3275 }
3276 
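/*
 * Front-door load of the RS64 ME firmware; mirrors the PFP path above but
 * uses the CP_ME_IC_BASE registers, the ME pipe reset bits and the
 * RS64_DC_BASE1 data cache slot.
 */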
3277 static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
3278 {
3279 	int r;
3280 	const struct gfx_firmware_header_v2_0 *me_hdr;
3281 	const __le32 *fw_ucode, *fw_data;
3282 	unsigned i, pipe_id, fw_ucode_size, fw_data_size;
3283 	uint32_t tmp;
3284 	uint32_t usec_timeout = 50000;  /* wait for 50ms */
3285 
3286 	me_hdr = (const struct gfx_firmware_header_v2_0 *)
3287 		adev->gfx.me_fw->data;
3288 
3289 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3290 
3291 	/* instruction */
3292 	fw_ucode = (const __le32 *)(adev->gfx.me_fw->data +
3293 		le32_to_cpu(me_hdr->ucode_offset_bytes));
3294 	fw_ucode_size = le32_to_cpu(me_hdr->ucode_size_bytes);
3295 	/* data */
3296 	fw_data = (const __le32 *)(adev->gfx.me_fw->data +
3297 		le32_to_cpu(me_hdr->data_offset_bytes));
3298 	fw_data_size = le32_to_cpu(me_hdr->data_size_bytes);
3299 
3300 	/* 64kb align */
3301 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3302 				      64 * 1024,
3303 				      AMDGPU_GEM_DOMAIN_VRAM |
3304 				      AMDGPU_GEM_DOMAIN_GTT,
3305 				      &adev->gfx.me.me_fw_obj,
3306 				      &adev->gfx.me.me_fw_gpu_addr,
3307 				      (void **)&adev->gfx.me.me_fw_ptr);
3308 	if (r) {
3309 		dev_err(adev->dev, "(%d) failed to create me ucode bo\n", r);
3310 		gfx_v11_0_me_fini(adev);
3311 		return r;
3312 	}
3313 
3314 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3315 				      64 * 1024,
3316 				      AMDGPU_GEM_DOMAIN_VRAM |
3317 				      AMDGPU_GEM_DOMAIN_GTT,
3318 				      &adev->gfx.me.me_fw_data_obj,
3319 				      &adev->gfx.me.me_fw_data_gpu_addr,
3320 				      (void **)&adev->gfx.me.me_fw_data_ptr);
3321 	if (r) {
3322 		dev_err(adev->dev, "(%d) failed to create me data bo\n", r);
3323 		gfx_v11_0_pfp_fini(adev);
3324 		return r;
3325 	}
3326 
3327 	memcpy(adev->gfx.me.me_fw_ptr, fw_ucode, fw_ucode_size);
3328 	memcpy(adev->gfx.me.me_fw_data_ptr, fw_data, fw_data_size);
3329 
3330 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_obj);
3331 	amdgpu_bo_kunmap(adev->gfx.me.me_fw_data_obj);
3332 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_obj);
3333 	amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
3334 
3335 	if (amdgpu_emu_mode == 1)
3336 		adev->hdp.funcs->flush_hdp(adev, NULL);
3337 
3338 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
3339 		lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
3340 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_HI,
3341 		upper_32_bits(adev->gfx.me.me_fw_gpu_addr));
3342 
3343 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
3344 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
3345 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, CACHE_POLICY, 0);
3346 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, EXE_DISABLE, 0);
3347 	WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL, tmp);
3348 
3349 	/*
3350 	 * Programming any of the CP_ME_IC_BASE registers
3351 	 * forces invalidation of the ME L1 I$. Wait for the
3352 	 * invalidation to complete.
3353 	 */
3354 	for (i = 0; i < usec_timeout; i++) {
3355 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3356 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3357 			INVALIDATE_CACHE_COMPLETE))
3358 			break;
3359 		udelay(1);
3360 	}
3361 
3362 	if (i >= usec_timeout) {
3363 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3364 		return -EINVAL;
3365 	}
3366 
3367 	/* Prime the instruction caches */
3368 	tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3369 	tmp = REG_SET_FIELD(tmp, CP_ME_IC_OP_CNTL, PRIME_ICACHE, 1);
3370 	WREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL, tmp);
3371 
3372 	/* Wait for the instruction cache to be primed */
3373 	for (i = 0; i < usec_timeout; i++) {
3374 		tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_OP_CNTL);
3375 		if (1 == REG_GET_FIELD(tmp, CP_ME_IC_OP_CNTL,
3376 			ICACHE_PRIMED))
3377 			break;
3378 		udelay(1);
3379 	}
3380 
3381 	if (i >= usec_timeout) {
3382 		dev_err(adev->dev, "failed to prime instruction cache\n");
3383 		return -EINVAL;
3384 	}
3385 
3386 	mutex_lock(&adev->srbm_mutex);
3387 	for (pipe_id = 0; pipe_id < adev->gfx.me.num_pipe_per_me; pipe_id++) {
3388 		soc21_grbm_select(adev, 0, pipe_id, 0, 0);
3389 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START,
3390 			(me_hdr->ucode_start_addr_hi << 30) |
3391 			(me_hdr->ucode_start_addr_lo >> 2));
3392 		WREG32_SOC15(GC, 0, regCP_ME_PRGRM_CNTR_START_HI,
3393 			me_hdr->ucode_start_addr_hi >> 2);
3394 
3395 		/*
3396 		 * Program CP_ME_CNTL to reset the given pipe so that
3397 		 * CP_ME_PRGRM_CNTR_START takes effect.
3398 		 */
3399 		tmp = RREG32_SOC15(GC, 0, regCP_ME_CNTL);
3400 		if (pipe_id == 0)
3401 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3402 					ME_PIPE0_RESET, 1);
3403 		else
3404 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3405 					ME_PIPE1_RESET, 1);
3406 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3407 
3408 		/* Clear the me pipe reset bit. */
3409 		if (pipe_id == 0)
3410 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3411 					ME_PIPE0_RESET, 0);
3412 		else
3413 			tmp = REG_SET_FIELD(tmp, CP_ME_CNTL,
3414 					ME_PIPE1_RESET, 0);
3415 		WREG32_SOC15(GC, 0, regCP_ME_CNTL, tmp);
3416 
3417 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_LO,
3418 			lower_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3419 		WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE1_HI,
3420 			upper_32_bits(adev->gfx.me.me_fw_data_gpu_addr));
3421 	}
3422 	soc21_grbm_select(adev, 0, 0, 0, 0);
3423 	mutex_unlock(&adev->srbm_mutex);
3424 
3425 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL);
3426 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, VMID, 0);
3427 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_BASE_CNTL, CACHE_POLICY, 0);
3428 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_BASE_CNTL, tmp);
3429 
3430 	/* Invalidate the data caches */
3431 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3432 	tmp = REG_SET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3433 	WREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL, tmp);
3434 
3435 	for (i = 0; i < usec_timeout; i++) {
3436 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_RS64_DC_OP_CNTL);
3437 		if (1 == REG_GET_FIELD(tmp, CP_GFX_RS64_DC_OP_CNTL,
3438 			INVALIDATE_DCACHE_COMPLETE))
3439 			break;
3440 		udelay(1);
3441 	}
3442 
3443 	if (i >= usec_timeout) {
3444 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3445 		return -EINVAL;
3446 	}
3447 
3448 	return 0;
3449 }
3450 
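/*
 * Halt the gfx CP and front-door load the PFP and ME firmware, picking the
 * RS64 or legacy loader based on adev->gfx.rs64_enable.
 */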
3451 static int gfx_v11_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3452 {
3453 	int r;
3454 
3455 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw)
3456 		return -EINVAL;
3457 
3458 	gfx_v11_0_cp_gfx_enable(adev, false);
3459 
3460 	if (adev->gfx.rs64_enable)
3461 		r = gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(adev);
3462 	else
3463 		r = gfx_v11_0_cp_gfx_load_pfp_microcode(adev);
3464 	if (r) {
3465 		dev_err(adev->dev, "(%d) failed to load pfp fw\n", r);
3466 		return r;
3467 	}
3468 
3469 	if (adev->gfx.rs64_enable)
3470 		r = gfx_v11_0_cp_gfx_load_me_microcode_rs64(adev);
3471 	else
3472 		r = gfx_v11_0_cp_gfx_load_me_microcode(adev);
3473 	if (r) {
3474 		dev_err(adev->dev, "(%d) failed to load me fw\n", r);
3475 		return r;
3476 	}
3477 
3478 	return 0;
3479 }
3480 
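/*
 * Emit the clear-state preamble on gfx ring 0: context control, the
 * SET_CONTEXT_REG ranges from gfx11_cs_data, the tile steering override
 * and a CLEAR_STATE packet; gfx ring 1 (when present) only gets a
 * CLEAR_STATE to copy state 0.
 */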
3481 static int gfx_v11_0_cp_gfx_start(struct amdgpu_device *adev)
3482 {
3483 	struct amdgpu_ring *ring;
3484 	const struct cs_section_def *sect = NULL;
3485 	const struct cs_extent_def *ext = NULL;
3486 	int r, i;
3487 	int ctx_reg_offset;
3488 
3489 	/* init the CP */
3490 	WREG32_SOC15(GC, 0, regCP_MAX_CONTEXT,
3491 		     adev->gfx.config.max_hw_contexts - 1);
3492 	WREG32_SOC15(GC, 0, regCP_DEVICE_ID, 1);
3493 
3494 	if (!amdgpu_async_gfx_ring)
3495 		gfx_v11_0_cp_gfx_enable(adev, true);
3496 
3497 	ring = &adev->gfx.gfx_ring[0];
3498 	r = amdgpu_ring_alloc(ring, gfx_v11_0_get_csb_size(adev));
3499 	if (r) {
3500 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3501 		return r;
3502 	}
3503 
3504 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3505 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3506 
3507 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3508 	amdgpu_ring_write(ring, 0x80000000);
3509 	amdgpu_ring_write(ring, 0x80000000);
3510 
3511 	for (sect = gfx11_cs_data; sect->section != NULL; ++sect) {
3512 		for (ext = sect->section; ext->extent != NULL; ++ext) {
3513 			if (sect->id == SECT_CONTEXT) {
3514 				amdgpu_ring_write(ring,
3515 						  PACKET3(PACKET3_SET_CONTEXT_REG,
3516 							  ext->reg_count));
3517 				amdgpu_ring_write(ring, ext->reg_index -
3518 						  PACKET3_SET_CONTEXT_REG_START);
3519 				for (i = 0; i < ext->reg_count; i++)
3520 					amdgpu_ring_write(ring, ext->extent[i]);
3521 			}
3522 		}
3523 	}
3524 
3525 	ctx_reg_offset =
3526 		SOC15_REG_OFFSET(GC, 0, regPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
3527 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
3528 	amdgpu_ring_write(ring, ctx_reg_offset);
3529 	amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
3530 
3531 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3532 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3533 
3534 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3535 	amdgpu_ring_write(ring, 0);
3536 
3537 	amdgpu_ring_commit(ring);
3538 
3539 	/* submit cs packet to copy state 0 to next available state */
3540 	if (adev->gfx.num_gfx_rings > 1) {
3541 		/* maximum supported gfx ring is 2 */
3542 		ring = &adev->gfx.gfx_ring[1];
3543 		r = amdgpu_ring_alloc(ring, 2);
3544 		if (r) {
3545 			DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3546 			return r;
3547 		}
3548 
3549 		amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3550 		amdgpu_ring_write(ring, 0);
3551 
3552 		amdgpu_ring_commit(ring);
3553 	}
3554 	return 0;
3555 }
3556 
3557 static void gfx_v11_0_cp_gfx_switch_pipe(struct amdgpu_device *adev,
3558 					 CP_PIPE_ID pipe)
3559 {
3560 	u32 tmp;
3561 
3562 	tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
3563 	tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, pipe);
3564 
3565 	WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
3566 }
3567 
3568 static void gfx_v11_0_cp_gfx_set_doorbell(struct amdgpu_device *adev,
3569 					  struct amdgpu_ring *ring)
3570 {
3571 	u32 tmp;
3572 
3573 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
3574 	if (ring->use_doorbell) {
3575 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3576 				    DOORBELL_OFFSET, ring->doorbell_index);
3577 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3578 				    DOORBELL_EN, 1);
3579 	} else {
3580 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3581 				    DOORBELL_EN, 0);
3582 	}
3583 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL, tmp);
3584 
3585 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3586 			    DOORBELL_RANGE_LOWER, ring->doorbell_index);
3587 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER, tmp);
3588 
3589 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3590 		     CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3591 }
3592 
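/*
 * Program the gfx ring buffers: ring size, rptr write-back and wptr poll
 * addresses, ring base and doorbell for RB0 (and RB1 when a second gfx
 * ring exists), then kick off the CP via gfx_v11_0_cp_gfx_start().
 */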
3593 static int gfx_v11_0_cp_gfx_resume(struct amdgpu_device *adev)
3594 {
3595 	struct amdgpu_ring *ring;
3596 	u32 tmp;
3597 	u32 rb_bufsz;
3598 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
3599 
3600 	/* Set the write pointer delay */
3601 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
3602 
3603 	/* set the RB to use vmid 0 */
3604 	WREG32_SOC15(GC, 0, regCP_RB_VMID, 0);
3605 
3606 	/* Init gfx ring 0 for pipe 0 */
3607 	mutex_lock(&adev->srbm_mutex);
3608 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3609 
3610 	/* Set ring buffer size */
3611 	ring = &adev->gfx.gfx_ring[0];
3612 	rb_bufsz = order_base_2(ring->ring_size / 8);
3613 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3614 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3615 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3616 
3617 	/* Initialize the ring buffer's write pointers */
3618 	ring->wptr = 0;
3619 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
3620 	WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3621 
3622 	/* set the wb address whether it's enabled or not */
3623 	rptr_addr = ring->rptr_gpu_addr;
3624 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3625 	WREG32_SOC15(GC, 0, regCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3626 		     CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3627 
3628 	wptr_gpu_addr = ring->wptr_gpu_addr;
3629 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3630 		     lower_32_bits(wptr_gpu_addr));
3631 	WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3632 		     upper_32_bits(wptr_gpu_addr));
3633 
3634 	mdelay(1);
3635 	WREG32_SOC15(GC, 0, regCP_RB0_CNTL, tmp);
3636 
3637 	rb_addr = ring->gpu_addr >> 8;
3638 	WREG32_SOC15(GC, 0, regCP_RB0_BASE, rb_addr);
3639 	WREG32_SOC15(GC, 0, regCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3640 
3641 	WREG32_SOC15(GC, 0, regCP_RB_ACTIVE, 1);
3642 
3643 	gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3644 	mutex_unlock(&adev->srbm_mutex);
3645 
3646 	/* Init gfx ring 1 for pipe 1 */
3647 	if (adev->gfx.num_gfx_rings > 1) {
3648 		mutex_lock(&adev->srbm_mutex);
3649 		gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
3650 		/* maximum supported gfx ring is 2 */
3651 		ring = &adev->gfx.gfx_ring[1];
3652 		rb_bufsz = order_base_2(ring->ring_size / 8);
3653 		tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
3654 		tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
3655 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3656 		/* Initialize the ring buffer's write pointers */
3657 		ring->wptr = 0;
3658 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR, lower_32_bits(ring->wptr));
3659 		WREG32_SOC15(GC, 0, regCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
3660 		/* Set the wb address whether it's enabled or not */
3661 		rptr_addr = ring->rptr_gpu_addr;
3662 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
3663 		WREG32_SOC15(GC, 0, regCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
3664 			     CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3665 		wptr_gpu_addr = ring->wptr_gpu_addr;
3666 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO,
3667 			     lower_32_bits(wptr_gpu_addr));
3668 		WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI,
3669 			     upper_32_bits(wptr_gpu_addr));
3670 
3671 		mdelay(1);
3672 		WREG32_SOC15(GC, 0, regCP_RB1_CNTL, tmp);
3673 
3674 		rb_addr = ring->gpu_addr >> 8;
3675 		WREG32_SOC15(GC, 0, regCP_RB1_BASE, rb_addr);
3676 		WREG32_SOC15(GC, 0, regCP_RB1_BASE_HI, upper_32_bits(rb_addr));
3677 		WREG32_SOC15(GC, 0, regCP_RB1_ACTIVE, 1);
3678 
3679 		gfx_v11_0_cp_gfx_set_doorbell(adev, ring);
3680 		mutex_unlock(&adev->srbm_mutex);
3681 	}
3682 	/* Switch to pipe 0 */
3683 	mutex_lock(&adev->srbm_mutex);
3684 	gfx_v11_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
3685 	mutex_unlock(&adev->srbm_mutex);
3686 
3687 	/* start the ring */
3688 	gfx_v11_0_cp_gfx_start(adev);
3689 
3690 	return 0;
3691 }
3692 
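/*
 * Enable or halt the compute micro engine: for RS64 MEC toggle the per-pipe
 * reset/active bits and MEC_HALT in CP_MEC_RS64_CNTL, otherwise toggle
 * MEC_ME1/ME2_HALT in CP_MEC_CNTL (ME2 stays halted when the MES KIQ is in
 * use).
 */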
3693 static void gfx_v11_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3694 {
3695 	u32 data;
3696 
3697 	if (adev->gfx.rs64_enable) {
3698 		data = RREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL);
3699 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
3700 							 enable ? 0 : 1);
3701 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
3702 							 enable ? 0 : 1);
3703 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
3704 							 enable ? 0 : 1);
3705 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
3706 							 enable ? 0 : 1);
3707 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
3708 							 enable ? 0 : 1);
3709 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
3710 							 enable ? 1 : 0);
3711 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
3712 				                         enable ? 1 : 0);
3713 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
3714 							 enable ? 1 : 0);
3715 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
3716 							 enable ? 1 : 0);
3717 		data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
3718 							 enable ? 0 : 1);
3719 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, data);
3720 	} else {
3721 		data = RREG32_SOC15(GC, 0, regCP_MEC_CNTL);
3722 
3723 		if (enable) {
3724 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 0);
3725 			if (!adev->enable_mes_kiq)
3726 				data = REG_SET_FIELD(data, CP_MEC_CNTL,
3727 						     MEC_ME2_HALT, 0);
3728 		} else {
3729 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME1_HALT, 1);
3730 			data = REG_SET_FIELD(data, CP_MEC_CNTL, MEC_ME2_HALT, 1);
3731 		}
3732 		WREG32_SOC15(GC, 0, regCP_MEC_CNTL, data);
3733 	}
3734 
3735 	udelay(50);
3736 }
3737 
3738 static int gfx_v11_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3739 {
3740 	const struct gfx_firmware_header_v1_0 *mec_hdr;
3741 	const __le32 *fw_data;
3742 	unsigned i, fw_size;
3743 	u32 *fw = NULL;
3744 	int r;
3745 
3746 	if (!adev->gfx.mec_fw)
3747 		return -EINVAL;
3748 
3749 	gfx_v11_0_cp_compute_enable(adev, false);
3750 
3751 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3752 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3753 
3754 	fw_data = (const __le32 *)
3755 		(adev->gfx.mec_fw->data +
3756 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3757 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
3758 
3759 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
3760 					  PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
3761 					  &adev->gfx.mec.mec_fw_obj,
3762 					  &adev->gfx.mec.mec_fw_gpu_addr,
3763 					  (void **)&fw);
3764 	if (r) {
3765 		dev_err(adev->dev, "(%d) failed to create mec fw bo\n", r);
3766 		gfx_v11_0_mec_fini(adev);
3767 		return r;
3768 	}
3769 
3770 	memcpy(fw, fw_data, fw_size);
3771 
3772 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3773 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3774 
3775 	gfx_v11_0_config_mec_cache(adev, adev->gfx.mec.mec_fw_gpu_addr);
3776 
3777 	/* MEC1 */
3778 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, 0);
3779 
3780 	for (i = 0; i < mec_hdr->jt_size; i++)
3781 		WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_DATA,
3782 			     le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3783 
3784 	WREG32_SOC15(GC, 0, regCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3785 
3786 	return 0;
3787 }
3788 
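/*
 * Front-door load of the RS64 MEC firmware: halt the MEC, copy the
 * instruction and data images into 64KB-aligned BOs, program the
 * icache/dcache bases and boot vector for every MEC pipe, then invalidate
 * the data and instruction caches and wait for completion.
 */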
3789 static int gfx_v11_0_cp_compute_load_microcode_rs64(struct amdgpu_device *adev)
3790 {
3791 	const struct gfx_firmware_header_v2_0 *mec_hdr;
3792 	const __le32 *fw_ucode, *fw_data;
3793 	u32 tmp, fw_ucode_size, fw_data_size;
3794 	u32 i, usec_timeout = 50000; /* Wait for 50 ms */
3795 	u32 *fw_ucode_ptr, *fw_data_ptr;
3796 	int r;
3797 
3798 	if (!adev->gfx.mec_fw)
3799 		return -EINVAL;
3800 
3801 	gfx_v11_0_cp_compute_enable(adev, false);
3802 
3803 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
3804 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3805 
3806 	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
3807 				le32_to_cpu(mec_hdr->ucode_offset_bytes));
3808 	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
3809 
3810 	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
3811 				le32_to_cpu(mec_hdr->data_offset_bytes));
3812 	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
3813 
3814 	r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
3815 				      64 * 1024,
3816 				      AMDGPU_GEM_DOMAIN_VRAM |
3817 				      AMDGPU_GEM_DOMAIN_GTT,
3818 				      &adev->gfx.mec.mec_fw_obj,
3819 				      &adev->gfx.mec.mec_fw_gpu_addr,
3820 				      (void **)&fw_ucode_ptr);
3821 	if (r) {
3822 		dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
3823 		gfx_v11_0_mec_fini(adev);
3824 		return r;
3825 	}
3826 
3827 	r = amdgpu_bo_create_reserved(adev, fw_data_size,
3828 				      64 * 1024,
3829 				      AMDGPU_GEM_DOMAIN_VRAM |
3830 				      AMDGPU_GEM_DOMAIN_GTT,
3831 				      &adev->gfx.mec.mec_fw_data_obj,
3832 				      &adev->gfx.mec.mec_fw_data_gpu_addr,
3833 				      (void **)&fw_data_ptr);
3834 	if (r) {
3835 		dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
3836 		gfx_v11_0_mec_fini(adev);
3837 		return r;
3838 	}
3839 
3840 	memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
3841 	memcpy(fw_data_ptr, fw_data, fw_data_size);
3842 
3843 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
3844 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
3845 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
3846 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
3847 
3848 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
3849 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3850 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
3851 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3852 	WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL, tmp);
3853 
3854 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL);
3855 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
3856 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
3857 	WREG32_SOC15(GC, 0, regCP_MEC_DC_BASE_CNTL, tmp);
3858 
3859 	mutex_lock(&adev->srbm_mutex);
3860 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
3861 		soc21_grbm_select(adev, 1, i, 0, 0);
3862 
3863 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_LO, adev->gfx.mec.mec_fw_data_gpu_addr);
3864 		WREG32_SOC15(GC, 0, regCP_MEC_MDBASE_HI,
3865 		     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr));
3866 
3867 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START,
3868 					mec_hdr->ucode_start_addr_lo >> 2 |
3869 					mec_hdr->ucode_start_addr_hi << 30);
3870 		WREG32_SOC15(GC, 0, regCP_MEC_RS64_PRGRM_CNTR_START_HI,
3871 					mec_hdr->ucode_start_addr_hi >> 2);
3872 
3873 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_LO, adev->gfx.mec.mec_fw_gpu_addr);
3874 		WREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_HI,
3875 		     upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3876 	}
3877 	mutex_unlock(&adev->srbm_mutex);
3878 	soc21_grbm_select(adev, 0, 0, 0, 0);
3879 
3880 	/* Trigger an invalidation of the MEC data cache */
3881 	tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3882 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
3883 	WREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL, tmp);
3884 
3885 	/* Wait for invalidation complete */
3886 	for (i = 0; i < usec_timeout; i++) {
3887 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_DC_OP_CNTL);
3888 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
3889 				       INVALIDATE_DCACHE_COMPLETE))
3890 			break;
3891 		udelay(1);
3892 	}
3893 
3894 	if (i >= usec_timeout) {
3895 		dev_err(adev->dev, "failed to invalidate RS64 data cache\n");
3896 		return -EINVAL;
3897 	}
3898 
3899 	/* Trigger an invalidation of the L1 instruction caches */
3900 	tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3901 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
3902 	WREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL, tmp);
3903 
3904 	/* Wait for invalidation complete */
3905 	for (i = 0; i < usec_timeout; i++) {
3906 		tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_OP_CNTL);
3907 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
3908 				       INVALIDATE_CACHE_COMPLETE))
3909 			break;
3910 		udelay(1);
3911 	}
3912 
3913 	if (i >= usec_timeout) {
3914 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
3915 		return -EINVAL;
3916 	}
3917 
3918 	return 0;
3919 }
3920 
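/*
 * Tell the RLC which queue is the KIQ by encoding me/pipe/queue into
 * RLC_CP_SCHEDULERS along with the enable bit (0x80).
 */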
3921 static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring)
3922 {
3923 	uint32_t tmp;
3924 	struct amdgpu_device *adev = ring->adev;
3925 
3926 	/* tell RLC which is KIQ queue */
3927 	tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
3928 	tmp &= 0xffffff00;
3929 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3930 	WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80);
3931 }
3932 
3933 static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
3934 {
3935 	/* set graphics engine doorbell range */
3936 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_LOWER,
3937 		     (adev->doorbell_index.gfx_ring0 * 2) << 2);
3938 	WREG32_SOC15(GC, 0, regCP_RB_DOORBELL_RANGE_UPPER,
3939 		     (adev->doorbell_index.gfx_userqueue_end * 2) << 2);
3940 
3941 	/* set compute engine doorbell range */
3942 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
3943 		     (adev->doorbell_index.kiq * 2) << 2);
3944 	WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
3945 		     (adev->doorbell_index.userqueue_end * 2) << 2);
3946 }
3947 
3948 static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
3949 					   struct v11_gfx_mqd *mqd,
3950 					   struct amdgpu_mqd_prop *prop)
3951 {
3952 	bool priority = 0;
3953 	u32 tmp;
3954 
3955 	/* set up default queue priority level
3956 	 * 0x0 = low priority, 0x1 = high priority
3957 	 */
3958 	if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
3959 		priority = 1;
3960 
3961 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
3962 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
3963 	mqd->cp_gfx_hqd_queue_priority = tmp;
3964 }
3965 
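/*
 * Fill a v11 gfx MQD from the queue properties: MQD and HQD base addresses,
 * rptr/wptr write-back addresses, ring size and block size, priority,
 * quantum and doorbell control, then mark the queue active.
 */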
3966 static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
3967 				  struct amdgpu_mqd_prop *prop)
3968 {
3969 	struct v11_gfx_mqd *mqd = m;
3970 	uint64_t hqd_gpu_addr, wb_gpu_addr;
3971 	uint32_t tmp;
3972 	uint32_t rb_bufsz;
3973 
3974 	/* set up gfx hqd wptr */
3975 	mqd->cp_gfx_hqd_wptr = 0;
3976 	mqd->cp_gfx_hqd_wptr_hi = 0;
3977 
3978 	/* set the pointer to the MQD */
3979 	mqd->cp_mqd_base_addr = prop->mqd_gpu_addr & 0xfffffffc;
3980 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
3981 
3982 	/* set up mqd control */
3983 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL);
3984 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0);
3985 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1);
3986 	tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0);
3987 	mqd->cp_gfx_mqd_control = tmp;
3988 
3989 	/* set up gfx_hqd_vmid with 0x0 to indicate the ring buffer's vmid */
3990 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID);
3991 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
3992 	mqd->cp_gfx_hqd_vmid = 0;
3993 
3994 	/* set up gfx queue priority */
3995 	gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);
3996 
3997 	/* set up time quantum */
3998 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
3999 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1);
4000 	mqd->cp_gfx_hqd_quantum = tmp;
4001 
4002 	/* set up gfx hqd base. this is similar to CP_RB_BASE */
4003 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
4004 	mqd->cp_gfx_hqd_base = hqd_gpu_addr;
4005 	mqd->cp_gfx_hqd_base_hi = upper_32_bits(hqd_gpu_addr);
4006 
4007 	/* set up hqd_rptr_addr/_hi, similar as CP_RB_RPTR */
4008 	wb_gpu_addr = prop->rptr_gpu_addr;
4009 	mqd->cp_gfx_hqd_rptr_addr = wb_gpu_addr & 0xfffffffc;
4010 	mqd->cp_gfx_hqd_rptr_addr_hi =
4011 		upper_32_bits(wb_gpu_addr) & 0xffff;
4012 
4013 	/* set up rb_wptr_poll addr */
4014 	wb_gpu_addr = prop->wptr_gpu_addr;
4015 	mqd->cp_rb_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4016 	mqd->cp_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4017 
4018 	/* set up the gfx_hqd_control, similar as CP_RB0_CNTL */
4019 	rb_bufsz = order_base_2(prop->queue_size / 4) - 1;
4020 	tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL);
4021 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz);
4022 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2);
4023 #ifdef __BIG_ENDIAN
4024 	tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, BUF_SWAP, 1);
4025 #endif
4026 	mqd->cp_gfx_hqd_cntl = tmp;
4027 
4028 	/* set up cp_doorbell_control */
4029 	tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL);
4030 	if (prop->use_doorbell) {
4031 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4032 				    DOORBELL_OFFSET, prop->doorbell_index);
4033 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4034 				    DOORBELL_EN, 1);
4035 	} else
4036 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4037 				    DOORBELL_EN, 0);
4038 	mqd->cp_rb_doorbell_control = tmp;
4039 
4040 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4041 	mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR);
4042 
4043 	/* activate the queue */
4044 	mqd->cp_gfx_hqd_active = 1;
4045 
4046 	return 0;
4047 }
4048 
4049 static int gfx_v11_0_kgq_init_queue(struct amdgpu_ring *ring, bool reset)
4050 {
4051 	struct amdgpu_device *adev = ring->adev;
4052 	struct v11_gfx_mqd *mqd = ring->mqd_ptr;
4053 	int mqd_idx = ring - &adev->gfx.gfx_ring[0];
4054 
4055 	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
4056 		memset((void *)mqd, 0, sizeof(*mqd));
4057 		mutex_lock(&adev->srbm_mutex);
4058 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4059 		amdgpu_ring_init_mqd(ring);
4060 		soc21_grbm_select(adev, 0, 0, 0, 0);
4061 		mutex_unlock(&adev->srbm_mutex);
4062 		if (adev->gfx.me.mqd_backup[mqd_idx])
4063 			memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4064 	} else {
4065 		/* restore mqd with the backup copy */
4066 		if (adev->gfx.me.mqd_backup[mqd_idx])
4067 			memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
4068 		/* reset the ring */
4069 		ring->wptr = 0;
4070 		*ring->wptr_cpu_addr = 0;
4071 		amdgpu_ring_clear_ring(ring);
4072 	}
4073 
4074 	return 0;
4075 }
4076 
4077 static int gfx_v11_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
4078 {
4079 	int r, i;
4080 	struct amdgpu_ring *ring;
4081 
4082 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4083 		ring = &adev->gfx.gfx_ring[i];
4084 
4085 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4086 		if (unlikely(r != 0))
4087 			return r;
4088 
4089 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4090 		if (!r) {
4091 			r = gfx_v11_0_kgq_init_queue(ring, false);
4092 			amdgpu_bo_kunmap(ring->mqd_obj);
4093 			ring->mqd_ptr = NULL;
4094 		}
4095 		amdgpu_bo_unreserve(ring->mqd_obj);
4096 		if (r)
4097 			return r;
4098 	}
4099 
4100 	r = amdgpu_gfx_enable_kgq(adev, 0);
4101 	if (r)
4102 		return r;
4103 
4104 	return gfx_v11_0_cp_gfx_start(adev);
4105 }
4106 
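/*
 * Fill a v11 compute MQD from the queue properties: EOP buffer base and
 * size, MQD and PQ base addresses, queue size and dispatch controls, rptr
 * report and wptr poll addresses, doorbell control and pipe/queue priority.
 */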
4107 static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
4108 				      struct amdgpu_mqd_prop *prop)
4109 {
4110 	struct v11_compute_mqd *mqd = m;
4111 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4112 	uint32_t tmp;
4113 
4114 	mqd->header = 0xC0310800;
4115 	mqd->compute_pipelinestat_enable = 0x00000001;
4116 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4117 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4118 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4119 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4120 	mqd->compute_misc_reserved = 0x00000007;
4121 
4122 	eop_base_addr = prop->eop_gpu_addr >> 8;
4123 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4124 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4125 
4126 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4127 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL);
4128 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4129 			(order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1));
4130 
4131 	mqd->cp_hqd_eop_control = tmp;
4132 
4133 	/* enable doorbell? */
4134 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
4135 
4136 	if (prop->use_doorbell) {
4137 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4138 				    DOORBELL_OFFSET, prop->doorbell_index);
4139 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4140 				    DOORBELL_EN, 1);
4141 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4142 				    DOORBELL_SOURCE, 0);
4143 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4144 				    DOORBELL_HIT, 0);
4145 	} else {
4146 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4147 				    DOORBELL_EN, 0);
4148 	}
4149 
4150 	mqd->cp_hqd_pq_doorbell_control = tmp;
4151 
4152 	/* disable the queue if it's active */
4153 	mqd->cp_hqd_dequeue_request = 0;
4154 	mqd->cp_hqd_pq_rptr = 0;
4155 	mqd->cp_hqd_pq_wptr_lo = 0;
4156 	mqd->cp_hqd_pq_wptr_hi = 0;
4157 
4158 	/* set the pointer to the MQD */
4159 	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
4160 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
4161 
4162 	/* set MQD vmid to 0 */
4163 	tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL);
4164 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4165 	mqd->cp_mqd_control = tmp;
4166 
4167 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4168 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
4169 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4170 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4171 
4172 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4173 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL);
4174 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4175 			    (order_base_2(prop->queue_size / 4) - 1));
4176 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4177 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
4178 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
4179 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
4180 			    prop->allow_tunneling);
4181 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4182 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4183 	mqd->cp_hqd_pq_control = tmp;
4184 
4185 	/* set the wb address whether it's enabled or not */
4186 	wb_gpu_addr = prop->rptr_gpu_addr;
4187 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4188 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4189 		upper_32_bits(wb_gpu_addr) & 0xffff;
4190 
4191 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4192 	wb_gpu_addr = prop->wptr_gpu_addr;
4193 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4194 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4195 
4196 	tmp = 0;
4197 	/* enable the doorbell if requested */
4198 	if (prop->use_doorbell) {
4199 		tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
4200 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4201 				DOORBELL_OFFSET, prop->doorbell_index);
4202 
4203 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4204 				    DOORBELL_EN, 1);
4205 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4206 				    DOORBELL_SOURCE, 0);
4207 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4208 				    DOORBELL_HIT, 0);
4209 	}
4210 
4211 	mqd->cp_hqd_pq_doorbell_control = tmp;
4212 
4213 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4214 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR);
4215 
4216 	/* set the vmid for the queue */
4217 	mqd->cp_hqd_vmid = 0;
4218 
4219 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE);
4220 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55);
4221 	mqd->cp_hqd_persistent_state = tmp;
4222 
4223 	/* set MIN_IB_AVAIL_SIZE */
4224 	tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL);
4225 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4226 	mqd->cp_hqd_ib_control = tmp;
4227 
4228 	/* set static priority for a compute queue/ring */
4229 	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
4230 	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
4231 
4232 	mqd->cp_hqd_active = prop->hqd_active;
4233 
4234 	return 0;
4235 }
4236 
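/*
 * Program the HQD registers directly from the KIQ MQD: EOP/MQD/PQ bases and
 * controls, drain the queue if it is still active, set the MEC doorbell
 * range, then activate the queue and enable its doorbell.
 */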
4237 static int gfx_v11_0_kiq_init_register(struct amdgpu_ring *ring)
4238 {
4239 	struct amdgpu_device *adev = ring->adev;
4240 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4241 	int j;
4242 
4243 	/* inactivate the queue */
4244 	if (amdgpu_sriov_vf(adev))
4245 		WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE, 0);
4246 
4247 	/* disable wptr polling */
4248 	WREG32_FIELD15_PREREG(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4249 
4250 	/* write the EOP addr */
4251 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR,
4252 	       mqd->cp_hqd_eop_base_addr_lo);
4253 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI,
4254 	       mqd->cp_hqd_eop_base_addr_hi);
4255 
4256 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4257 	WREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL,
4258 	       mqd->cp_hqd_eop_control);
4259 
4260 	/* enable doorbell? */
4261 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4262 	       mqd->cp_hqd_pq_doorbell_control);
4263 
4264 	/* disable the queue if it's active */
4265 	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
4266 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
4267 		for (j = 0; j < adev->usec_timeout; j++) {
4268 			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
4269 				break;
4270 			udelay(1);
4271 		}
4272 		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST,
4273 		       mqd->cp_hqd_dequeue_request);
4274 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR,
4275 		       mqd->cp_hqd_pq_rptr);
4276 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4277 		       mqd->cp_hqd_pq_wptr_lo);
4278 		WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4279 		       mqd->cp_hqd_pq_wptr_hi);
4280 	}
4281 
4282 	/* set the pointer to the MQD */
4283 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR,
4284 	       mqd->cp_mqd_base_addr_lo);
4285 	WREG32_SOC15(GC, 0, regCP_MQD_BASE_ADDR_HI,
4286 	       mqd->cp_mqd_base_addr_hi);
4287 
4288 	/* set MQD vmid to 0 */
4289 	WREG32_SOC15(GC, 0, regCP_MQD_CONTROL,
4290 	       mqd->cp_mqd_control);
4291 
4292 	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4293 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE,
4294 	       mqd->cp_hqd_pq_base_lo);
4295 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_BASE_HI,
4296 	       mqd->cp_hqd_pq_base_hi);
4297 
4298 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4299 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL,
4300 	       mqd->cp_hqd_pq_control);
4301 
4302 	/* set the wb address whether it's enabled or not */
4303 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR,
4304 		mqd->cp_hqd_pq_rptr_report_addr_lo);
4305 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4306 		mqd->cp_hqd_pq_rptr_report_addr_hi);
4307 
4308 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4309 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR,
4310 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
4311 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4312 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
4313 
4314 	/* enable the doorbell if requested */
4315 	if (ring->use_doorbell) {
4316 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_LOWER,
4317 			(adev->doorbell_index.kiq * 2) << 2);
4318 		WREG32_SOC15(GC, 0, regCP_MEC_DOORBELL_RANGE_UPPER,
4319 			(adev->doorbell_index.userqueue_end * 2) << 2);
4320 	}
4321 
4322 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL,
4323 	       mqd->cp_hqd_pq_doorbell_control);
4324 
4325 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4326 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO,
4327 	       mqd->cp_hqd_pq_wptr_lo);
4328 	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI,
4329 	       mqd->cp_hqd_pq_wptr_hi);
4330 
4331 	/* set the vmid for the queue */
4332 	WREG32_SOC15(GC, 0, regCP_HQD_VMID, mqd->cp_hqd_vmid);
4333 
4334 	WREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE,
4335 	       mqd->cp_hqd_persistent_state);
4336 
4337 	/* activate the queue */
4338 	WREG32_SOC15(GC, 0, regCP_HQD_ACTIVE,
4339 	       mqd->cp_hqd_active);
4340 
4341 	if (ring->use_doorbell)
4342 		WREG32_FIELD15_PREREG(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4343 
4344 	return 0;
4345 }
4346 
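/*
 * Initialize the KIQ: on GPU reset restore the MQD from the backup copy and
 * reprogram the HQD registers; otherwise build a fresh MQD, program the HQD
 * and save a backup copy for later resets.
 */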
4347 static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
4348 {
4349 	struct amdgpu_device *adev = ring->adev;
4350 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4351 
4352 	gfx_v11_0_kiq_setting(ring);
4353 
4354 	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
4355 		/* reset MQD to a clean status */
4356 		if (adev->gfx.kiq[0].mqd_backup)
4357 			memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd));
4358 
4359 		/* reset ring buffer */
4360 		ring->wptr = 0;
4361 		amdgpu_ring_clear_ring(ring);
4362 
4363 		mutex_lock(&adev->srbm_mutex);
4364 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4365 		gfx_v11_0_kiq_init_register(ring);
4366 		soc21_grbm_select(adev, 0, 0, 0, 0);
4367 		mutex_unlock(&adev->srbm_mutex);
4368 	} else {
4369 		memset((void *)mqd, 0, sizeof(*mqd));
4370 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
4371 			amdgpu_ring_clear_ring(ring);
4372 		mutex_lock(&adev->srbm_mutex);
4373 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4374 		amdgpu_ring_init_mqd(ring);
4375 		gfx_v11_0_kiq_init_register(ring);
4376 		soc21_grbm_select(adev, 0, 0, 0, 0);
4377 		mutex_unlock(&adev->srbm_mutex);
4378 
4379 		if (adev->gfx.kiq[0].mqd_backup)
4380 			memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd));
4381 	}
4382 
4383 	return 0;
4384 }
4385 
4386 static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring, bool reset)
4387 {
4388 	struct amdgpu_device *adev = ring->adev;
4389 	struct v11_compute_mqd *mqd = ring->mqd_ptr;
4390 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
4391 
4392 	if (!reset && !amdgpu_in_reset(adev) && !adev->in_suspend) {
4393 		memset((void *)mqd, 0, sizeof(*mqd));
4394 		mutex_lock(&adev->srbm_mutex);
4395 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4396 		amdgpu_ring_init_mqd(ring);
4397 		soc21_grbm_select(adev, 0, 0, 0, 0);
4398 		mutex_unlock(&adev->srbm_mutex);
4399 
4400 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4401 			memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
4402 	} else {
4403 		/* restore MQD to a clean status */
4404 		if (adev->gfx.mec.mqd_backup[mqd_idx])
4405 			memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
4406 		/* reset ring buffer */
4407 		ring->wptr = 0;
4408 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
4409 		amdgpu_ring_clear_ring(ring);
4410 	}
4411 
4412 	return 0;
4413 }
4414 
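/*
 * Bring up the KIQ ring: reserve and kmap its MQD BO, (re)initialize the
 * MQD and HQD through gfx_v11_0_kiq_init_queue(), then mark the ring's
 * scheduler as ready.
 */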
4415 static int gfx_v11_0_kiq_resume(struct amdgpu_device *adev)
4416 {
4417 	struct amdgpu_ring *ring;
4418 	int r;
4419 
4420 	ring = &adev->gfx.kiq[0].ring;
4421 
4422 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
4423 	if (unlikely(r != 0))
4424 		return r;
4425 
4426 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4427 	if (unlikely(r != 0)) {
4428 		amdgpu_bo_unreserve(ring->mqd_obj);
4429 		return r;
4430 	}
4431 
4432 	gfx_v11_0_kiq_init_queue(ring);
4433 	amdgpu_bo_kunmap(ring->mqd_obj);
4434 	ring->mqd_ptr = NULL;
4435 	amdgpu_bo_unreserve(ring->mqd_obj);
4436 	ring->sched.ready = true;
4437 	return 0;
4438 }
4439 
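/*
 * Bring up all kernel compute queues.  When async gfx rings are
 * disabled the compute CP is enabled here first; each compute ring's
 * MQD is then initialized and the queues are mapped through the KIQ
 * with amdgpu_gfx_enable_kcq().
 */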
4440 static int gfx_v11_0_kcq_resume(struct amdgpu_device *adev)
4441 {
4442 	struct amdgpu_ring *ring = NULL;
4443 	int r = 0, i;
4444 
4445 	if (!amdgpu_async_gfx_ring)
4446 		gfx_v11_0_cp_compute_enable(adev, true);
4447 
4448 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4449 		ring = &adev->gfx.compute_ring[i];
4450 
4451 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
4452 		if (unlikely(r != 0))
4453 			goto done;
4454 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
4455 		if (!r) {
4456 			r = gfx_v11_0_kcq_init_queue(ring, false);
4457 			amdgpu_bo_kunmap(ring->mqd_obj);
4458 			ring->mqd_ptr = NULL;
4459 		}
4460 		amdgpu_bo_unreserve(ring->mqd_obj);
4461 		if (r)
4462 			goto done;
4463 	}
4464 
4465 	r = amdgpu_gfx_enable_kcq(adev, 0);
4466 done:
4467 	return r;
4468 }
4469 
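/*
 * CP bring-up sequence: load the CP microcode when using direct
 * (legacy) loading, program the doorbell range, enable the gfx/compute
 * CP engines when async gfx rings are used, resume the KIQ (or the MES
 * KIQ when MES is enabled), then the kernel compute queues and the gfx
 * ring(s), and finally run a ring test on every gfx and compute ring.
 */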
4470 static int gfx_v11_0_cp_resume(struct amdgpu_device *adev)
4471 {
4472 	int r, i;
4473 	struct amdgpu_ring *ring;
4474 
4475 	if (!(adev->flags & AMD_IS_APU))
4476 		gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4477 
4478 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4479 		/* legacy firmware loading */
4480 		r = gfx_v11_0_cp_gfx_load_microcode(adev);
4481 		if (r)
4482 			return r;
4483 
4484 		if (adev->gfx.rs64_enable)
4485 			r = gfx_v11_0_cp_compute_load_microcode_rs64(adev);
4486 		else
4487 			r = gfx_v11_0_cp_compute_load_microcode(adev);
4488 		if (r)
4489 			return r;
4490 	}
4491 
4492 	gfx_v11_0_cp_set_doorbell_range(adev);
4493 
4494 	if (amdgpu_async_gfx_ring) {
4495 		gfx_v11_0_cp_compute_enable(adev, true);
4496 		gfx_v11_0_cp_gfx_enable(adev, true);
4497 	}
4498 
4499 	if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
4500 		r = amdgpu_mes_kiq_hw_init(adev);
4501 	else
4502 		r = gfx_v11_0_kiq_resume(adev);
4503 	if (r)
4504 		return r;
4505 
4506 	r = gfx_v11_0_kcq_resume(adev);
4507 	if (r)
4508 		return r;
4509 
4510 	if (!amdgpu_async_gfx_ring) {
4511 		r = gfx_v11_0_cp_gfx_resume(adev);
4512 		if (r)
4513 			return r;
4514 	} else {
4515 		r = gfx_v11_0_cp_async_gfx_ring_resume(adev);
4516 		if (r)
4517 			return r;
4518 	}
4519 
4520 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4521 		ring = &adev->gfx.gfx_ring[i];
4522 		r = amdgpu_ring_test_helper(ring);
4523 		if (r)
4524 			return r;
4525 	}
4526 
4527 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4528 		ring = &adev->gfx.compute_ring[i];
4529 		r = amdgpu_ring_test_helper(ring);
4530 		if (r)
4531 			return r;
4532 	}
4533 
4534 	return 0;
4535 }
4536 
4537 static void gfx_v11_0_cp_enable(struct amdgpu_device *adev, bool enable)
4538 {
4539 	gfx_v11_0_cp_gfx_enable(adev, enable);
4540 	gfx_v11_0_cp_compute_enable(adev, enable);
4541 }
4542 
4543 static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
4544 {
4545 	int r;
4546 	bool value;
4547 
4548 	r = adev->gfxhub.funcs->gart_enable(adev);
4549 	if (r)
4550 		return r;
4551 
4552 	adev->hdp.funcs->flush_hdp(adev, NULL);
4553 
4554 	value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
4555 		false : true;
4556 
4557 	adev->gfxhub.funcs->set_fault_enable_default(adev, value);
4558 	/* TODO investigate why this and the hdp flush above are needed;
4559 	 * are we missing a flush somewhere else? */
4560 	adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
4561 
4562 	return 0;
4563 }
4564 
4565 static void gfx_v11_0_select_cp_fw_arch(struct amdgpu_device *adev)
4566 {
4567 	u32 tmp;
4568 
4569 	/* select RS64 */
4570 	if (adev->gfx.rs64_enable) {
4571 		tmp = RREG32_SOC15(GC, 0, regCP_GFX_CNTL);
4572 		tmp = REG_SET_FIELD(tmp, CP_GFX_CNTL, ENGINE_SEL, 1);
4573 		WREG32_SOC15(GC, 0, regCP_GFX_CNTL, tmp);
4574 
4575 		tmp = RREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL);
4576 		tmp = REG_SET_FIELD(tmp, CP_MEC_ISA_CNTL, ISA_MODE, 1);
4577 		WREG32_SOC15(GC, 0, regCP_MEC_ISA_CNTL, tmp);
4578 	}
4579 
4580 	if (amdgpu_emu_mode == 1)
4581 		msleep(100);
4582 }
4583 
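/*
 * Decode GB_ADDR_CONFIG into the cached
 * adev->gfx.config.gb_addr_config_fields (number of pipes, packers, RBs
 * per SE, shader engines, max compressed fragments and pipe interleave
 * size).  Most fields are stored as 1 << register field.  A raw value of
 * 0 is rejected with -EINVAL, presumably meaning the register has not
 * been initialized by firmware yet.
 */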
4584 static int get_gb_addr_config(struct amdgpu_device * adev)
4585 {
4586 	u32 gb_addr_config;
4587 
4588 	gb_addr_config = RREG32_SOC15(GC, 0, regGB_ADDR_CONFIG);
4589 	if (gb_addr_config == 0)
4590 		return -EINVAL;
4591 
4592 	adev->gfx.config.gb_addr_config_fields.num_pkrs =
4593 		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS);
4594 
4595 	adev->gfx.config.gb_addr_config = gb_addr_config;
4596 
4597 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
4598 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4599 				      GB_ADDR_CONFIG, NUM_PIPES);
4600 
4601 	adev->gfx.config.max_tile_pipes =
4602 		adev->gfx.config.gb_addr_config_fields.num_pipes;
4603 
4604 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
4605 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4606 				      GB_ADDR_CONFIG, MAX_COMPRESSED_FRAGS);
4607 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
4608 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4609 				      GB_ADDR_CONFIG, NUM_RB_PER_SE);
4610 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
4611 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4612 				      GB_ADDR_CONFIG, NUM_SHADER_ENGINES);
4613 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
4614 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
4615 				      GB_ADDR_CONFIG, PIPE_INTERLEAVE_SIZE));
4616 
4617 	return 0;
4618 }
4619 
4620 static void gfx_v11_0_disable_gpa_mode(struct amdgpu_device *adev)
4621 {
4622 	uint32_t data;
4623 
4624 	data = RREG32_SOC15(GC, 0, regCPC_PSP_DEBUG);
4625 	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
4626 	WREG32_SOC15(GC, 0, regCPC_PSP_DEBUG, data);
4627 
4628 	data = RREG32_SOC15(GC, 0, regCPG_PSP_DEBUG);
4629 	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
4630 	WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data);
4631 }
4632 
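/*
 * gfx hw_init: kick off firmware loading according to the configured
 * load type (IMU + RLC backdoor autoload, PSP, or direct), wait for the
 * RLC autoload to complete, enable the gfxhub, program golden registers
 * and constants, resume the RLC and then the CP, and finally read back
 * the IMU firmware version from the hardware if it is not already known.
 */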
4633 static int gfx_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
4634 {
4635 	int r;
4636 	struct amdgpu_device *adev = ip_block->adev;
4637 
4638 	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
4639 				       adev->gfx.cleaner_shader_ptr);
4640 
4641 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
4642 		if (adev->gfx.imu.funcs) {
4643 			/* RLC autoload sequence 1: Program rlc ram */
4644 			if (adev->gfx.imu.funcs->program_rlc_ram)
4645 				adev->gfx.imu.funcs->program_rlc_ram(adev);
4646 			/* rlc autoload firmware */
4647 			r = gfx_v11_0_rlc_backdoor_autoload_enable(adev);
4648 			if (r)
4649 				return r;
4650 		}
4651 	} else {
4652 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4653 			if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) {
4654 				if (adev->gfx.imu.funcs->load_microcode)
4655 					adev->gfx.imu.funcs->load_microcode(adev);
4656 				if (adev->gfx.imu.funcs->setup_imu)
4657 					adev->gfx.imu.funcs->setup_imu(adev);
4658 				if (adev->gfx.imu.funcs->start_imu)
4659 					adev->gfx.imu.funcs->start_imu(adev);
4660 			}
4661 
4662 			/* disable gpa mode in backdoor loading */
4663 			gfx_v11_0_disable_gpa_mode(adev);
4664 		}
4665 	}
4666 
4667 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
4668 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
4669 		r = gfx_v11_0_wait_for_rlc_autoload_complete(adev);
4670 		if (r) {
4671 			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
4672 			return r;
4673 		}
4674 	}
4675 
4676 	adev->gfx.is_poweron = true;
4677 
4678 	if(get_gb_addr_config(adev))
4679 		DRM_WARN("Invalid gb_addr_config !\n");
4680 
4681 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP &&
4682 	    adev->gfx.rs64_enable)
4683 		gfx_v11_0_config_gfx_rs64(adev);
4684 
4685 	r = gfx_v11_0_gfxhub_enable(adev);
4686 	if (r)
4687 		return r;
4688 
4689 	if (!amdgpu_emu_mode)
4690 		gfx_v11_0_init_golden_registers(adev);
4691 
4692 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) ||
4693 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO && amdgpu_dpm == 1)) {
4694 		/*
4695 		 * For gfx 11, RLC firmware loading relies on the SMU firmware
4696 		 * being loaded first, so for direct loading the SMC ucode has
4697 		 * to be loaded here before the RLC.
4698 		 */
4699 		r = amdgpu_pm_load_smu_firmware(adev, NULL);
4700 		if (r)
4701 			return r;
4702 	}
4703 
4704 	gfx_v11_0_constants_init(adev);
4705 
4706 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
4707 		gfx_v11_0_select_cp_fw_arch(adev);
4708 
4709 	if (adev->nbio.funcs->gc_doorbell_init)
4710 		adev->nbio.funcs->gc_doorbell_init(adev);
4711 
4712 	r = gfx_v11_0_rlc_resume(adev);
4713 	if (r)
4714 		return r;
4715 
4716 	/*
4717 	 * init golden registers and rlc resume may override some registers,
4718 	 * reconfig them here
4719 	 */
4720 	gfx_v11_0_tcp_harvest(adev);
4721 
4722 	r = gfx_v11_0_cp_resume(adev);
4723 	if (r)
4724 		return r;
4725 
4726 	/* get IMU version from HW if it's not set */
4727 	if (!adev->gfx.imu_fw_version)
4728 		adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0);
4729 
4730 	return r;
4731 }
4732 
4733 static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block)
4734 {
4735 	struct amdgpu_device *adev = ip_block->adev;
4736 
4737 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4738 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4739 	amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
4740 
4741 	if (!adev->no_hw_access) {
4742 		if (amdgpu_async_gfx_ring) {
4743 			if (amdgpu_gfx_disable_kgq(adev, 0))
4744 				DRM_ERROR("KGQ disable failed\n");
4745 		}
4746 
4747 		if (amdgpu_gfx_disable_kcq(adev, 0))
4748 			DRM_ERROR("KCQ disable failed\n");
4749 
4750 		amdgpu_mes_kiq_hw_fini(adev);
4751 	}
4752 
4753 	if (amdgpu_sriov_vf(adev))
4754 		/* Skip the steps that disable CPG and clear the KIQ position,
4755 		 * so that the CP can perform IDLE-SAVE during the switch. Those
4756 		 * steps are necessary to avoid a DMAR error on gfx9, but that
4757 		 * error is not reproduced on gfx11.
4758 		 */
4759 		return 0;
4760 
4761 	gfx_v11_0_cp_enable(adev, false);
4762 	gfx_v11_0_enable_gui_idle_interrupt(adev, false);
4763 
4764 	adev->gfxhub.funcs->gart_disable(adev);
4765 
4766 	adev->gfx.is_poweron = false;
4767 
4768 	return 0;
4769 }
4770 
4771 static int gfx_v11_0_suspend(struct amdgpu_ip_block *ip_block)
4772 {
4773 	return gfx_v11_0_hw_fini(ip_block);
4774 }
4775 
4776 static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block)
4777 {
4778 	return gfx_v11_0_hw_init(ip_block);
4779 }
4780 
4781 static bool gfx_v11_0_is_idle(void *handle)
4782 {
4783 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4784 
4785 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS),
4786 				GRBM_STATUS, GUI_ACTIVE))
4787 		return false;
4788 	else
4789 		return true;
4790 }
4791 
4792 static int gfx_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4793 {
4794 	unsigned i;
4795 	u32 tmp;
4796 	struct amdgpu_device *adev = ip_block->adev;
4797 
4798 	for (i = 0; i < adev->usec_timeout; i++) {
4799 		/* read MC_STATUS */
4800 		tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS) &
4801 			GRBM_STATUS__GUI_ACTIVE_MASK;
4802 
4803 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4804 			return 0;
4805 		udelay(1);
4806 	}
4807 	return -ETIMEDOUT;
4808 }
4809 
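/*
 * Acquire (req == true) or release (req == false) the CP_GFX_INDEX_MUTEX
 * hardware mutex as client 4, polling for up to adev->usec_timeout
 * microseconds for the request to take effect.  Returns 0 on success or
 * -EINVAL on timeout.  A sketch of the expected usage, based on the soft
 * reset caller below:
 *
 *	mutex_lock(&adev->gfx.reset_sem_mutex);
 *	r = gfx_v11_0_request_gfx_index_mutex(adev, true);
 *	if (!r) {
 *		... access CP_VMID_RESET ...
 *		gfx_v11_0_request_gfx_index_mutex(adev, false);
 *	}
 *	mutex_unlock(&adev->gfx.reset_sem_mutex);
 */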
4810 int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev,
4811 				      bool req)
4812 {
4813 	u32 i, tmp, val;
4814 
4815 	for (i = 0; i < adev->usec_timeout; i++) {
4816 		/* Request with MeId=2, PipeId=0 */
4817 		tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req);
4818 		tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4);
4819 		WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp);
4820 
4821 		val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX);
4822 		if (req) {
4823 			if (val == tmp)
4824 				break;
4825 		} else {
4826 			tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX,
4827 					    REQUEST, 1);
4828 
4829 			/* unlocked or locked by firmware */
4830 			if (val != tmp)
4831 				break;
4832 		}
4833 		udelay(1);
4834 	}
4835 
4836 	if (i >= adev->usec_timeout)
4837 		return -EINVAL;
4838 
4839 	return 0;
4840 }
4841 
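/*
 * Soft reset sequence: mask the CP interrupts, request a dequeue on
 * every compute and gfx HQD, take the gfx index mutex and write
 * CP_VMID_RESET, wait for all HQDs to go idle, pulse the
 * CP/GFX/CPF/CPC/CPG bits in GRBM_SOFT_RESET, restart the CP engines,
 * wait for CP_VMID_RESET to clear, and re-enable the interrupts.  The
 * register sequence runs with the RLC held in safe mode; the CP is
 * resumed once safe mode is released.
 */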
4842 static int gfx_v11_0_soft_reset(struct amdgpu_ip_block *ip_block)
4843 {
4844 	u32 grbm_soft_reset = 0;
4845 	u32 tmp;
4846 	int r, i, j, k;
4847 	struct amdgpu_device *adev = ip_block->adev;
4848 
4849 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4850 
4851 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4852 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
4853 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
4854 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
4855 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
4856 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4857 
4858 	mutex_lock(&adev->srbm_mutex);
4859 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
4860 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
4861 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
4862 				soc21_grbm_select(adev, i, k, j, 0);
4863 
4864 				WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
4865 				WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
4866 			}
4867 		}
4868 	}
4869 	for (i = 0; i < adev->gfx.me.num_me; ++i) {
4870 		for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
4871 			for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
4872 				soc21_grbm_select(adev, i, k, j, 0);
4873 
4874 				WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
4875 			}
4876 		}
4877 	}
4878 	soc21_grbm_select(adev, 0, 0, 0, 0);
4879 	mutex_unlock(&adev->srbm_mutex);
4880 
4881 	/* Try to acquire the gfx mutex before access to CP_VMID_RESET */
4882 	mutex_lock(&adev->gfx.reset_sem_mutex);
4883 	r = gfx_v11_0_request_gfx_index_mutex(adev, true);
4884 	if (r) {
4885 		mutex_unlock(&adev->gfx.reset_sem_mutex);
4886 		DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n");
4887 		return r;
4888 	}
4889 
4890 	WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
4891 
4892 	/* Read the CP_VMID_RESET register three times
4893 	 * to give GFX_HQD_ACTIVE sufficient time to reach 0. */
4894 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4895 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4896 	RREG32_SOC15(GC, 0, regCP_VMID_RESET);
4897 
4898 	/* release the gfx mutex */
4899 	r = gfx_v11_0_request_gfx_index_mutex(adev, false);
4900 	mutex_unlock(&adev->gfx.reset_sem_mutex);
4901 	if (r) {
4902 		DRM_ERROR("Failed to release the gfx mutex during soft reset\n");
4903 		return r;
4904 	}
4905 
4906 	for (i = 0; i < adev->usec_timeout; i++) {
4907 		if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
4908 		    !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
4909 			break;
4910 		udelay(1);
4911 	}
4912 	if (i >= adev->usec_timeout) {
4913 		printk("Failed to wait all pipes clean\n");
4914 		return -EINVAL;
4915 	}
4916 
4917 	/**********  trigger soft reset  ***********/
4918 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4919 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4920 					SOFT_RESET_CP, 1);
4921 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4922 					SOFT_RESET_GFX, 1);
4923 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4924 					SOFT_RESET_CPF, 1);
4925 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4926 					SOFT_RESET_CPC, 1);
4927 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4928 					SOFT_RESET_CPG, 1);
4929 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4930 	/**********  exit soft reset  ***********/
4931 	grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
4932 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4933 					SOFT_RESET_CP, 0);
4934 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4935 					SOFT_RESET_GFX, 0);
4936 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4937 					SOFT_RESET_CPF, 0);
4938 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4939 					SOFT_RESET_CPC, 0);
4940 	grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4941 					SOFT_RESET_CPG, 0);
4942 	WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
4943 
4944 	tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
4945 	tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
4946 	WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
4947 
4948 	WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
4949 	WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
4950 
4951 	for (i = 0; i < adev->usec_timeout; i++) {
4952 		if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
4953 			break;
4954 		udelay(1);
4955 	}
4956 	if (i >= adev->usec_timeout) {
4957 		printk("Failed to wait CP_VMID_RESET to 0\n");
4958 		return -EINVAL;
4959 	}
4960 
4961 	tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
4962 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
4963 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
4964 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
4965 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
4966 	WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
4967 
4968 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4969 
4970 	return gfx_v11_0_cp_resume(adev);
4971 }
4972 
4973 static bool gfx_v11_0_check_soft_reset(struct amdgpu_ip_block *ip_block)
4974 {
4975 	int i, r;
4976 	struct amdgpu_device *adev = ip_block->adev;
4977 	struct amdgpu_ring *ring;
4978 	long tmo = msecs_to_jiffies(1000);
4979 
4980 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
4981 		ring = &adev->gfx.gfx_ring[i];
4982 		r = amdgpu_ring_test_ib(ring, tmo);
4983 		if (r)
4984 			return true;
4985 	}
4986 
4987 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4988 		ring = &adev->gfx.compute_ring[i];
4989 		r = amdgpu_ring_test_ib(ring, tmo);
4990 		if (r)
4991 			return true;
4992 	}
4993 
4994 	return false;
4995 }
4996 
4997 static int gfx_v11_0_post_soft_reset(struct amdgpu_ip_block *ip_block)
4998 {
4999 	struct amdgpu_device *adev = ip_block->adev;
5000 	/*
5001 	 * GFX soft reset will impact MES; MES needs to be resumed after a GFX soft reset.
5002 	 */
5003 	return amdgpu_mes_resume(adev);
5004 }
5005 
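/*
 * Sample a 64-bit GPU clock counter.  The counter is read HI/LO/HI; if
 * the two HI reads differ, a rollover happened in between and LO is
 * re-read so the combined value stays consistent.  Under SR-IOV the
 * CP_MES_MTIME counter is used (with GFXOFF temporarily disabled),
 * otherwise the SMUIO GOLDEN_TSC counter is read with preemption
 * disabled.
 */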
5006 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5007 {
5008 	uint64_t clock;
5009 	uint64_t clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
5010 
5011 	if (amdgpu_sriov_vf(adev)) {
5012 		amdgpu_gfx_off_ctrl(adev, false);
5013 		mutex_lock(&adev->gfx.gpu_clock_mutex);
5014 		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
5015 		clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
5016 		clock_counter_hi_after = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_HI);
5017 		if (clock_counter_hi_pre != clock_counter_hi_after)
5018 			clock_counter_lo = (uint64_t)RREG32_SOC15(GC, 0, regCP_MES_MTIME_LO);
5019 		mutex_unlock(&adev->gfx.gpu_clock_mutex);
5020 		amdgpu_gfx_off_ctrl(adev, true);
5021 	} else {
5022 		preempt_disable();
5023 		clock_counter_hi_pre = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
5024 		clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
5025 		clock_counter_hi_after = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
5026 		if (clock_counter_hi_pre != clock_counter_hi_after)
5027 			clock_counter_lo = (uint64_t)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
5028 		preempt_enable();
5029 	}
5030 	clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
5031 
5032 	return clock;
5033 }
5034 
5035 static void gfx_v11_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5036 					   uint32_t vmid,
5037 					   uint32_t gds_base, uint32_t gds_size,
5038 					   uint32_t gws_base, uint32_t gws_size,
5039 					   uint32_t oa_base, uint32_t oa_size)
5040 {
5041 	struct amdgpu_device *adev = ring->adev;
5042 
5043 	/* GDS Base */
5044 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5045 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_BASE) + 2 * vmid,
5046 				    gds_base);
5047 
5048 	/* GDS Size */
5049 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5050 				    SOC15_REG_OFFSET(GC, 0, regGDS_VMID0_SIZE) + 2 * vmid,
5051 				    gds_size);
5052 
5053 	/* GWS */
5054 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5055 				    SOC15_REG_OFFSET(GC, 0, regGDS_GWS_VMID0) + vmid,
5056 				    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5057 
5058 	/* OA */
5059 	gfx_v11_0_write_data_to_reg(ring, 0, false,
5060 				    SOC15_REG_OFFSET(GC, 0, regGDS_OA_VMID0) + vmid,
5061 				    (1 << (oa_size + oa_base)) - (1 << oa_base));
5062 }
5063 
5064 static int gfx_v11_0_early_init(struct amdgpu_ip_block *ip_block)
5065 {
5066 	struct amdgpu_device *adev = ip_block->adev;
5067 
5068 	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
5069 
5070 	adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
5071 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
5072 					  AMDGPU_MAX_COMPUTE_RINGS);
5073 
5074 	gfx_v11_0_set_kiq_pm4_funcs(adev);
5075 	gfx_v11_0_set_ring_funcs(adev);
5076 	gfx_v11_0_set_irq_funcs(adev);
5077 	gfx_v11_0_set_gds_init(adev);
5078 	gfx_v11_0_set_rlc_funcs(adev);
5079 	gfx_v11_0_set_mqd_funcs(adev);
5080 	gfx_v11_0_set_imu_funcs(adev);
5081 
5082 	gfx_v11_0_init_rlcg_reg_access_ctrl(adev);
5083 
5084 	return gfx_v11_0_init_microcode(adev);
5085 }
5086 
5087 static int gfx_v11_0_late_init(struct amdgpu_ip_block *ip_block)
5088 {
5089 	struct amdgpu_device *adev = ip_block->adev;
5090 	int r;
5091 
5092 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5093 	if (r)
5094 		return r;
5095 
5096 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5097 	if (r)
5098 		return r;
5099 
5100 	r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
5101 	if (r)
5102 		return r;
5103 	return 0;
5104 }
5105 
5106 static bool gfx_v11_0_is_rlc_enabled(struct amdgpu_device *adev)
5107 {
5108 	uint32_t rlc_cntl;
5109 
5110 	/* if RLC is not enabled, do nothing */
5111 	rlc_cntl = RREG32_SOC15(GC, 0, regRLC_CNTL);
5112 	return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
5113 }
5114 
5115 static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
5116 {
5117 	uint32_t data;
5118 	unsigned i;
5119 
5120 	data = RLC_SAFE_MODE__CMD_MASK;
5121 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5122 
5123 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, data);
5124 
5125 	/* wait for RLC_SAFE_MODE */
5126 	for (i = 0; i < adev->usec_timeout; i++) {
5127 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, regRLC_SAFE_MODE),
5128 				   RLC_SAFE_MODE, CMD))
5129 			break;
5130 		udelay(1);
5131 	}
5132 }
5133 
5134 static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
5135 {
5136 	WREG32_SOC15(GC, 0, regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
5137 }
5138 
5139 static void gfx_v11_0_update_perf_clk(struct amdgpu_device *adev,
5140 				      bool enable)
5141 {
5142 	uint32_t def, data;
5143 
5144 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
5145 		return;
5146 
5147 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5148 
5149 	if (enable)
5150 		data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
5151 	else
5152 		data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
5153 
5154 	if (def != data)
5155 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5156 }
5157 
5158 static void gfx_v11_0_update_sram_fgcg(struct amdgpu_device *adev,
5159 				       bool enable)
5160 {
5161 	uint32_t def, data;
5162 
5163 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
5164 		return;
5165 
5166 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5167 
5168 	if (enable)
5169 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
5170 	else
5171 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
5172 
5173 	if (def != data)
5174 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5175 }
5176 
5177 static void gfx_v11_0_update_repeater_fgcg(struct amdgpu_device *adev,
5178 					   bool enable)
5179 {
5180 	uint32_t def, data;
5181 
5182 	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
5183 		return;
5184 
5185 	def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5186 
5187 	if (enable)
5188 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
5189 	else
5190 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK;
5191 
5192 	if (def != data)
5193 		WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5194 }
5195 
5196 static void gfx_v11_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5197 						       bool enable)
5198 {
5199 	uint32_t data, def;
5200 
5201 	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
5202 		return;
5203 
5204 	/* It is disabled by HW by default */
5205 	if (enable) {
5206 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5207 			/* 1 - RLC_CGTT_MGCG_OVERRIDE */
5208 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5209 
5210 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5211 				  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5212 				  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
5213 
5214 			if (def != data)
5215 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5216 		}
5217 	} else {
5218 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5219 			def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5220 
5221 			data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5222 				 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5223 				 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
5224 
5225 			if (def != data)
5226 				WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5227 		}
5228 	}
5229 }
5230 
5231 static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5232 						       bool enable)
5233 {
5234 	uint32_t def, data;
5235 
5236 	if (!(adev->cg_flags &
5237 	      (AMD_CG_SUPPORT_GFX_CGCG |
5238 	      AMD_CG_SUPPORT_GFX_CGLS |
5239 	      AMD_CG_SUPPORT_GFX_3D_CGCG |
5240 	      AMD_CG_SUPPORT_GFX_3D_CGLS)))
5241 		return;
5242 
5243 	if (enable) {
5244 		def = data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5245 
5246 		/* unset CGCG override */
5247 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
5248 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5249 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5250 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5251 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
5252 		    adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5253 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5254 
5255 		/* update CGCG override bits */
5256 		if (def != data)
5257 			WREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE, data);
5258 
5259 		/* enable cgcg FSM(0x0000363F) */
5260 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5261 
5262 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5263 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
5264 			data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5265 				 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5266 		}
5267 
5268 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5269 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
5270 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5271 				 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5272 		}
5273 
5274 		if (def != data)
5275 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5276 
5277 		/* Program RLC_CGCG_CGLS_CTRL_3D */
5278 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5279 
5280 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5281 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD_MASK;
5282 			data |= (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5283 				 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5284 		}
5285 
5286 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5287 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY_MASK;
5288 			data |= (0xf << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5289 				 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5290 		}
5291 
5292 		if (def != data)
5293 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5294 
5295 		/* set IDLE_POLL_COUNT(0x00900100) */
5296 		def = data = RREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL);
5297 
5298 		data &= ~(CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK | CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK);
5299 		data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5300 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5301 
5302 		if (def != data)
5303 			WREG32_SOC15(GC, 0, regCP_RB_WPTR_POLL_CNTL, data);
5304 
5305 		data = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
5306 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
5307 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
5308 		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
5309 		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
5310 		WREG32_SOC15(GC, 0, regCP_INT_CNTL, data);
5311 
5312 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5313 		data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5314 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5315 
5316 		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
5317 		if (adev->sdma.num_instances > 1) {
5318 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5319 			data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1);
5320 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5321 		}
5322 	} else {
5323 		/* Program RLC_CGCG_CGLS_CTRL */
5324 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5325 
5326 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
5327 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5328 
5329 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5330 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5331 
5332 		if (def != data)
5333 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL, data);
5334 
5335 		/* Program RLC_CGCG_CGLS_CTRL_3D */
5336 		def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5337 
5338 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5339 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5340 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5341 			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5342 
5343 		if (def != data)
5344 			WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data);
5345 
5346 		data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL);
5347 		data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5348 		WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data);
5349 
5350 		/* Some ASICs only have one SDMA instance, so there is no need to configure SDMA1 */
5351 		if (adev->sdma.num_instances > 1) {
5352 			data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL);
5353 			data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK;
5354 			WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data);
5355 		}
5356 	}
5357 }
5358 
5359 static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5360 					    bool enable)
5361 {
5362 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5363 
5364 	gfx_v11_0_update_coarse_grain_clock_gating(adev, enable);
5365 
5366 	gfx_v11_0_update_medium_grain_clock_gating(adev, enable);
5367 
5368 	gfx_v11_0_update_repeater_fgcg(adev, enable);
5369 
5370 	gfx_v11_0_update_sram_fgcg(adev, enable);
5371 
5372 	gfx_v11_0_update_perf_clk(adev, enable);
5373 
5374 	if (adev->cg_flags &
5375 	    (AMD_CG_SUPPORT_GFX_MGCG |
5376 	     AMD_CG_SUPPORT_GFX_CGLS |
5377 	     AMD_CG_SUPPORT_GFX_CGCG |
5378 	     AMD_CG_SUPPORT_GFX_3D_CGCG |
5379 	     AMD_CG_SUPPORT_GFX_3D_CGLS))
5380 		gfx_v11_0_enable_gui_idle_interrupt(adev, enable);
5381 
5382 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5383 
5384 	return 0;
5385 }
5386 
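/*
 * Route RLC SPM (streaming performance monitor) traffic to @vmid by
 * updating the RLC_SPM_VMID field of RLC_SPM_MC_CNTL.  The register is
 * written through the CPU with GFXOFF disabled; for one-VF SR-IOV
 * configurations the changed value is additionally emitted on the ring,
 * presumably so it also takes effect in command-submission order.
 */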
5387 static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
5388 {
5389 	u32 reg, pre_data, data;
5390 
5391 	amdgpu_gfx_off_ctrl(adev, false);
5392 	reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
5393 	if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
5394 		pre_data = RREG32_NO_KIQ(reg);
5395 	else
5396 		pre_data = RREG32(reg);
5397 
5398 	data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
5399 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5400 
5401 	if (pre_data != data) {
5402 		if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
5403 			WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
5404 		} else
5405 			WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
5406 	}
5407 	amdgpu_gfx_off_ctrl(adev, true);
5408 
5409 	if (ring
5410 		&& amdgpu_sriov_is_pp_one_vf(adev)
5411 		&& (pre_data != data)
5412 		&& ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
5413 			|| (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
5414 		amdgpu_ring_emit_wreg(ring, reg, data);
5415 	}
5416 }
5417 
5418 static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = {
5419 	.is_rlc_enabled = gfx_v11_0_is_rlc_enabled,
5420 	.set_safe_mode = gfx_v11_0_set_safe_mode,
5421 	.unset_safe_mode = gfx_v11_0_unset_safe_mode,
5422 	.init = gfx_v11_0_rlc_init,
5423 	.get_csb_size = gfx_v11_0_get_csb_size,
5424 	.get_csb_buffer = gfx_v11_0_get_csb_buffer,
5425 	.resume = gfx_v11_0_rlc_resume,
5426 	.stop = gfx_v11_0_rlc_stop,
5427 	.reset = gfx_v11_0_rlc_reset,
5428 	.start = gfx_v11_0_rlc_start,
5429 	.update_spm_vmid = gfx_v11_0_update_spm_vmid,
5430 };
5431 
5432 static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
5433 {
5434 	u32 data = RREG32_SOC15(GC, 0, regRLC_PG_CNTL);
5435 
5436 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5437 		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5438 	else
5439 		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5440 
5441 	WREG32_SOC15(GC, 0, regRLC_PG_CNTL, data);
5442 
5443 	// Program RLC_PG_DELAY3 for CGPG hysteresis
5444 	/* Program RLC_PG_DELAY_3 for CGPG hysteresis */
5445 		switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5446 		case IP_VERSION(11, 0, 1):
5447 		case IP_VERSION(11, 0, 4):
5448 		case IP_VERSION(11, 5, 0):
5449 		case IP_VERSION(11, 5, 1):
5450 		case IP_VERSION(11, 5, 2):
5451 			WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
5452 			break;
5453 		default:
5454 			break;
5455 		}
5456 	}
5457 }
5458 
5459 static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable)
5460 {
5461 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5462 
5463 	gfx_v11_cntl_power_gating(adev, enable);
5464 
5465 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5466 }
5467 
5468 static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
5469 					   enum amd_powergating_state state)
5470 {
5471 	struct amdgpu_device *adev = ip_block->adev;
5472 	bool enable = (state == AMD_PG_STATE_GATE);
5473 
5474 	if (amdgpu_sriov_vf(adev))
5475 		return 0;
5476 
5477 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5478 	case IP_VERSION(11, 0, 0):
5479 	case IP_VERSION(11, 0, 2):
5480 	case IP_VERSION(11, 0, 3):
5481 		amdgpu_gfx_off_ctrl(adev, enable);
5482 		break;
5483 	case IP_VERSION(11, 0, 1):
5484 	case IP_VERSION(11, 0, 4):
5485 	case IP_VERSION(11, 5, 0):
5486 	case IP_VERSION(11, 5, 1):
5487 	case IP_VERSION(11, 5, 2):
5488 		if (!enable)
5489 			amdgpu_gfx_off_ctrl(adev, false);
5490 
5491 		gfx_v11_cntl_pg(adev, enable);
5492 
5493 		if (enable)
5494 			amdgpu_gfx_off_ctrl(adev, true);
5495 
5496 		break;
5497 	default:
5498 		break;
5499 	}
5500 
5501 	return 0;
5502 }
5503 
5504 static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
5505 					  enum amd_clockgating_state state)
5506 {
5507 	struct amdgpu_device *adev = ip_block->adev;
5508 
5509 	if (amdgpu_sriov_vf(adev))
5510 		return 0;
5511 
5512 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5513 	case IP_VERSION(11, 0, 0):
5514 	case IP_VERSION(11, 0, 1):
5515 	case IP_VERSION(11, 0, 2):
5516 	case IP_VERSION(11, 0, 3):
5517 	case IP_VERSION(11, 0, 4):
5518 	case IP_VERSION(11, 5, 0):
5519 	case IP_VERSION(11, 5, 1):
5520 	case IP_VERSION(11, 5, 2):
5521 		gfx_v11_0_update_gfx_clock_gating(adev,
5522 						  state == AMD_CG_STATE_GATE);
5523 		break;
5524 	default:
5525 		break;
5526 	}
5527 
5528 	return 0;
5529 }
5530 
5531 static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags)
5532 {
5533 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5534 	int data;
5535 
5536 	/* AMD_CG_SUPPORT_GFX_MGCG */
5537 	data = RREG32_SOC15(GC, 0, regRLC_CGTT_MGCG_OVERRIDE);
5538 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5539 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
5540 
5541 	/* AMD_CG_SUPPORT_REPEATER_FGCG */
5542 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
5543 		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
5544 
5545 	/* AMD_CG_SUPPORT_GFX_FGCG */
5546 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
5547 		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
5548 
5549 	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
5550 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
5551 		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
5552 
5553 	/* AMD_CG_SUPPORT_GFX_CGCG */
5554 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL);
5555 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5556 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
5557 
5558 	/* AMD_CG_SUPPORT_GFX_CGLS */
5559 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5560 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
5561 
5562 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
5563 	data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D);
5564 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5565 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5566 
5567 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
5568 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5569 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5570 }
5571 
5572 static u64 gfx_v11_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5573 {
5574 	/* gfx11 is 32bit rptr*/
5575 	/* gfx11 is 32bit rptr */
5576 }
5577 
5578 static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5579 {
5580 	struct amdgpu_device *adev = ring->adev;
5581 	u64 wptr;
5582 
5583 	/* XXX check if swapping is necessary on BE */
5584 	if (ring->use_doorbell) {
5585 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5586 	} else {
5587 		wptr = RREG32_SOC15(GC, 0, regCP_RB0_WPTR);
5588 		wptr += (u64)RREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI) << 32;
5589 	}
5590 
5591 	return wptr;
5592 }
5593 
5594 static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5595 {
5596 	struct amdgpu_device *adev = ring->adev;
5597 
5598 	if (ring->use_doorbell) {
5599 		/* XXX check if swapping is necessary on BE */
5600 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5601 			     ring->wptr);
5602 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5603 	} else {
5604 		WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
5605 			     lower_32_bits(ring->wptr));
5606 		WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
5607 			     upper_32_bits(ring->wptr));
5608 	}
5609 }
5610 
5611 static u64 gfx_v11_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5612 {
5613 	/* gfx11 hardware is 32bit rptr */
5614 	return *(uint32_t *)ring->rptr_cpu_addr;
5615 }
5616 
5617 static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5618 {
5619 	u64 wptr;
5620 
5621 	/* XXX check if swapping is necessary on BE */
5622 	if (ring->use_doorbell)
5623 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5624 	else
5625 		BUG();
5626 	return wptr;
5627 }
5628 
5629 static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5630 {
5631 	struct amdgpu_device *adev = ring->adev;
5632 
5633 	/* XXX check if swapping is necessary on BE */
5634 	if (ring->use_doorbell) {
5635 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
5636 			     ring->wptr);
5637 		WDOORBELL64(ring->doorbell_index, ring->wptr);
5638 	} else {
5639 		BUG(); /* only DOORBELL method supported on gfx11 now */
5640 	}
5641 }
5642 
5643 static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5644 {
5645 	struct amdgpu_device *adev = ring->adev;
5646 	u32 ref_and_mask, reg_mem_engine;
5647 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5648 
5649 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5650 		switch (ring->me) {
5651 		case 1:
5652 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5653 			break;
5654 		case 2:
5655 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5656 			break;
5657 		default:
5658 			return;
5659 		}
5660 		reg_mem_engine = 0;
5661 	} else {
5662 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
5663 		reg_mem_engine = 1; /* pfp */
5664 	}
5665 
5666 	gfx_v11_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5667 			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5668 			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5669 			       ref_and_mask, ref_and_mask, 0x20);
5670 }
5671 
5672 static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5673 				       struct amdgpu_job *job,
5674 				       struct amdgpu_ib *ib,
5675 				       uint32_t flags)
5676 {
5677 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5678 	u32 header, control = 0;
5679 
5680 	BUG_ON(ib->flags & AMDGPU_IB_FLAG_CE);
5681 
5682 	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5683 
5684 	control |= ib->length_dw | (vmid << 24);
5685 
5686 	if (ring->adev->gfx.mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5687 		control |= INDIRECT_BUFFER_PRE_ENB(1);
5688 
5689 		if (flags & AMDGPU_IB_PREEMPTED)
5690 			control |= INDIRECT_BUFFER_PRE_RESUME(1);
5691 
5692 		if (vmid)
5693 			gfx_v11_0_ring_emit_de_meta(ring,
5694 				    (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
5695 	}
5696 
5697 	if (ring->is_mes_queue)
5698 		/* inherit vmid from mqd */
5699 		control |= 0x400000;
5700 
5701 	amdgpu_ring_write(ring, header);
5702 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5703 	amdgpu_ring_write(ring,
5704 #ifdef __BIG_ENDIAN
5705 		(2 << 0) |
5706 #endif
5707 		lower_32_bits(ib->gpu_addr));
5708 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5709 	amdgpu_ring_write(ring, control);
5710 }
5711 
5712 static void gfx_v11_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5713 					   struct amdgpu_job *job,
5714 					   struct amdgpu_ib *ib,
5715 					   uint32_t flags)
5716 {
5717 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5718 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5719 
5720 	if (ring->is_mes_queue)
5721 		/* inherit vmid from mqd */
5722 		control |= 0x40000000;
5723 
5724 	/* Currently, there is a high possibility to get wave ID mismatch
5725 	 * between ME and GDS, leading to a hw deadlock, because ME generates
5726 	 * different wave IDs than the GDS expects. This situation happens
5727 	 * randomly when at least 5 compute pipes use GDS ordered append.
5728 	 * The wave IDs generated by ME are also wrong after suspend/resume.
5729 	 * Those are probably bugs somewhere else in the kernel driver.
5730 	 *
5731 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5732 	 * GDS to 0 for this ring (me/pipe).
5733 	 */
5734 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5735 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5736 		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
5737 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5738 	}
5739 
5740 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5741 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5742 	amdgpu_ring_write(ring,
5743 #ifdef __BIG_ENDIAN
5744 				(2 << 0) |
5745 #endif
5746 				lower_32_bits(ib->gpu_addr));
5747 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5748 	amdgpu_ring_write(ring, control);
5749 }
5750 
5751 static void gfx_v11_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5752 				     u64 seq, unsigned flags)
5753 {
5754 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5755 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5756 
5757 	/* RELEASE_MEM - flush caches, send int */
5758 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5759 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
5760 				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
5761 				 PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
5762 				 PACKET3_RELEASE_MEM_GCR_GLM_WB |
5763 				 PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
5764 				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5765 				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
5766 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
5767 				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
5768 
5769 	/*
5770 	 * The address should be Qword aligned for a 64-bit write, and Dword
5771 	 * aligned when only sending the low 32 bits (data high is discarded).
5772 	 */
5773 	if (write64bit)
5774 		BUG_ON(addr & 0x7);
5775 	else
5776 		BUG_ON(addr & 0x3);
5777 	amdgpu_ring_write(ring, lower_32_bits(addr));
5778 	amdgpu_ring_write(ring, upper_32_bits(addr));
5779 	amdgpu_ring_write(ring, lower_32_bits(seq));
5780 	amdgpu_ring_write(ring, upper_32_bits(seq));
5781 	amdgpu_ring_write(ring, ring->is_mes_queue ?
5782 			 (ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
5783 }
5784 
5785 static void gfx_v11_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5786 {
5787 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5788 	uint32_t seq = ring->fence_drv.sync_seq;
5789 	uint64_t addr = ring->fence_drv.gpu_addr;
5790 
5791 	gfx_v11_0_wait_reg_mem(ring, usepfp, 1, 0, lower_32_bits(addr),
5792 			       upper_32_bits(addr), seq, 0xffffffff, 4);
5793 }
5794 
5795 static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
5796 				   uint16_t pasid, uint32_t flush_type,
5797 				   bool all_hub, uint8_t dst_sel)
5798 {
5799 	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
5800 	amdgpu_ring_write(ring,
5801 			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
5802 			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
5803 			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
5804 			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
5805 }
5806 
5807 static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5808 					 unsigned vmid, uint64_t pd_addr)
5809 {
5810 	if (ring->is_mes_queue)
5811 		gfx_v11_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
5812 	else
5813 		amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5814 
5815 	/* compute doesn't have PFP */
5816 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5817 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5818 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5819 		amdgpu_ring_write(ring, 0x0);
5820 	}
5821 
5822 	/* Make sure that we can't skip the SET_Q_MODE packets when the VM
5823 	 * changed in any way.
5824 	 */
5825 	ring->set_q_mode_offs = 0;
5826 	ring->set_q_mode_ptr = NULL;
5827 }
5828 
5829 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5830 					  u64 seq, unsigned int flags)
5831 {
5832 	struct amdgpu_device *adev = ring->adev;
5833 
5834 	/* we only allocate 32bit for each seq wb address */
5835 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5836 
5837 	/* write fence seq to the "addr" */
5838 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5839 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5840 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5841 	amdgpu_ring_write(ring, lower_32_bits(addr));
5842 	amdgpu_ring_write(ring, upper_32_bits(addr));
5843 	amdgpu_ring_write(ring, lower_32_bits(seq));
5844 
5845 	if (flags & AMDGPU_FENCE_FLAG_INT) {
5846 		/* set register to trigger INT */
5847 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5848 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5849 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5850 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, regCPC_INT_STATUS));
5851 		amdgpu_ring_write(ring, 0);
5852 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5853 	}
5854 }
5855 
5856 static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
5857 					 uint32_t flags)
5858 {
5859 	uint32_t dw2 = 0;
5860 
5861 	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5862 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5863 		/* set load_global_config & load_global_uconfig */
5864 		dw2 |= 0x8001;
5865 		/* set load_cs_sh_regs */
5866 		dw2 |= 0x01000000;
5867 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
5868 		dw2 |= 0x10002;
5869 	}
5870 
5871 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5872 	amdgpu_ring_write(ring, dw2);
5873 	amdgpu_ring_write(ring, 0);
5874 }
5875 
5876 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5877 						   uint64_t addr)
5878 {
5879 	unsigned ret;
5880 
5881 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5882 	amdgpu_ring_write(ring, lower_32_bits(addr));
5883 	amdgpu_ring_write(ring, upper_32_bits(addr));
5884 	/* discard following DWs if *cond_exec_gpu_addr==0 */
5885 	amdgpu_ring_write(ring, 0);
5886 	ret = ring->wptr & ring->buf_mask;
5887 	/* patch dummy value later */
5888 	amdgpu_ring_write(ring, 0);
5889 
5890 	return ret;
5891 }
5892 
5893 static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
5894 					   u64 shadow_va, u64 csa_va,
5895 					   u64 gds_va, bool init_shadow,
5896 					   int vmid)
5897 {
5898 	struct amdgpu_device *adev = ring->adev;
5899 	unsigned int offs, end;
5900 
5901 	if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
5902 		return;
5903 
5904 	/*
5905 	 * The logic here isn't easy to understand because we need to keep state
5906 	 * across multiple executions of the function as well as between the
5907 	 * CPU and GPU. The general idea is that the newly written GPU command
5908 	 * has a condition on the previous one and is only executed if really
5909 	 * necessary.
5910 	 */
5911 
5912 	/*
5913 	 * The dw in the NOP controls if the next SET_Q_MODE packet should be
5914 	 * executed or not. Reserve 64 bits just to be on the safe side.
5915 	 */
5916 	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
5917 	offs = ring->wptr & ring->buf_mask;
5918 
5919 	/*
5920 	 * We start with skipping the prefix SET_Q_MODE and always executing
5921 	 * the postfix SET_Q_MODE packet. This is changed below with a
5922 	 * WRITE_DATA command when the postfix executed.
5923 	 */
5924 	amdgpu_ring_write(ring, shadow_va ? 1 : 0);
5925 	amdgpu_ring_write(ring, 0);
5926 
5927 	if (ring->set_q_mode_offs) {
5928 		uint64_t addr;
5929 
5930 		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
5931 		addr += ring->set_q_mode_offs << 2;
5932 		end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
5933 	}
5934 
5935 	/*
5936 	 * When the postfix SET_Q_MODE packet executes we need to make sure that the
5937 	 * next prefix SET_Q_MODE packet executes as well.
5938 	 */
5939 	if (!shadow_va) {
5940 		uint64_t addr;
5941 
5942 		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
5943 		addr += offs << 2;
5944 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5945 		amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5946 		amdgpu_ring_write(ring, lower_32_bits(addr));
5947 		amdgpu_ring_write(ring, upper_32_bits(addr));
5948 		amdgpu_ring_write(ring, 0x1);
5949 	}
5950 
5951 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
5952 	amdgpu_ring_write(ring, lower_32_bits(shadow_va));
5953 	amdgpu_ring_write(ring, upper_32_bits(shadow_va));
5954 	amdgpu_ring_write(ring, lower_32_bits(gds_va));
5955 	amdgpu_ring_write(ring, upper_32_bits(gds_va));
5956 	amdgpu_ring_write(ring, lower_32_bits(csa_va));
5957 	amdgpu_ring_write(ring, upper_32_bits(csa_va));
5958 	amdgpu_ring_write(ring, shadow_va ?
5959 			  PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
5960 	amdgpu_ring_write(ring, init_shadow ?
5961 			  PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
5962 
5963 	if (ring->set_q_mode_offs)
5964 		amdgpu_ring_patch_cond_exec(ring, end);
5965 
5966 	if (shadow_va) {
5967 		uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;
5968 
5969 		/*
5970 		 * If the tokens match, try to skip the last postfix SET_Q_MODE
5971 		 * packet to avoid saving/restoring the state all the time.
5972 		 */
5973 		if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
5974 			*ring->set_q_mode_ptr = 0;
5975 
5976 		ring->set_q_mode_token = token;
5977 	} else {
5978 		ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
5979 	}
5980 
5981 	ring->set_q_mode_offs = offs;
5982 }
5983 
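/*
 * Preempt the IB currently executing on a gfx ring: assert the ring's
 * preemption condition, have the KIQ unmap the queue with
 * PREEMPT_QUEUES_NO_UNMAP and a trailing fence, then poll the trailing
 * fence until the CP acknowledges the preemption or the timeout expires.
 */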
5984 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
5985 {
5986 	int i, r = 0;
5987 	struct amdgpu_device *adev = ring->adev;
5988 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5989 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5990 	unsigned long flags;
5991 
5992 	if (adev->enable_mes)
5993 		return -EINVAL;
5994 
5995 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5996 		return -EINVAL;
5997 
5998 	spin_lock_irqsave(&kiq->ring_lock, flags);
5999 
6000 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
6001 		spin_unlock_irqrestore(&kiq->ring_lock, flags);
6002 		return -ENOMEM;
6003 	}
6004 
6005 	/* assert preemption condition */
6006 	amdgpu_ring_set_preempt_cond_exec(ring, false);
6007 
6008 	/* assert IB preemption, emit the trailing fence */
6009 	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
6010 				   ring->trail_fence_gpu_addr,
6011 				   ++ring->trail_seq);
6012 	amdgpu_ring_commit(kiq_ring);
6013 
6014 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
6015 
6016 	/* poll the trailing fence */
6017 	for (i = 0; i < adev->usec_timeout; i++) {
6018 		if (ring->trail_seq ==
6019 		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
6020 			break;
6021 		udelay(1);
6022 	}
6023 
6024 	if (i >= adev->usec_timeout) {
6025 		r = -EINVAL;
6026 		DRM_ERROR("ring %d failed to preempt ib\n", ring->idx);
6027 	}
6028 
6029 	/* deassert preemption condition */
6030 	amdgpu_ring_set_preempt_cond_exec(ring, true);
6031 	return r;
6032 }
6033 
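/*
 * Write the draw engine (DE) metadata payload, including the GDS backup
 * address, into the CSA with a WRITE_DATA packet.  On resume the payload
 * previously saved at the CSA CPU address is replayed instead of a freshly
 * zeroed one.
 */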
6034 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
6035 {
6036 	struct amdgpu_device *adev = ring->adev;
6037 	struct v10_de_ib_state de_payload = {0};
6038 	uint64_t offset, gds_addr, de_payload_gpu_addr;
6039 	void *de_payload_cpu_addr;
6040 	int cnt;
6041 
6042 	if (ring->is_mes_queue) {
6043 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
6044 				  gfx[0].gfx_meta_data) +
6045 			offsetof(struct v10_gfx_meta_data, de_payload);
6046 		de_payload_gpu_addr =
6047 			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
6048 		de_payload_cpu_addr =
6049 			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
6050 
6051 		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
6052 				  gfx[0].gds_backup) +
6053 			offsetof(struct v10_gfx_meta_data, de_payload);
6054 		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
6055 	} else {
6056 		offset = offsetof(struct v10_gfx_meta_data, de_payload);
6057 		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
6058 		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
6059 
6060 		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
6061 				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
6062 				 PAGE_SIZE);
6063 	}
6064 
6065 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
6066 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
6067 
6068 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
6069 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
6070 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
6071 				 WRITE_DATA_DST_SEL(8) |
6072 				 WR_CONFIRM) |
6073 				 WRITE_DATA_CACHE_POLICY(0));
6074 	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
6075 	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
6076 
6077 	if (resume)
6078 		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
6079 					   sizeof(de_payload) >> 2);
6080 	else
6081 		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
6082 					   sizeof(de_payload) >> 2);
6083 }
6084 
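/* Emit FRAME_CONTROL marking frame begin/end; FRAME_TMZ flags secure (TMZ) submissions. */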
6085 static void gfx_v11_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
6086 				    bool secure)
6087 {
6088 	uint32_t v = secure ? FRAME_TMZ : 0;
6089 
6090 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
6091 	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
6092 }
6093 
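/*
 * Read a register through the CP: COPY_DATA from the register into the
 * writeback buffer slot @reg_val_offs with write confirmation.  Used as the
 * KIQ ring's emit_rreg callback.
 */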
6094 static void gfx_v11_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
6095 				     uint32_t reg_val_offs)
6096 {
6097 	struct amdgpu_device *adev = ring->adev;
6098 
6099 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6100 	amdgpu_ring_write(ring, 0 |	/* src: register */
6101 				(5 << 8) |	/* dst: memory */
6102 				(1 << 20));	/* write confirm */
6103 	amdgpu_ring_write(ring, reg);
6104 	amdgpu_ring_write(ring, 0);
6105 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6106 				reg_val_offs * 4));
6107 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6108 				reg_val_offs * 4));
6109 }
6110 
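/*
 * Write @val to register @reg with a WRITE_DATA packet; the engine select
 * and write-confirm bits depend on the ring type (gfx, KIQ or compute).
 */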
6111 static void gfx_v11_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6112 				   uint32_t val)
6113 {
6114 	uint32_t cmd = 0;
6115 
6116 	switch (ring->funcs->type) {
6117 	case AMDGPU_RING_TYPE_GFX:
6118 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6119 		break;
6120 	case AMDGPU_RING_TYPE_KIQ:
6121 		cmd = (1 << 16); /* no inc addr */
6122 		break;
6123 	default:
6124 		cmd = WR_CONFIRM;
6125 		break;
6126 	}
6127 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6128 	amdgpu_ring_write(ring, cmd);
6129 	amdgpu_ring_write(ring, reg);
6130 	amdgpu_ring_write(ring, 0);
6131 	amdgpu_ring_write(ring, val);
6132 }
6133 
6134 static void gfx_v11_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
6135 					uint32_t val, uint32_t mask)
6136 {
6137 	gfx_v11_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
6138 }
6139 
6140 static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
6141 						   uint32_t reg0, uint32_t reg1,
6142 						   uint32_t ref, uint32_t mask)
6143 {
6144 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6145 
6146 	gfx_v11_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
6147 			       ref, mask, 0x20);
6148 }
6149 
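/*
 * Soft recovery: while the RLC is in safe mode, issue an SQ_CMD targeting
 * the waves of the given VMID so a hung shader can be killed off without a
 * full ring reset.
 */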
6150 static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring,
6151 					 unsigned vmid)
6152 {
6153 	struct amdgpu_device *adev = ring->adev;
6154 	uint32_t value = 0;
6155 
6156 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6157 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6158 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6159 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6160 	amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
6161 	WREG32_SOC15(GC, 0, regSQ_CMD, value);
6162 	amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
6163 }
6164 
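/*
 * Enable or disable the EOP interrupt for a gfx ME/pipe by toggling the
 * timestamp and generic0 interrupt enables in CP_INT_CNTL_RING0/1.
 */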
6165 static void
6166 gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6167 				      uint32_t me, uint32_t pipe,
6168 				      enum amdgpu_interrupt_state state)
6169 {
6170 	uint32_t cp_int_cntl, cp_int_cntl_reg;
6171 
6172 	if (!me) {
6173 		switch (pipe) {
6174 		case 0:
6175 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING0);
6176 			break;
6177 		case 1:
6178 			cp_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_INT_CNTL_RING1);
6179 			break;
6180 		default:
6181 			DRM_DEBUG("invalid pipe %d\n", pipe);
6182 			return;
6183 		}
6184 	} else {
6185 		DRM_DEBUG("invalid me %d\n", me);
6186 		return;
6187 	}
6188 
6189 	switch (state) {
6190 	case AMDGPU_IRQ_STATE_DISABLE:
6191 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6192 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6193 					    TIME_STAMP_INT_ENABLE, 0);
6194 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6195 					    GENERIC0_INT_ENABLE, 0);
6196 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6197 		break;
6198 	case AMDGPU_IRQ_STATE_ENABLE:
6199 		cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6200 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6201 					    TIME_STAMP_INT_ENABLE, 1);
6202 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6203 					    GENERIC0_INT_ENABLE, 1);
6204 		WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6205 		break;
6206 	default:
6207 		break;
6208 	}
6209 }
6210 
6211 static void gfx_v11_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6212 						     int me, int pipe,
6213 						     enum amdgpu_interrupt_state state)
6214 {
6215 	u32 mec_int_cntl, mec_int_cntl_reg;
6216 
6217 	/*
6218 	 * amdgpu controls only the first MEC. That's why this function only
6219 	 * handles the setting of interrupts for this specific MEC. All other
6220 	 * pipes' interrupts are set by amdkfd.
6221 	 */
6222 
6223 	if (me == 1) {
6224 		switch (pipe) {
6225 		case 0:
6226 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6227 			break;
6228 		case 1:
6229 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE1_INT_CNTL);
6230 			break;
6231 		case 2:
6232 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE2_INT_CNTL);
6233 			break;
6234 		case 3:
6235 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE3_INT_CNTL);
6236 			break;
6237 		default:
6238 			DRM_DEBUG("invalid pipe %d\n", pipe);
6239 			return;
6240 		}
6241 	} else {
6242 		DRM_DEBUG("invalid me %d\n", me);
6243 		return;
6244 	}
6245 
6246 	switch (state) {
6247 	case AMDGPU_IRQ_STATE_DISABLE:
6248 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6249 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6250 					     TIME_STAMP_INT_ENABLE, 0);
6251 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6252 					     GENERIC0_INT_ENABLE, 0);
6253 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6254 		break;
6255 	case AMDGPU_IRQ_STATE_ENABLE:
6256 		mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6257 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6258 					     TIME_STAMP_INT_ENABLE, 1);
6259 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6260 					     GENERIC0_INT_ENABLE, 1);
6261 		WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6262 		break;
6263 	default:
6264 		break;
6265 	}
6266 }
6267 
6268 static int gfx_v11_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6269 					    struct amdgpu_irq_src *src,
6270 					    unsigned type,
6271 					    enum amdgpu_interrupt_state state)
6272 {
6273 	switch (type) {
6274 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6275 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 0, state);
6276 		break;
6277 	case AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP:
6278 		gfx_v11_0_set_gfx_eop_interrupt_state(adev, 0, 1, state);
6279 		break;
6280 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6281 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6282 		break;
6283 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6284 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6285 		break;
6286 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6287 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6288 		break;
6289 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6290 		gfx_v11_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6291 		break;
6292 	default:
6293 		break;
6294 	}
6295 	return 0;
6296 }
6297 
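/*
 * EOP interrupt handler.  For MES-managed queues the ring is looked up by
 * the queue id carried in src_data[0]; otherwise me/pipe/queue are decoded
 * from ring_id and fence processing runs on the matching gfx or compute
 * ring.
 */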
6298 static int gfx_v11_0_eop_irq(struct amdgpu_device *adev,
6299 			     struct amdgpu_irq_src *source,
6300 			     struct amdgpu_iv_entry *entry)
6301 {
6302 	int i;
6303 	u8 me_id, pipe_id, queue_id;
6304 	struct amdgpu_ring *ring;
6305 	uint32_t mes_queue_id = entry->src_data[0];
6306 
6307 	DRM_DEBUG("IH: CP EOP\n");
6308 
6309 	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
6310 		struct amdgpu_mes_queue *queue;
6311 
6312 		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
6313 
6314 		spin_lock(&adev->mes.queue_id_lock);
6315 		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
6316 		if (queue) {
6317 			DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
6318 			amdgpu_fence_process(queue->ring);
6319 		}
6320 		spin_unlock(&adev->mes.queue_id_lock);
6321 	} else {
6322 		me_id = (entry->ring_id & 0x0c) >> 2;
6323 		pipe_id = (entry->ring_id & 0x03) >> 0;
6324 		queue_id = (entry->ring_id & 0x70) >> 4;
6325 
6326 		switch (me_id) {
6327 		case 0:
6328 			if (pipe_id == 0)
6329 				amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6330 			else
6331 				amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
6332 			break;
6333 		case 1:
6334 		case 2:
6335 			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6336 				ring = &adev->gfx.compute_ring[i];
6337 				/* Per-queue interrupt is supported for MEC starting from VI.
6338 				 * The interrupt can only be enabled/disabled per pipe instead
6339 				 * of per queue.
6340 				 */
6341 				if ((ring->me == me_id) &&
6342 				    (ring->pipe == pipe_id) &&
6343 				    (ring->queue == queue_id))
6344 					amdgpu_fence_process(ring);
6345 			}
6346 			break;
6347 		}
6348 	}
6349 
6350 	return 0;
6351 }
6352 
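/*
 * Enable or disable the privileged register fault interrupt on every CPG
 * (gfx) and CPC (compute) pipe.
 */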
6353 static int gfx_v11_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6354 					      struct amdgpu_irq_src *source,
6355 					      unsigned int type,
6356 					      enum amdgpu_interrupt_state state)
6357 {
6358 	u32 cp_int_cntl_reg, cp_int_cntl;
6359 	int i, j;
6360 
6361 	switch (state) {
6362 	case AMDGPU_IRQ_STATE_DISABLE:
6363 	case AMDGPU_IRQ_STATE_ENABLE:
6364 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6365 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6366 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6367 
6368 				if (cp_int_cntl_reg) {
6369 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6370 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6371 								    PRIV_REG_INT_ENABLE,
6372 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6373 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6374 				}
6375 			}
6376 		}
6377 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6378 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6379 				/* MECs start at 1 */
6380 				cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
6381 
6382 				if (cp_int_cntl_reg) {
6383 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6384 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6385 								    PRIV_REG_INT_ENABLE,
6386 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6387 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6388 				}
6389 			}
6390 		}
6391 		break;
6392 	default:
6393 		break;
6394 	}
6395 
6396 	return 0;
6397 }
6398 
6399 static int gfx_v11_0_set_bad_op_fault_state(struct amdgpu_device *adev,
6400 					    struct amdgpu_irq_src *source,
6401 					    unsigned type,
6402 					    enum amdgpu_interrupt_state state)
6403 {
6404 	u32 cp_int_cntl_reg, cp_int_cntl;
6405 	int i, j;
6406 
6407 	switch (state) {
6408 	case AMDGPU_IRQ_STATE_DISABLE:
6409 	case AMDGPU_IRQ_STATE_ENABLE:
6410 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6411 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6412 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6413 
6414 				if (cp_int_cntl_reg) {
6415 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6416 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6417 								    OPCODE_ERROR_INT_ENABLE,
6418 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6419 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6420 				}
6421 			}
6422 		}
6423 		for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6424 			for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6425 				/* MECs start at 1 */
6426 				cp_int_cntl_reg = gfx_v11_0_get_cpc_int_cntl(adev, i + 1, j);
6427 
6428 				if (cp_int_cntl_reg) {
6429 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6430 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6431 								    OPCODE_ERROR_INT_ENABLE,
6432 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6433 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6434 				}
6435 			}
6436 		}
6437 		break;
6438 	default:
6439 		break;
6440 	}
6441 	return 0;
6442 }
6443 
6444 static int gfx_v11_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6445 					       struct amdgpu_irq_src *source,
6446 					       unsigned int type,
6447 					       enum amdgpu_interrupt_state state)
6448 {
6449 	u32 cp_int_cntl_reg, cp_int_cntl;
6450 	int i, j;
6451 
6452 	switch (state) {
6453 	case AMDGPU_IRQ_STATE_DISABLE:
6454 	case AMDGPU_IRQ_STATE_ENABLE:
6455 		for (i = 0; i < adev->gfx.me.num_me; i++) {
6456 			for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6457 				cp_int_cntl_reg = gfx_v11_0_get_cpg_int_cntl(adev, i, j);
6458 
6459 				if (cp_int_cntl_reg) {
6460 					cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6461 					cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6462 								    PRIV_INSTR_INT_ENABLE,
6463 								    state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6464 					WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6465 				}
6466 			}
6467 		}
6468 		break;
6469 	default:
6470 		break;
6471 	}
6472 
6473 	return 0;
6474 }
6475 
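/*
 * Decode me/pipe/queue from the IV ring_id and signal a scheduler fault on
 * the matching gfx or compute ring.
 */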
6476 static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
6477 					struct amdgpu_iv_entry *entry)
6478 {
6479 	u8 me_id, pipe_id, queue_id;
6480 	struct amdgpu_ring *ring;
6481 	int i;
6482 
6483 	me_id = (entry->ring_id & 0x0c) >> 2;
6484 	pipe_id = (entry->ring_id & 0x03) >> 0;
6485 	queue_id = (entry->ring_id & 0x70) >> 4;
6486 
6487 	switch (me_id) {
6488 	case 0:
6489 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
6490 			ring = &adev->gfx.gfx_ring[i];
6491 			if (ring->me == me_id && ring->pipe == pipe_id &&
6492 			    ring->queue == queue_id)
6493 				drm_sched_fault(&ring->sched);
6494 		}
6495 		break;
6496 	case 1:
6497 	case 2:
6498 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6499 			ring = &adev->gfx.compute_ring[i];
6500 			if (ring->me == me_id && ring->pipe == pipe_id &&
6501 			    ring->queue == queue_id)
6502 				drm_sched_fault(&ring->sched);
6503 		}
6504 		break;
6505 	default:
6506 		BUG();
6507 		break;
6508 	}
6509 }
6510 
6511 static int gfx_v11_0_priv_reg_irq(struct amdgpu_device *adev,
6512 				  struct amdgpu_irq_src *source,
6513 				  struct amdgpu_iv_entry *entry)
6514 {
6515 	DRM_ERROR("Illegal register access in command stream\n");
6516 	gfx_v11_0_handle_priv_fault(adev, entry);
6517 	return 0;
6518 }
6519 
6520 static int gfx_v11_0_bad_op_irq(struct amdgpu_device *adev,
6521 				struct amdgpu_irq_src *source,
6522 				struct amdgpu_iv_entry *entry)
6523 {
6524 	DRM_ERROR("Illegal opcode in command stream\n");
6525 	gfx_v11_0_handle_priv_fault(adev, entry);
6526 	return 0;
6527 }
6528 
6529 static int gfx_v11_0_priv_inst_irq(struct amdgpu_device *adev,
6530 				   struct amdgpu_irq_src *source,
6531 				   struct amdgpu_iv_entry *entry)
6532 {
6533 	DRM_ERROR("Illegal instruction in command stream\n");
6534 	gfx_v11_0_handle_priv_fault(adev, entry);
6535 	return 0;
6536 }
6537 
6538 static int gfx_v11_0_rlc_gc_fed_irq(struct amdgpu_device *adev,
6539 				  struct amdgpu_irq_src *source,
6540 				  struct amdgpu_iv_entry *entry)
6541 {
6542 	if (adev->gfx.ras && adev->gfx.ras->rlc_gc_fed_irq)
6543 		return adev->gfx.ras->rlc_gc_fed_irq(adev, source, entry);
6544 
6545 	return 0;
6546 }
6547 
6548 #if 0
6549 static int gfx_v11_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6550 					     struct amdgpu_irq_src *src,
6551 					     unsigned int type,
6552 					     enum amdgpu_interrupt_state state)
6553 {
6554 	uint32_t tmp, target;
6555 	struct amdgpu_ring *ring = &(adev->gfx.kiq[0].ring);
6556 
6557 	target = SOC15_REG_OFFSET(GC, 0, regCP_ME1_PIPE0_INT_CNTL);
6558 	target += ring->pipe;
6559 
6560 	switch (type) {
6561 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6562 		if (state == AMDGPU_IRQ_STATE_DISABLE) {
6563 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6564 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6565 					    GENERIC2_INT_ENABLE, 0);
6566 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6567 
6568 			tmp = RREG32_SOC15_IP(GC, target);
6569 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6570 					    GENERIC2_INT_ENABLE, 0);
6571 			WREG32_SOC15_IP(GC, target, tmp);
6572 		} else {
6573 			tmp = RREG32_SOC15(GC, 0, regCPC_INT_CNTL);
6574 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6575 					    GENERIC2_INT_ENABLE, 1);
6576 			WREG32_SOC15(GC, 0, regCPC_INT_CNTL, tmp);
6577 
6578 			tmp = RREG32_SOC15_IP(GC, target);
6579 			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE0_INT_CNTL,
6580 					    GENERIC2_INT_ENABLE, 1);
6581 			WREG32_SOC15_IP(GC, target, tmp);
6582 		}
6583 		break;
6584 	default:
6585 		BUG(); /* kiq only supports GENERIC2_INT now */
6586 		break;
6587 	}
6588 	return 0;
6589 }
6590 #endif
6591 
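/*
 * Emit an ACQUIRE_MEM covering the full address range, with GCR_CNTL bits
 * that write back and/or invalidate the GPU caches (GL2, GLM, GL1, GLV,
 * GLK, GLI).
 */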
6592 static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
6593 {
6594 	const unsigned int gcr_cntl =
6595 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
6596 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
6597 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |
6598 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |
6599 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |
6600 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
6601 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
6602 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
6603 
6604 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6605 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
6606 	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
6607 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6608 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6609 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6610 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6611 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6612 	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
6613 }
6614 
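/*
 * Per-queue reset of a kernel gfx queue: reset the queue through MES,
 * re-initialize its MQD, remap it via MES and verify it with a ring test.
 * Not supported under SR-IOV.
 */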
6615 static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
6616 {
6617 	struct amdgpu_device *adev = ring->adev;
6618 	int r;
6619 
6620 	if (amdgpu_sriov_vf(adev))
6621 		return -EINVAL;
6622 
6623 	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
6624 	if (r)
6625 		return r;
6626 
6627 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
6628 	if (unlikely(r != 0)) {
6629 		dev_err(adev->dev, "failed to reserve mqd_obj\n");
6630 		return r;
6631 	}
6632 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
6633 	if (!r) {
6634 		r = gfx_v11_0_kgq_init_queue(ring, true);
6635 		amdgpu_bo_kunmap(ring->mqd_obj);
6636 		ring->mqd_ptr = NULL;
6637 	}
6638 	amdgpu_bo_unreserve(ring->mqd_obj);
6639 	if (r) {
6640 		dev_err(adev->dev, "failed to init kgq mqd\n");
6641 		return r;
6642 	}
6643 
6644 	r = amdgpu_mes_map_legacy_queue(adev, ring);
6645 	if (r) {
6646 		dev_err(adev->dev, "failed to remap kgq\n");
6647 		return r;
6648 	}
6649 
6650 	return amdgpu_ring_test_ring(ring);
6651 }
6652 
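/*
 * Per-queue reset of a kernel compute queue; same flow as
 * gfx_v11_0_reset_kgq() but using the compute MQD init path.
 */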
6653 static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid)
6654 {
6655 	struct amdgpu_device *adev = ring->adev;
6656 	int r = 0;
6657 
6658 	if (amdgpu_sriov_vf(adev))
6659 		return -EINVAL;
6660 
6661 	r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
6662 	if (r) {
6663 		dev_err(adev->dev, "reset via MMIO failed %d\n", r);
6664 		return r;
6665 	}
6666 
6667 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
6668 	if (unlikely(r != 0)) {
6669 		dev_err(adev->dev, "failed to reserve mqd_obj\n");
6670 		return r;
6671 	}
6672 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
6673 	if (!r) {
6674 		r = gfx_v11_0_kcq_init_queue(ring, true);
6675 		amdgpu_bo_kunmap(ring->mqd_obj);
6676 		ring->mqd_ptr = NULL;
6677 	}
6678 	amdgpu_bo_unreserve(ring->mqd_obj);
6679 	if (r) {
6680 		dev_err(adev->dev, "failed to init kcq mqd\n");
6681 		return r;
6682 	}
6683 	r = amdgpu_mes_map_legacy_queue(adev, ring);
6684 	if (r) {
6685 		dev_err(adev->dev, "failed to remap kcq\n");
6686 		return r;
6687 	}
6688 
6689 	return amdgpu_ring_test_ring(ring);
6690 }
6691 
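/*
 * Print the register snapshot captured by gfx_v11_ip_dump(): the core GC
 * registers followed by the per-compute-queue and per-gfx-queue register
 * lists.
 */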
6692 static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
6693 {
6694 	struct amdgpu_device *adev = ip_block->adev;
6695 	uint32_t i, j, k, reg, index = 0;
6696 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
6697 
6698 	if (!adev->gfx.ip_dump_core)
6699 		return;
6700 
6701 	for (i = 0; i < reg_count; i++)
6702 		drm_printf(p, "%-50s \t 0x%08x\n",
6703 			   gc_reg_list_11_0[i].reg_name,
6704 			   adev->gfx.ip_dump_core[i]);
6705 
6706 	/* print compute queue registers for all instances */
6707 	if (!adev->gfx.ip_dump_compute_queues)
6708 		return;
6709 
6710 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
6711 	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
6712 		   adev->gfx.mec.num_mec,
6713 		   adev->gfx.mec.num_pipe_per_mec,
6714 		   adev->gfx.mec.num_queue_per_pipe);
6715 
6716 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6717 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6718 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
6719 				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
6720 				for (reg = 0; reg < reg_count; reg++) {
6721 					drm_printf(p, "%-50s \t 0x%08x\n",
6722 						   gc_cp_reg_list_11[reg].reg_name,
6723 						   adev->gfx.ip_dump_compute_queues[index + reg]);
6724 				}
6725 				index += reg_count;
6726 			}
6727 		}
6728 	}
6729 
6730 	/* print gfx queue registers for all instances */
6731 	if (!adev->gfx.ip_dump_gfx_queues)
6732 		return;
6733 
6734 	index = 0;
6735 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
6736 	drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
6737 		   adev->gfx.me.num_me,
6738 		   adev->gfx.me.num_pipe_per_me,
6739 		   adev->gfx.me.num_queue_per_pipe);
6740 
6741 	for (i = 0; i < adev->gfx.me.num_me; i++) {
6742 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6743 			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
6744 				drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
6745 				for (reg = 0; reg < reg_count; reg++) {
6746 					drm_printf(p, "%-50s \t 0x%08x\n",
6747 						   gc_gfx_queue_reg_list_11[reg].reg_name,
6748 						   adev->gfx.ip_dump_gfx_queues[index + reg]);
6749 				}
6750 				index += reg_count;
6751 			}
6752 		}
6753 	}
6754 }
6755 
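/*
 * Capture the GC register state for debugging: core registers with GFXOFF
 * temporarily disabled, then per-queue registers selected through
 * soc21_grbm_select() under the srbm_mutex.
 */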
6756 static void gfx_v11_ip_dump(struct amdgpu_ip_block *ip_block)
6757 {
6758 	struct amdgpu_device *adev = ip_block->adev;
6759 	uint32_t i, j, k, reg, index = 0;
6760 	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
6761 
6762 	if (!adev->gfx.ip_dump_core)
6763 		return;
6764 
6765 	amdgpu_gfx_off_ctrl(adev, false);
6766 	for (i = 0; i < reg_count; i++)
6767 		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
6768 	amdgpu_gfx_off_ctrl(adev, true);
6769 
6770 	/* dump compute queue registers for all instances */
6771 	if (!adev->gfx.ip_dump_compute_queues)
6772 		return;
6773 
6774 	reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
6775 	amdgpu_gfx_off_ctrl(adev, false);
6776 	mutex_lock(&adev->srbm_mutex);
6777 	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6778 		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6779 			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
6780 				/* ME0 is for GFX so start from 1 for CP */
6781 				soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
6782 				for (reg = 0; reg < reg_count; reg++) {
6783 					adev->gfx.ip_dump_compute_queues[index + reg] =
6784 						RREG32(SOC15_REG_ENTRY_OFFSET(
6785 							gc_cp_reg_list_11[reg]));
6786 				}
6787 				index += reg_count;
6788 			}
6789 		}
6790 	}
6791 	soc21_grbm_select(adev, 0, 0, 0, 0);
6792 	mutex_unlock(&adev->srbm_mutex);
6793 	amdgpu_gfx_off_ctrl(adev, true);
6794 
6795 	/* dump gfx queue registers for all instances */
6796 	if (!adev->gfx.ip_dump_gfx_queues)
6797 		return;
6798 
6799 	index = 0;
6800 	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
6801 	amdgpu_gfx_off_ctrl(adev, false);
6802 	mutex_lock(&adev->srbm_mutex);
6803 	for (i = 0; i < adev->gfx.me.num_me; i++) {
6804 		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
6805 			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
6806 				soc21_grbm_select(adev, i, j, k, 0);
6807 
6808 				for (reg = 0; reg < reg_count; reg++) {
6809 					adev->gfx.ip_dump_gfx_queues[index + reg] =
6810 						RREG32(SOC15_REG_ENTRY_OFFSET(
6811 							gc_gfx_queue_reg_list_11[reg]));
6812 				}
6813 				index += reg_count;
6814 			}
6815 		}
6816 	}
6817 	soc21_grbm_select(adev, 0, 0, 0, 0);
6818 	mutex_unlock(&adev->srbm_mutex);
6819 	amdgpu_gfx_off_ctrl(adev, true);
6820 }
6821 
6822 static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
6823 {
6824 	/* Emit the cleaner shader */
6825 	amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
6826 	amdgpu_ring_write(ring, 0);  /* RESERVED field, programmed to zero */
6827 }
6828 
6829 static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
6830 	.name = "gfx_v11_0",
6831 	.early_init = gfx_v11_0_early_init,
6832 	.late_init = gfx_v11_0_late_init,
6833 	.sw_init = gfx_v11_0_sw_init,
6834 	.sw_fini = gfx_v11_0_sw_fini,
6835 	.hw_init = gfx_v11_0_hw_init,
6836 	.hw_fini = gfx_v11_0_hw_fini,
6837 	.suspend = gfx_v11_0_suspend,
6838 	.resume = gfx_v11_0_resume,
6839 	.is_idle = gfx_v11_0_is_idle,
6840 	.wait_for_idle = gfx_v11_0_wait_for_idle,
6841 	.soft_reset = gfx_v11_0_soft_reset,
6842 	.check_soft_reset = gfx_v11_0_check_soft_reset,
6843 	.post_soft_reset = gfx_v11_0_post_soft_reset,
6844 	.set_clockgating_state = gfx_v11_0_set_clockgating_state,
6845 	.set_powergating_state = gfx_v11_0_set_powergating_state,
6846 	.get_clockgating_state = gfx_v11_0_get_clockgating_state,
6847 	.dump_ip_state = gfx_v11_ip_dump,
6848 	.print_ip_state = gfx_v11_ip_print,
6849 };
6850 
6851 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
6852 	.type = AMDGPU_RING_TYPE_GFX,
6853 	.align_mask = 0xff,
6854 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6855 	.support_64bit_ptrs = true,
6856 	.secure_submission_supported = true,
6857 	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
6858 	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
6859 	.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
6860 	.emit_frame_size = /* totally 247 maximum if 16 IBs */
6861 		5 + /* update_spm_vmid */
6862 		5 + /* COND_EXEC */
6863 		22 + /* SET_Q_PREEMPTION_MODE */
6864 		7 + /* PIPELINE_SYNC */
6865 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6866 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6867 		4 + /* VM_FLUSH */
6868 		8 + /* FENCE for VM_FLUSH */
6869 		20 + /* GDS switch */
6870 		5 + /* COND_EXEC */
6871 		7 + /* HDP_flush */
6872 		4 + /* VGT_flush */
6873 		31 + /*	DE_META */
6874 		3 + /* CNTX_CTRL */
6875 		5 + /* HDP_INVL */
6876 		22 + /* SET_Q_PREEMPTION_MODE */
6877 		8 + 8 + /* FENCE x2 */
6878 		8 + /* gfx_v11_0_emit_mem_sync */
6879 		2, /* gfx_v11_0_ring_emit_cleaner_shader */
6880 	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
6881 	.emit_ib = gfx_v11_0_ring_emit_ib_gfx,
6882 	.emit_fence = gfx_v11_0_ring_emit_fence,
6883 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6884 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6885 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6886 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6887 	.test_ring = gfx_v11_0_ring_test_ring,
6888 	.test_ib = gfx_v11_0_ring_test_ib,
6889 	.insert_nop = gfx_v11_ring_insert_nop,
6890 	.pad_ib = amdgpu_ring_generic_pad_ib,
6891 	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
6892 	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
6893 	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
6894 	.preempt_ib = gfx_v11_0_ring_preempt_ib,
6895 	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
6896 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6897 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6898 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6899 	.soft_recovery = gfx_v11_0_ring_soft_recovery,
6900 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6901 	.reset = gfx_v11_0_reset_kgq,
6902 	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
6903 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
6904 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
6905 };
6906 
6907 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = {
6908 	.type = AMDGPU_RING_TYPE_COMPUTE,
6909 	.align_mask = 0xff,
6910 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6911 	.support_64bit_ptrs = true,
6912 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
6913 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
6914 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
6915 	.emit_frame_size =
6916 		5 + /* update_spm_vmid */
6917 		20 + /* gfx_v11_0_ring_emit_gds_switch */
6918 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
6919 		5 + /* hdp invalidate */
6920 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6921 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6922 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6923 		2 + /* gfx_v11_0_ring_emit_vm_flush */
6924 		8 + 8 + 8 + /* gfx_v11_0_ring_emit_fence x3 for user fence, vm fence */
6925 		8 + /* gfx_v11_0_emit_mem_sync */
6926 		2, /* gfx_v11_0_ring_emit_cleaner_shader */
6927 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
6928 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
6929 	.emit_fence = gfx_v11_0_ring_emit_fence,
6930 	.emit_pipeline_sync = gfx_v11_0_ring_emit_pipeline_sync,
6931 	.emit_vm_flush = gfx_v11_0_ring_emit_vm_flush,
6932 	.emit_gds_switch = gfx_v11_0_ring_emit_gds_switch,
6933 	.emit_hdp_flush = gfx_v11_0_ring_emit_hdp_flush,
6934 	.test_ring = gfx_v11_0_ring_test_ring,
6935 	.test_ib = gfx_v11_0_ring_test_ib,
6936 	.insert_nop = gfx_v11_ring_insert_nop,
6937 	.pad_ib = amdgpu_ring_generic_pad_ib,
6938 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6939 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6940 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6941 	.soft_recovery = gfx_v11_0_ring_soft_recovery,
6942 	.emit_mem_sync = gfx_v11_0_emit_mem_sync,
6943 	.reset = gfx_v11_0_reset_kcq,
6944 	.emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader,
6945 	.begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
6946 	.end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
6947 };
6948 
6949 static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = {
6950 	.type = AMDGPU_RING_TYPE_KIQ,
6951 	.align_mask = 0xff,
6952 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
6953 	.support_64bit_ptrs = true,
6954 	.get_rptr = gfx_v11_0_ring_get_rptr_compute,
6955 	.get_wptr = gfx_v11_0_ring_get_wptr_compute,
6956 	.set_wptr = gfx_v11_0_ring_set_wptr_compute,
6957 	.emit_frame_size =
6958 		20 + /* gfx_v11_0_ring_emit_gds_switch */
6959 		7 + /* gfx_v11_0_ring_emit_hdp_flush */
6960 		5 + /* hdp invalidate */
6961 		7 + /* gfx_v11_0_ring_emit_pipeline_sync */
6962 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6963 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6964 		8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6965 	.emit_ib_size =	7, /* gfx_v11_0_ring_emit_ib_compute */
6966 	.emit_ib = gfx_v11_0_ring_emit_ib_compute,
6967 	.emit_fence = gfx_v11_0_ring_emit_fence_kiq,
6968 	.test_ring = gfx_v11_0_ring_test_ring,
6969 	.test_ib = gfx_v11_0_ring_test_ib,
6970 	.insert_nop = amdgpu_ring_insert_nop,
6971 	.pad_ib = amdgpu_ring_generic_pad_ib,
6972 	.emit_rreg = gfx_v11_0_ring_emit_rreg,
6973 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
6974 	.emit_reg_wait = gfx_v11_0_ring_emit_reg_wait,
6975 	.emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait,
6976 };
6977 
6978 static void gfx_v11_0_set_ring_funcs(struct amdgpu_device *adev)
6979 {
6980 	int i;
6981 
6982 	adev->gfx.kiq[0].ring.funcs = &gfx_v11_0_ring_funcs_kiq;
6983 
6984 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6985 		adev->gfx.gfx_ring[i].funcs = &gfx_v11_0_ring_funcs_gfx;
6986 
6987 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
6988 		adev->gfx.compute_ring[i].funcs = &gfx_v11_0_ring_funcs_compute;
6989 }
6990 
6991 static const struct amdgpu_irq_src_funcs gfx_v11_0_eop_irq_funcs = {
6992 	.set = gfx_v11_0_set_eop_interrupt_state,
6993 	.process = gfx_v11_0_eop_irq,
6994 };
6995 
6996 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_reg_irq_funcs = {
6997 	.set = gfx_v11_0_set_priv_reg_fault_state,
6998 	.process = gfx_v11_0_priv_reg_irq,
6999 };
7000 
7001 static const struct amdgpu_irq_src_funcs gfx_v11_0_bad_op_irq_funcs = {
7002 	.set = gfx_v11_0_set_bad_op_fault_state,
7003 	.process = gfx_v11_0_bad_op_irq,
7004 };
7005 
7006 static const struct amdgpu_irq_src_funcs gfx_v11_0_priv_inst_irq_funcs = {
7007 	.set = gfx_v11_0_set_priv_inst_fault_state,
7008 	.process = gfx_v11_0_priv_inst_irq,
7009 };
7010 
7011 static const struct amdgpu_irq_src_funcs gfx_v11_0_rlc_gc_fed_irq_funcs = {
7012 	.process = gfx_v11_0_rlc_gc_fed_irq,
7013 };
7014 
7015 static void gfx_v11_0_set_irq_funcs(struct amdgpu_device *adev)
7016 {
7017 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7018 	adev->gfx.eop_irq.funcs = &gfx_v11_0_eop_irq_funcs;
7019 
7020 	adev->gfx.priv_reg_irq.num_types = 1;
7021 	adev->gfx.priv_reg_irq.funcs = &gfx_v11_0_priv_reg_irq_funcs;
7022 
7023 	adev->gfx.bad_op_irq.num_types = 1;
7024 	adev->gfx.bad_op_irq.funcs = &gfx_v11_0_bad_op_irq_funcs;
7025 
7026 	adev->gfx.priv_inst_irq.num_types = 1;
7027 	adev->gfx.priv_inst_irq.funcs = &gfx_v11_0_priv_inst_irq_funcs;
7028 
7029 	adev->gfx.rlc_gc_fed_irq.num_types = 1; /* 0x80 FED error */
7030 	adev->gfx.rlc_gc_fed_irq.funcs = &gfx_v11_0_rlc_gc_fed_irq_funcs;
7032 }
7033 
7034 static void gfx_v11_0_set_imu_funcs(struct amdgpu_device *adev)
7035 {
7036 	if (adev->flags & AMD_IS_APU)
7037 		adev->gfx.imu.mode = MISSION_MODE;
7038 	else
7039 		adev->gfx.imu.mode = DEBUG_MODE;
7040 
7041 	adev->gfx.imu.funcs = &gfx_v11_0_imu_funcs;
7042 }
7043 
7044 static void gfx_v11_0_set_rlc_funcs(struct amdgpu_device *adev)
7045 {
7046 	adev->gfx.rlc.funcs = &gfx_v11_0_rlc_funcs;
7047 }
7048 
7049 static void gfx_v11_0_set_gds_init(struct amdgpu_device *adev)
7050 {
7051 	unsigned total_cu = adev->gfx.config.max_cu_per_sh *
7052 			    adev->gfx.config.max_sh_per_se *
7053 			    adev->gfx.config.max_shader_engines;
7054 
7055 	adev->gds.gds_size = 0x1000;
7056 	adev->gds.gds_compute_max_wave_id = total_cu * 32 - 1;
7057 	adev->gds.gws_size = 64;
7058 	adev->gds.oa_size = 16;
7059 }
7060 
7061 static void gfx_v11_0_set_mqd_funcs(struct amdgpu_device *adev)
7062 {
7063 	/* set gfx eng mqd */
7064 	adev->mqds[AMDGPU_HW_IP_GFX].mqd_size =
7065 		sizeof(struct v11_gfx_mqd);
7066 	adev->mqds[AMDGPU_HW_IP_GFX].init_mqd =
7067 		gfx_v11_0_gfx_mqd_init;
7068 	/* set compute eng mqd */
7069 	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
7070 		sizeof(struct v11_compute_mqd);
7071 	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
7072 		gfx_v11_0_compute_mqd_init;
7073 }
7074 
7075 static void gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
7076 							  u32 bitmap)
7077 {
7078 	u32 data;
7079 
7080 	if (!bitmap)
7081 		return;
7082 
7083 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
7084 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
7085 
7086 	WREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG, data);
7087 }
7088 
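/*
 * Derive the active WGP bitmap for the currently selected SA: OR the fused
 * and user-programmed inactive-WGP masks, invert, and clamp to the number
 * of WGPs (max_cu_per_sh / 2).
 */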
7089 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
7090 {
7091 	u32 data, wgp_bitmask;
7092 	data = RREG32_SOC15(GC, 0, regCC_GC_SHADER_ARRAY_CONFIG);
7093 	data |= RREG32_SOC15(GC, 0, regGC_USER_SHADER_ARRAY_CONFIG);
7094 
7095 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
7096 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
7097 
7098 	wgp_bitmask =
7099 		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
7100 
7101 	return (~data) & wgp_bitmask;
7102 }
7103 
7104 static u32 gfx_v11_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
7105 {
7106 	u32 wgp_idx, wgp_active_bitmap;
7107 	u32 cu_bitmap_per_wgp, cu_active_bitmap;
7108 
7109 	wgp_active_bitmap = gfx_v11_0_get_wgp_active_bitmap_per_sh(adev);
7110 	cu_active_bitmap = 0;
7111 
7112 	for (wgp_idx = 0; wgp_idx < 16; wgp_idx++) {
7113 		/* if there is one WGP enabled, it means 2 CUs will be enabled */
7114 		cu_bitmap_per_wgp = 3 << (2 * wgp_idx);
7115 		if (wgp_active_bitmap & (1 << wgp_idx))
7116 			cu_active_bitmap |= cu_bitmap_per_wgp;
7117 	}
7118 
7119 	return cu_active_bitmap;
7120 }
7121 
7122 static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev,
7123 				 struct amdgpu_cu_info *cu_info)
7124 {
7125 	int i, j, k, counter, active_cu_number = 0;
7126 	u32 mask, bitmap;
7127 	unsigned disable_masks[8 * 2];
7128 
7129 	if (!adev || !cu_info)
7130 		return -EINVAL;
7131 
7132 	amdgpu_gfx_parse_disable_cu(disable_masks, 8, 2);
7133 
7134 	mutex_lock(&adev->grbm_idx_mutex);
7135 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7136 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7137 			bitmap = i * adev->gfx.config.max_sh_per_se + j;
7138 			if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1))
7139 				continue;
7140 			mask = 1;
7141 			counter = 0;
7142 			gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
7143 			if (i < 8 && j < 2)
7144 				gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
7145 					adev, disable_masks[i * 2 + j]);
7146 			bitmap = gfx_v11_0_get_cu_active_bitmap_per_sh(adev);
7147 
7148 			/**
7149 			 * GFX11 could support more than 4 SEs, while the bitmap
7150 			 * in cu_info struct is 4x4 and ioctl interface struct
7151 			 * drm_amdgpu_info_device should keep stable.
7152 			 * So we use last two columns of bitmap to store cu mask for
7153 			 * SEs 4 to 7, the layout of the bitmap is as below:
7154 			 *    SE0: {SH0,SH1} --> {bitmap[0][0], bitmap[0][1]}
7155 			 *    SE1: {SH0,SH1} --> {bitmap[1][0], bitmap[1][1]}
7156 			 *    SE2: {SH0,SH1} --> {bitmap[2][0], bitmap[2][1]}
7157 			 *    SE3: {SH0,SH1} --> {bitmap[3][0], bitmap[3][1]}
7158 			 *    SE4: {SH0,SH1} --> {bitmap[0][2], bitmap[0][3]}
7159 			 *    SE5: {SH0,SH1} --> {bitmap[1][2], bitmap[1][3]}
7160 			 *    SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]}
7161 			 *    SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]}
7162 			 */
7163 			cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap;
7164 
7165 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7166 				if (bitmap & mask)
7167 					counter++;
7168 
7169 				mask <<= 1;
7170 			}
7171 			active_cu_number += counter;
7172 		}
7173 	}
7174 	gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7175 	mutex_unlock(&adev->grbm_idx_mutex);
7176 
7177 	cu_info->number = active_cu_number;
7178 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7179 
7180 	return 0;
7181 }
7182 
7183 const struct amdgpu_ip_block_version gfx_v11_0_ip_block =
7184 {
7185 	.type = AMD_IP_BLOCK_TYPE_GFX,
7186 	.major = 11,
7187 	.minor = 0,
7188 	.rev = 0,
7189 	.funcs = &gfx_v11_0_ip_funcs,
7190 };
7191