/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_psp.h"
#include "amdgpu_smu.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_userq_fence.h"
#include "imu_v12_1.h"
#include "soc_v1_0.h"
#include "gfx_v12_1_pkt.h"

#include "gc/gc_12_1_0_offset.h"
#include "gc/gc_12_1_0_sh_mask.h"
#include "soc24_enum.h"
#include "ivsrcid/gfx/irqsrcs_gfx_12_1_0.h"

#include "soc15.h"
#include "clearstate_gfx12.h"
#include "v12_structs.h"
#include "gfx_v12_1.h"
#include "mes_v12_1.h"
#include "amdgpu_ras_mgr.h"

#define GFX12_MEC_HPD_SIZE	2048
#define NUM_SIMD_PER_CU_GFX12_1	4

#define RLCG_UCODE_LOADING_START_ADDRESS	0x00002000L

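/*
 * Power-on default values for the CP HQD registers touched during MQD
 * init.  Baking the reset values in as constants (rather than reading the
 * registers back) appears intended to keep MQD setup independent of the
 * current queue state; the values are specific to this IP revision.
 */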
#define regCP_HQD_EOP_CONTROL_DEFAULT                                             0x00000000
#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT                                     0x00000000
#define regCP_MQD_CONTROL_DEFAULT                                                 0x00000100
#define regCP_HQD_PQ_CONTROL_DEFAULT                                              0x00308509
#define regCP_HQD_PQ_RPTR_DEFAULT                                                 0x00000000
#define regCP_HQD_PERSISTENT_STATE_DEFAULT                                        0x0ae06301
#define regCP_HQD_IB_CONTROL_DEFAULT                                              0x00100000

MODULE_FIRMWARE("amdgpu/gc_12_1_0_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_12_1_0_rlc.bin");

#define SH_MEM_ALIGNMENT_MODE_UNALIGNED_GFX12_1_0	0x00000001
#define DEFAULT_SH_MEM_CONFIG \
	((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
	 (SH_MEM_ALIGNMENT_MODE_UNALIGNED_GFX12_1_0 << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
	 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))

static void gfx_v12_1_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v12_1_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v12_1_set_rlc_funcs(struct amdgpu_device *adev);
static void gfx_v12_1_set_mqd_funcs(struct amdgpu_device *adev);
static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev);
static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v12_1_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v12_1_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				       u32 sh_num, u32 instance, int xcc_id);
static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t val);
static int gfx_v12_1_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v12_1_ring_invalidate_tlbs(struct amdgpu_ring *ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub, uint8_t dst_sel);
static void gfx_v12_1_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v12_1_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
static void gfx_v12_1_update_perf_clk(struct amdgpu_device *adev,
				      bool enable);
static void gfx_v12_1_xcc_update_perf_clk(struct amdgpu_device *adev,
					 bool enable, int xcc_id);
static int gfx_v12_1_init_cp_compute_microcode_bo(struct amdgpu_device *adev);

static void gfx_v12_1_kiq_set_resources(struct amdgpu_ring *kiq_ring,
					uint64_t queue_mask)
{
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);
}

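/*
 * Map a queue through the KIQ.  The me/eng_sel pairs chosen below
 * (me=1/eng_sel=0 for compute, me=2/eng_sel=5 for MES) follow the
 * MAP_QUEUES engine-select encoding used elsewhere in the gfx12 code.
 */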
static void gfx_v12_1_kiq_map_queues(struct amdgpu_ring *kiq_ring,
				     struct amdgpu_ring *ring)
{
	uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	uint64_t wptr_addr = ring->wptr_gpu_addr;
	uint32_t me = 0, eng_sel = 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_COMPUTE:
		me = 1;
		eng_sel = 0;
		break;
	case AMDGPU_RING_TYPE_MES:
		me = 2;
		eng_sel = 5;
		break;
	default:
		WARN_ON(1);
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
			  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
			  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
			  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
			  PACKET3_MAP_QUEUES_ME((me)) |
			  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /* queue_type: normal compute queue */
			  PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
	amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

static void gfx_v12_1_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
				       struct amdgpu_ring *ring,
				       enum amdgpu_unmap_queues_action action,
				       u64 gpu_addr, u64 seq)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
		amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr,
					      seq, kiq_ring->xcc_id);
		return;
	}

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_UNMAP_QUEUES_ACTION(action) |
			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
			  PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
			  PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring,
		  PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

	if (action == PREEMPT_QUEUES_NO_UNMAP) {
		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
		amdgpu_ring_write(kiq_ring, seq);
	} else {
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
}

static void gfx_v12_1_kiq_query_status(struct amdgpu_ring *kiq_ring,
				       struct amdgpu_ring *ring,
				       u64 addr, u64 seq)
{
	uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
	amdgpu_ring_write(kiq_ring,
			  PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
			  PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
			  PACKET3_QUERY_STATUS_COMMAND(2));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
			  PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
			  PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
	amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
	amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

static void gfx_v12_1_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
					  uint16_t pasid,
					  uint32_t flush_type,
					  bool all_hub)
{
	gfx_v12_1_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
}

static const struct kiq_pm4_funcs gfx_v12_1_kiq_pm4_funcs = {
	.kiq_set_resources = gfx_v12_1_kiq_set_resources,
	.kiq_map_queues = gfx_v12_1_kiq_map_queues,
	.kiq_unmap_queues = gfx_v12_1_kiq_unmap_queues,
	.kiq_query_status = gfx_v12_1_kiq_query_status,
	.kiq_invalidate_tlbs = gfx_v12_1_kiq_invalidate_tlbs,
	.set_resources_size = 8,
	.map_queues_size = 7,
	.unmap_queues_size = 6,
	.query_status_size = 7,
	.invalidate_tlbs_size = 2,
};

static void gfx_v12_1_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
	int i, num_xcc;

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	for (i = 0; i < num_xcc; i++)
		adev->gfx.kiq[i].pmf = &gfx_v12_1_kiq_pm4_funcs;
}

static void gfx_v12_1_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				   int mem_space, int opt, uint32_t addr0,
				   uint32_t addr1, uint32_t ref,
				   uint32_t mask, uint32_t inv)
{
	if (mem_space == 0) {
		addr0 = soc_v1_0_normalize_xcc_reg_offset(addr0);
		addr1 = soc_v1_0_normalize_xcc_reg_offset(addr1);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3)));  /* equal */

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

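/*
 * Basic ring test: seed SCRATCH_REG0 with 0xCAFEDEAD via MMIO, emit a
 * packet that writes 0xDEADBEEF to the same register, then poll until the
 * CP has consumed the packet or the usec timeout expires.
 */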
static int gfx_v12_1_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch_reg0_offset, xcc_offset;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* Use register offset which is local to XCC in the packet */
	xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
	scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0);
	WREG32(scratch_reg0_offset, 0xCAFEDEAD);
	tmp = RREG32(scratch_reg0_offset);

	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		dev_err(adev->dev,
			"amdgpu: cp failed to lock ring %d (%d).\n",
			ring->idx, r);
		return r;
	}

	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
		gfx_v12_1_ring_emit_wreg(ring, xcc_offset, 0xDEADBEEF);
	} else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
		amdgpu_ring_write(ring, xcc_offset -
				  PACKET3_SET_UCONFIG_REG_START);
		amdgpu_ring_write(ring, 0xDEADBEEF);
	}
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch_reg0_offset);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;
	return r;
}

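/*
 * IB test: submit a small indirect buffer containing a WRITE_DATA packet
 * that stores 0xDEADBEEF to a writeback slot, then wait on the resulting
 * fence and check that memory was actually updated.
 */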
static int gfx_v12_1_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	uint64_t gpu_addr;
	volatile uint32_t *cpu_ptr;
	long r;

	/* MES KIQ fw doesn't support indirect buffers for now */
	if (adev->enable_mes_kiq &&
	    ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		return 0;

	memset(&ib, 0, sizeof(ib));

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	cpu_ptr = &adev->wb.wb[index];

	r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;
err2:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v12_1_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v12_1_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
{
	const struct psp_firmware_header_v1_0 *toc_hdr;
	int err = 0;

	err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_toc.bin", ucode_prefix);
	if (err)
		goto out;

	toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
	adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
	adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
	adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
	adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
			le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
	return 0;
out:
	amdgpu_ucode_release(&adev->psp.toc_fw);
	return err;
}

static int gfx_v12_1_init_microcode(struct amdgpu_device *adev)
{
	char ucode_prefix[15];
	int err;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	uint16_t version_major;
	uint16_t version_minor;

	DRM_DEBUG("\n");

	amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));

	if (!amdgpu_sriov_vf(adev)) {
		err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_rlc.bin", ucode_prefix);
		if (err)
			goto out;
		rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
		version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
		version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
		err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
		if (err)
			goto out;
	}

	err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_mec.bin", ucode_prefix);
	if (err)
		goto out;
	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		err = gfx_v12_1_init_toc_microcode(adev, ucode_prefix);

	/* only one MEC for gfx 12 */
	adev->gfx.mec2_fw = NULL;

	if (adev->gfx.imu.funcs) {
		if (adev->gfx.imu.funcs->init_microcode) {
			err = adev->gfx.imu.funcs->init_microcode(adev);
			if (err)
				dev_err(adev->dev, "Failed to load imu firmware!\n");
		}
	}

out:
	if (err) {
		amdgpu_ucode_release(&adev->gfx.rlc_fw);
		amdgpu_ucode_release(&adev->gfx.mec_fw);
	}

	return err;
}

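/*
 * Clear-state buffer (CSB) layout: one leading cluster-count dword, then
 * for each SECT_CONTEXT extent a reg_count/reg_index pair followed by the
 * register values.  The size computed here must mirror what
 * gfx_v12_1_get_csb_buffer() writes below.
 */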
static u32 gfx_v12_1_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	count += 1;

	for (sect = gfx12_cs_data; sect->section != NULL; ++sect) {
		if (sect->id == SECT_CONTEXT) {
			for (ext = sect->section; ext->extent != NULL; ++ext)
				count += 2 + ext->reg_count;
		} else
			return 0;
	}

	return count;
}

static void gfx_v12_1_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
	u32 count = 0, clustercount = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	count += 1;

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		if (sect->id == SECT_CONTEXT) {
			for (ext = sect->section; ext->extent != NULL; ++ext) {
				clustercount++;
				buffer[count++] = ext->reg_count;
				buffer[count++] = ext->reg_index;

				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			}
		} else
			return;
	}

	buffer[0] = clustercount;
}

static void gfx_v12_1_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
			&adev->gfx.rlc.clear_state_gpu_addr,
			(void **)&adev->gfx.rlc.cs_ptr);

	/* jump table block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
			&adev->gfx.rlc.cp_table_gpu_addr,
			(void **)&adev->gfx.rlc.cp_table_ptr);
}

static void gfx_v12_1_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
{
	int xcc_id, num_xcc;
	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[GET_INST(GC, xcc_id)];

		reg_access_ctrl->grbm_cntl =
			SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_CNTL);
		reg_access_ctrl->grbm_idx =
			SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX);

		reg_access_ctrl->vfi_cmd =
			SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_CMD);
		reg_access_ctrl->vfi_stat =
			SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_STAT);
		reg_access_ctrl->vfi_addr =
			SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_ADDR);
		reg_access_ctrl->vfi_data =
			SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_DATA);
		reg_access_ctrl->vfi_grbm_cntl =
			SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_GRBM_GFX_CNTL);
		reg_access_ctrl->vfi_grbm_idx =
			SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_GRBM_GFX_INDEX);
		reg_access_ctrl->vfi_grbm_cntl_data = 0;
		reg_access_ctrl->vfi_grbm_idx_data = 0;
	}
	adev->gfx.rlc.rlcg_reg_access_supported = true;
}

static int gfx_v12_1_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r, i, num_xcc;

	adev->gfx.rlc.cs_data = gfx12_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	for (i = 0; i < num_xcc; i++) {
		if (adev->gfx.rlc.funcs->update_spm_vmid)
			adev->gfx.rlc.funcs->update_spm_vmid(adev, i, NULL, 0xf);
	}

	return 0;
}

static void gfx_v12_1_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
}

static int gfx_v12_1_mec_init(struct amdgpu_device *adev)
{
	int r, i, num_xcc;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	for (i = 0; i < num_xcc; i++)
		bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap,
			    AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings *
		       GFX12_MEC_HPD_SIZE * num_xcc;

	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			gfx_v12_1_mec_fini(adev);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

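/*
 * Wave state is read through the SQ_IND_INDEX/SQ_IND_DATA indirect
 * register pair: SQ_IND_INDEX selects the wave and register, and the
 * AUTO_INCR bit lets back-to-back SQ_IND_DATA reads walk a range.
 */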
static uint32_t wave_read_ind(struct amdgpu_device *adev,
			      uint32_t xcc_id, uint32_t wave,
			      uint32_t address)
{
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT));
	return RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev,
			   uint32_t xcc_id, uint32_t wave,
			   uint32_t thread, uint32_t regno,
			   uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
}

static void gfx_v12_1_read_wave_data(struct amdgpu_device *adev,
				     uint32_t xcc_id,
				     uint32_t simd, uint32_t wave,
				     uint32_t *dst, int *no_fields)
{
	/* in gfx12 the SIMD_ID is specified as part of the INSTANCE
	 * field when performing a select_se_sh so it should be
	 * zero here */
	WARN_ON(simd != 0);

	/* type 4 wave data */
	dst[(*no_fields)++] = 4;
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_HW_ID1);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_HW_ID2);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_IB_STS2);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_IB_DBG1);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_M0);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_MODE);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_STATE_PRIV);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXCP_FLAG_PRIV);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXCP_FLAG_USER);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_TRAP_CTRL);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_ACTIVE);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_VALID_AND_IDLE);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_DVGPR_ALLOC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_DVGPR_ALLOC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_SCHED_MODE);
}

static void gfx_v12_1_read_wave_sgprs(struct amdgpu_device *adev,
				      uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t start,
				      uint32_t size, uint32_t *dst)
{
	WARN_ON(simd != 0);

	wave_read_regs(adev, xcc_id, wave, 0,
		       start + SQIND_WAVE_SGPRS_OFFSET,
		       size, dst);
}

static void gfx_v12_1_read_wave_vgprs(struct amdgpu_device *adev,
				      uint32_t xcc_id, uint32_t simd,
				      uint32_t wave, uint32_t thread,
				      uint32_t start, uint32_t size,
				      uint32_t *dst)
{
	wave_read_regs(adev, xcc_id, wave, thread,
		       start + SQIND_WAVE_VGPRS_OFFSET,
		       size, dst);
}

static void gfx_v12_1_select_me_pipe_q(struct amdgpu_device *adev,
				       u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
	soc_v1_0_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id));
}

static int gfx_v12_1_get_xccs_per_xcp(struct amdgpu_device *adev)
{
	/* Fill this in when the interface is ready */
	return 1;
}

static int gfx_v12_1_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node)
{
	int logic_xcc;
	int xcc = (ih_node & 0x7) - 2 + (ih_node >> 3) * 4;

	for (logic_xcc = 0; logic_xcc < NUM_XCC(adev->gfx.xcc_mask); logic_xcc++) {
		if (xcc == GET_INST(GC, logic_xcc))
			return logic_xcc;
	}

	dev_err(adev->dev, "Couldn't find xcc mapping from IH node\n");
	return -EINVAL;
}

static const struct amdgpu_gfx_funcs gfx_v12_1_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v12_1_get_gpu_clock_counter,
	.select_se_sh = &gfx_v12_1_xcc_select_se_sh,
	.read_wave_data = &gfx_v12_1_read_wave_data,
	.read_wave_sgprs = &gfx_v12_1_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v12_1_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v12_1_select_me_pipe_q,
	.update_perfmon_mgcg = &gfx_v12_1_update_perf_clk,
	.get_xccs_per_xcp = &gfx_v12_1_get_xccs_per_xcp,
	.ih_node_to_logical_xcc = &gfx_v12_1_ih_to_xcc_inst,
};

static int gfx_v12_1_gpu_early_init(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(12, 1, 0):
		adev->gfx.config.max_hw_contexts = 8;
		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
		break;
	default:
		BUG();
		break;
	}

	return 0;
}

static int gfx_v12_1_compute_ring_init(struct amdgpu_device *adev, int ring_id,
				       int xcc_id, int mec, int pipe, int queue)
{
	int r;
	unsigned irq_type;
	struct amdgpu_ring *ring;
	unsigned int hw_prio;
	uint32_t xcc_doorbell_start;

	ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings +
				       ring_id];

	/* mec0 is me1 */
	ring->xcc_id = xcc_id;
	ring->me = mec + 1;
	ring->pipe = pipe;
	ring->queue = queue;

	ring->ring_obj = NULL;
	ring->use_doorbell = true;
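	/*
	 * Each XCC owns a contiguous doorbell range; the index is doubled,
	 * which matches the 64-bit (two dword) doorbells used for compute
	 * queues on SOC15-style parts.
	 */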
	xcc_doorbell_start = adev->doorbell_index.mec_ring0 +
			     xcc_id * adev->doorbell_index.xcc_doorbell_range;
	ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1;
	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
			     (ring_id + xcc_id * adev->gfx.num_compute_rings) *
			     GFX12_MEC_HPD_SIZE;
	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
	sprintf(ring->name, "comp_%d.%d.%d.%d",
			ring->xcc_id, ring->me, ring->pipe, ring->queue);

	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
		+ ring->pipe;
	hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
			AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
			     hw_prio, NULL);
	if (r)
		return r;

	return 0;
}

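/*
 * Layout of the RLC autoload buffer: one entry per firmware image,
 * filled in from the TOC firmware by gfx_v12_1_parse_rlc_toc() and
 * consumed by the backdoor-autoload copy helpers below.
 */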
static struct {
	SOC24_FIRMWARE_ID	id;
	unsigned int		offset;
	unsigned int		size;
	unsigned int		size_x16;
	unsigned int		num_inst;
} rlc_autoload_info[SOC24_FIRMWARE_ID_MAX];

#define RLC_TOC_OFFSET_DWUNIT   8
#define RLC_SIZE_MULTIPLE       1024
#define RLC_TOC_UMF_SIZE_inM	23ULL
#define RLC_TOC_FORMAT_API	165ULL

#define RLC_NUM_INS_CODE0   1
#define RLC_NUM_INS_CODE1   8
#define RLC_NUM_INS_CODE2   2
#define RLC_NUM_INS_CODE3   16

static void gfx_v12_1_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
{
	RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc;

	while (ucode && (ucode->id > SOC24_FIRMWARE_ID_INVALID)) {
		rlc_autoload_info[ucode->id].id = ucode->id;
		rlc_autoload_info[ucode->id].offset =
			ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4;
		rlc_autoload_info[ucode->id].size =
			ucode->size_x16 ? ucode->size * RLC_SIZE_MULTIPLE * 4 :
					  ucode->size * 4;
		switch (ucode->vfflr_image_code) {
		case 0:
			rlc_autoload_info[ucode->id].num_inst =
				RLC_NUM_INS_CODE0;
			break;
		case 1:
			rlc_autoload_info[ucode->id].num_inst =
				RLC_NUM_INS_CODE1;
			break;
		case 2:
			rlc_autoload_info[ucode->id].num_inst =
				RLC_NUM_INS_CODE2;
			break;
		case 3:
			rlc_autoload_info[ucode->id].num_inst =
				RLC_NUM_INS_CODE3;
			break;
		default:
			dev_err(adev->dev,
				"Invalid Instance number detected\n");
			break;
		}
		ucode++;
	}
}

static uint32_t gfx_v12_1_calc_toc_total_size(struct amdgpu_device *adev)
{
	uint32_t total_size = 0;
	SOC24_FIRMWARE_ID id;

	gfx_v12_1_parse_rlc_toc(adev, adev->psp.toc.start_addr);

	for (id = SOC24_FIRMWARE_ID_RLC_G_UCODE; id < SOC24_FIRMWARE_ID_MAX; id++)
		total_size += rlc_autoload_info[id].size;

	/* In case the offset in rlc toc ucode is aligned */
	if (total_size < rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset)
		total_size = rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset +
			rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].size;
	if (total_size < (RLC_TOC_UMF_SIZE_inM << 20))
		total_size = RLC_TOC_UMF_SIZE_inM << 20;

	return total_size;
}

static int gfx_v12_1_rlc_autoload_buffer_init(struct amdgpu_device *adev)
{
	int r;
	uint32_t total_size;

	total_size = gfx_v12_1_calc_toc_total_size(adev);

	r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.rlc.rlc_autoload_bo,
				      &adev->gfx.rlc.rlc_autoload_gpu_addr,
				      (void **)&adev->gfx.rlc.rlc_autoload_ptr);

	if (r) {
		dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
		return r;
	}

	return 0;
}

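/*
 * Copy one firmware image into the autoload buffer.  Images with
 * per-instance slots (num_inst > 1) are replicated into each slot, and
 * slots whose XCC (two slots per XCC, hence i / 2) is absent from
 * adev->gfx.xcc_mask are skipped.
 */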
static void gfx_v12_1_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
						       SOC24_FIRMWARE_ID id,
						       const void *fw_data,
						       uint32_t fw_size)
{
	uint32_t toc_offset;
	uint32_t toc_fw_size, toc_fw_inst_size;
	char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
	int i, num_inst;

	if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX)
		return;

	toc_offset = rlc_autoload_info[id].offset;
	toc_fw_size = rlc_autoload_info[id].size;
	num_inst = rlc_autoload_info[id].num_inst;
	toc_fw_inst_size = toc_fw_size / num_inst;

	if (fw_size == 0)
		fw_size = toc_fw_inst_size;

	if (fw_size > toc_fw_inst_size)
		fw_size = toc_fw_inst_size;

	for (i = 0; i < num_inst; i++) {
		if ((num_inst == RLC_NUM_INS_CODE0) ||
		    ((1 << (i / 2)) & adev->gfx.xcc_mask)) {
			memcpy(ptr + toc_offset + i * toc_fw_inst_size, fw_data, fw_size);

			if (fw_size < toc_fw_inst_size)
				memset(ptr + toc_offset + fw_size + i * toc_fw_inst_size,
				       0, toc_fw_inst_size - fw_size);
		}
	}
}

static void
gfx_v12_1_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
{
	void *data;
	uint32_t size;
	uint32_t *toc_ptr;

	data = adev->psp.toc.start_addr;
	size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size;

	toc_ptr = (uint32_t *)data + size / 4 - 2;
	*toc_ptr = (RLC_TOC_FORMAT_API << 24) | 0x1;

	gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC,
						   data, size);
}

static void
gfx_v12_1_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct gfx_firmware_header_v2_0 *cpv2_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	const struct rlc_firmware_header_v2_1 *rlcv21_hdr;
	const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
	uint16_t version_major, version_minor;

	/* mec ucode */
	cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
		adev->gfx.mec_fw->data;
	/* instruction */
	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
		le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
	fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
	gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC,
						   fw_data, fw_size);
	/* data */
	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
		le32_to_cpu(cpv2_hdr->data_offset_bytes));
	fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
	gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK,
						   fw_data, fw_size);
	gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK,
						   fw_data, fw_size);
	gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK,
						   fw_data, fw_size);
	gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK,
						   fw_data, fw_size);

	/* rlc ucode */
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
		adev->gfx.rlc_fw->data;
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
	gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE,
						   fw_data, fw_size);

	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
	if (version_major == 2) {
		if (version_minor >= 1) {
			rlcv21_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_offset_bytes));
			fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_size_bytes);
			gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH,
						   fw_data, fw_size);

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv21_hdr->save_restore_list_srm_offset_bytes));
			fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_srm_size_bytes);
			gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM,
						   fw_data, fw_size);
		}
		if (version_minor >= 2) {
			rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
			gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE,
						   fw_data, fw_size);

			fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
					le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
			fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
			gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT,
						   fw_data, fw_size);
		}
	}
}

static void
gfx_v12_1_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	uint32_t fw_size;
	const struct sdma_firmware_header_v3_0 *sdma_hdr;

	if (adev->sdma.instance[0].fw) {
		sdma_hdr = (const struct sdma_firmware_header_v3_0 *)
			adev->sdma.instance[0].fw->data;
		fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
				le32_to_cpu(sdma_hdr->ucode_offset_bytes));
		fw_size = le32_to_cpu(sdma_hdr->ucode_size_bytes);

		gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0,
							   fw_data, fw_size);
	}
}

static void
gfx_v12_1_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	unsigned fw_size;
	const struct mes_firmware_header_v1_0 *mes_hdr;
	int pipe, ucode_id, data_id;

	for (pipe = 0; pipe < 2; pipe++) {
		if (pipe == 0) {
			ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P0;
			data_id  = SOC24_FIRMWARE_ID_RS64_MES_P0_STACK;
		} else {
			ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P1;
			data_id  = SOC24_FIRMWARE_ID_RS64_MES_P1_STACK;
		}

		mes_hdr = (const struct mes_firmware_header_v1_0 *)
			adev->mes.fw[pipe]->data;

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);

		gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, ucode_id, fw_data, fw_size);

		fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
				le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
		fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);

		gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, data_id, fw_data, fw_size);
	}
}

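/*
 * Backdoor autoload: copy all firmware images into the shared buffer,
 * point each XCC's IMU bootloader registers at the RLC_G image, load the
 * IMU firmware, then un-halt the RLC threads so the RLC can boot-load
 * the remaining firmware itself.
 */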
static int gfx_v12_1_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
{
	uint32_t rlc_g_offset, rlc_g_size;
	uint64_t gpu_addr;
	uint32_t data;
	int i, num_xcc;

	/* RLC autoload sequence 2: copy ucode */
	gfx_v12_1_rlc_backdoor_autoload_copy_sdma_ucode(adev);
	gfx_v12_1_rlc_backdoor_autoload_copy_gfx_ucode(adev);
	gfx_v12_1_rlc_backdoor_autoload_copy_mes_ucode(adev);
	gfx_v12_1_rlc_backdoor_autoload_copy_toc_ucode(adev);

	rlc_g_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset;
	rlc_g_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size;
	gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset - adev->gmc.vram_start;

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	for (i = 0; i < num_xcc; i++) {
		WREG32_SOC15(GC, GET_INST(GC, i),
			     regGFX_IMU_RLC_BOOTLOADER_ADDR_HI,
			     upper_32_bits(gpu_addr));
		WREG32_SOC15(GC, GET_INST(GC, i),
			     regGFX_IMU_RLC_BOOTLOADER_ADDR_LO,
			     lower_32_bits(gpu_addr));
		WREG32_SOC15(GC, GET_INST(GC, i),
			     regGFX_IMU_RLC_BOOTLOADER_SIZE,
			     rlc_g_size);
	}

	if (adev->gfx.imu.funcs) {
		/* RLC autoload sequence 3: load IMU fw */
		if (adev->gfx.imu.funcs->load_microcode)
			adev->gfx.imu.funcs->load_microcode(adev);
	}

	/* unhalt rlc to start autoload */
	for (i = 0; i < num_xcc; i++) {
		data = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_THREAD_ENABLE);
		data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1);
		data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
		WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_THREAD_ENABLE, data);
		WREG32_SOC15(GC, GET_INST(GC, i), regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
	}

	return 0;
}

static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
{
	int i, j, k, r, ring_id = 0;
	unsigned num_compute_rings;
	int xcc_id, num_xcc;
	struct amdgpu_device *adev = ip_block->adev;

	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	case IP_VERSION(12, 1, 0):
		adev->gfx.mec.num_mec = 1;
		adev->gfx.mec.num_pipe_per_mec = 4;
		adev->gfx.mec.num_queue_per_pipe = 8;
		break;
	default:
		adev->gfx.mec.num_mec = 2;
		adev->gfx.mec.num_pipe_per_mec = 2;
		adev->gfx.mec.num_queue_per_pipe = 4;
		break;
	}

	if (adev->gfx.num_compute_rings) {
		/* recalculate compute rings to use based on hardware configuration */
		num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
				     adev->gfx.mec.num_queue_per_pipe) / 2;
		adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
						  num_compute_rings);
	}

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
			      GFX_12_1_0__SRCID__CP_EOP_INTERRUPT,
			      &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
			      GFX_12_1_0__SRCID__CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
			      GFX_12_1_0__SRCID__CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* RLC POISON Error */
	r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_RLC,
				GFX_12_1_0__SRCID__RLC_POISON_INTERRUPT,
				&adev->gfx.rlc_poison_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	r = gfx_v12_1_rlc_init(adev);
	if (r) {
		dev_err(adev->dev, "Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v12_1_mec_init(adev);
	if (r) {
		dev_err(adev->dev, "Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the compute queues - allocate horizontally across pipes */
	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
		ring_id = 0;
		for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
			for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
				for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
					if (!amdgpu_gfx_is_mec_queue_enabled(adev,
								xcc_id, i, k, j))
						continue;

					r = gfx_v12_1_compute_ring_init(adev, ring_id,
								xcc_id, i, k, j);
					if (r)
						return r;

					ring_id++;
				}
			}
		}

		if (!adev->enable_mes_kiq) {
			r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, xcc_id);
			if (r) {
				dev_err(adev->dev, "Failed to init KIQ BOs!\n");
				return r;
			}

			r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
			if (r)
				return r;
		}

		r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v12_1_compute_mqd), xcc_id);
		if (r)
			return r;
	}

	/* allocate visible FB for rlc auto-loading fw */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
		r = gfx_v12_1_rlc_autoload_buffer_init(adev);
		if (r)
			return r;
	} else if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		r = gfx_v12_1_init_cp_compute_microcode_bo(adev);
		if (r)
			return r;
	}

	r = gfx_v12_1_gpu_early_init(adev);
	if (r)
		return r;

	r = amdgpu_gfx_sysfs_init(adev);
	if (r)
		return r;

	return 0;
}

static void gfx_v12_1_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
			&adev->gfx.rlc.rlc_autoload_gpu_addr,
			(void **)&adev->gfx.rlc.rlc_autoload_ptr);
}

static int gfx_v12_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
	int i, num_xcc;
	struct amdgpu_device *adev = ip_block->adev;

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	for (i = 0; i < num_xcc; i++) {
		amdgpu_gfx_mqd_sw_fini(adev, i);

		if (!adev->enable_mes_kiq) {
			amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring);
			amdgpu_gfx_kiq_fini(adev, i);
		}
	}

	gfx_v12_1_rlc_fini(adev);
	gfx_v12_1_mec_fini(adev);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
		gfx_v12_1_rlc_autoload_buffer_fini(adev);

	gfx_v12_1_free_microcode(adev);
	amdgpu_gfx_sysfs_fini(adev);

	return 0;
}

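/*
 * Program GRBM_GFX_INDEX for the given XCC.  Passing 0xffffffff for
 * se_num, sh_num or instance selects broadcast writes for that field
 * rather than indexing a single unit.
 */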
static void gfx_v12_1_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num,
				       u32 sh_num, u32 instance, int xcc_id)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
				     INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
				     instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
				     1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
				     1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);

	WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, data);
}

static u32 gfx_v12_1_get_sa_active_bitmap(struct amdgpu_device *adev,
					  int xcc_id)
{
	u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;

	gc_disabled_sa_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SA_UNIT_DISABLE);
	gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
					    CC_GC_SA_UNIT_DISABLE,
					    SA_DISABLE);
	gc_user_disabled_sa_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SA_UNIT_DISABLE);
	gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
						 GC_USER_SA_UNIT_DISABLE,
						 SA_DISABLE);
	sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
					    adev->gfx.config.max_shader_engines);

	return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
}

static u32 gfx_v12_1_get_rb_active_bitmap(struct amdgpu_device *adev,
					  int xcc_id)
{
	u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
	u32 rb_mask;

	gc_disabled_rb_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
					   regCC_RB_BACKEND_DISABLE);
	gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
					    CC_RB_BACKEND_DISABLE,
					    BACKEND_DISABLE);
	gc_user_disabled_rb_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
						regGC_USER_RB_BACKEND_DISABLE);
	gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
						 GC_USER_RB_BACKEND_DISABLE,
						 BACKEND_DISABLE);
	rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
					    adev->gfx.config.max_shader_engines);

	return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
}

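/*
 * Derive the active render-backend mask from the SA harvest fuses: each
 * active SA is assumed to contribute a full set of RBs (the 0x3 below
 * assumes two RBs per SA), OR'd with the RB mask read back from the
 * harvest registers.
 */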
static void gfx_v12_1_setup_rb(struct amdgpu_device *adev)
{
	u32 rb_bitmap_width_per_sa;
	u32 max_sa;
	u32 active_sa_bitmap;
	u32 global_active_rb_bitmap;
	u32 active_rb_bitmap = 0;
	u32 i;
	int xcc_id;

	for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
		/* query sa bitmap from SA_UNIT_DISABLE registers */
		active_sa_bitmap = gfx_v12_1_get_sa_active_bitmap(adev, xcc_id);
		/* query rb bitmap from RB_BACKEND_DISABLE registers */
		global_active_rb_bitmap = gfx_v12_1_get_rb_active_bitmap(adev, xcc_id);

		/* generate active rb bitmap according to active sa bitmap */
		max_sa = adev->gfx.config.max_shader_engines *
			 adev->gfx.config.max_sh_per_se;
		rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se;
		for (i = 0; i < max_sa; i++) {
			if (active_sa_bitmap & (1 << i))
				active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
		}

		active_rb_bitmap |= global_active_rb_bitmap;
	}

	adev->gfx.config.backend_enable_mask = active_rb_bitmap;
	adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
}

static void gfx_v12_1_xcc_init_compute_vmid(struct amdgpu_device *adev,
					    int xcc_id)
{
	int i;
	uint32_t sh_mem_bases;
	uint32_t data;

	/*
	 * Configure apertures:
	 * LDS:         0x20000000'00000000 - 0x20000001'00000000 (4GB)
	 * Scratch:     0x10000000'00000000 - 0x11ffffff'ffffffff (128PB 57-bit)
	 */
	sh_mem_bases = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
				     (adev->gmc.private_aperture_start >> 58));
	sh_mem_bases = REG_SET_FIELD(sh_mem_bases, SH_MEM_BASES, SHARED_BASE,
				     (adev->gmc.shared_aperture_start >> 48));

	mutex_lock(&adev->srbm_mutex);
	for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
		soc_v1_0_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
		/* CP and shaders */
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, sh_mem_bases);

		/* Enable trap for each kfd vmid. */
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL);
		data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL, data);

		/* Disable VGPR deallocation instruction for each KFD vmid. */
		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_DEBUG);
		data = REG_SET_FIELD(data, SQ_DEBUG, DISABLE_VGPR_DEALLOC, 1);
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_DEBUG, data);
	}
	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	mutex_unlock(&adev->srbm_mutex);
}

static void gfx_v12_1_tcp_harvest(struct amdgpu_device *adev)
{
	/* TODO: harvest feature to be added later. */
}

static void gfx_v12_1_get_tcc_info(struct amdgpu_device *adev)
{
}

static void gfx_v12_1_xcc_constants_init(struct amdgpu_device *adev,
					 int xcc_id)
{
	u32 tmp;
	int i;

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
		soc_v1_0_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
		/* CP and shaders */
		WREG32_SOC15(GC, GET_INST(GC, xcc_id),
			     regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
		if (i != 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
				(adev->gmc.private_aperture_start >> 58));
			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
				(adev->gmc.shared_aperture_start >> 48));
			WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, tmp);
		}
	}
	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));

	mutex_unlock(&adev->srbm_mutex);

	gfx_v12_1_xcc_init_compute_vmid(adev, xcc_id);
}

static void gfx_v12_1_constants_init(struct amdgpu_device *adev)
{
	int i, num_xcc;

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);

	gfx_v12_1_setup_rb(adev);
	gfx_v12_1_get_cu_info(adev, &adev->gfx.cu_info);
	gfx_v12_1_get_tcc_info(adev);
	adev->gfx.config.pa_sc_tile_steering_override = 0;

	for (i = 0; i < num_xcc; i++)
		gfx_v12_1_xcc_constants_init(adev, i);
}

static void gfx_v12_1_xcc_enable_gui_idle_interrupt(struct amdgpu_device *adev,
						    bool enable, int xcc_id)
{
	u32 tmp;

	if (amdgpu_sriov_vf(adev))
		return;

	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
			    enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
			    enable ? 1 : 0);

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0, tmp);
}

static int gfx_v12_1_xcc_init_csb(struct amdgpu_device *adev,
				  int xcc_id)
{
	adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id),
		     regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);

	return 0;
}

static void gfx_v12_1_xcc_rlc_stop(struct amdgpu_device *adev,
				   int xcc_id)
{
	u32 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CNTL, tmp);
}

static void gfx_v12_1_rlc_stop(struct amdgpu_device *adev)
{
	int i, num_xcc;

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	for (i = 0; i < num_xcc; i++)
		gfx_v12_1_xcc_rlc_stop(adev, i);
}

static void gfx_v12_1_xcc_rlc_reset(struct amdgpu_device *adev,
				    int xcc_id)
{
	WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id),
			      GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id),
			      GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v12_1_rlc_reset(struct amdgpu_device *adev)
{
	int i, num_xcc;

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	for (i = 0; i < num_xcc; i++)
		gfx_v12_1_xcc_rlc_reset(adev, i);
}

static void gfx_v12_1_xcc_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
						 bool enable, int xcc_id)
{
	uint32_t rlc_pg_cntl;

	rlc_pg_cntl = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL);

	if (!enable) {
		/* RLC_PG_CNTL[23] = 0 (default)
		 * RLC will wait for handshake acks with SMU
		 * GFXOFF will be enabled
		 * RLC_PG_CNTL[23] = 1
		 * RLC will not issue any message to SMU
		 * hence no handshake between SMU & RLC
		 * GFXOFF will be disabled
		 */
		rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
	} else
		rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL, rlc_pg_cntl);
}

static void gfx_v12_1_xcc_rlc_start(struct amdgpu_device *adev,
				    int xcc_id)
{
	/* TODO: re-enable the RLC/SMU handshake once the SMU
	 * and GFXOFF features work as expected */
1599 	if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1600 		gfx_v12_1_xcc_rlc_smu_handshake_cntl(adev, false, xcc_id);
1601 
1602 	WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL, RLC_ENABLE_F32, 1);
1603 	udelay(50);
1604 }
1605 
1606 static void gfx_v12_1_rlc_start(struct amdgpu_device *adev)
1607 {
1608 	int i, num_xcc;
1609 
1610 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1611 	for (i = 0; i < num_xcc; i++) {
1612 		gfx_v12_1_xcc_rlc_start(adev, i);
1613 	}
1614 }
1615 
1616 static void gfx_v12_1_xcc_rlc_enable_srm(struct amdgpu_device *adev,
1617 					 int xcc_id)
1618 {
1619 	uint32_t tmp;
1620 
1621 	/* enable Save Restore Machine */
1622 	tmp = RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SRM_CNTL));
1623 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1624 	tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1625 	WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SRM_CNTL), tmp);
1626 }
1627 
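/*
 * Backdoor-load the RLCG ucode image through the GPM ucode ADDR/DATA
 * register pair; the final ADDR write records the firmware version.
 */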
1628 static void gfx_v12_1_xcc_load_rlcg_microcode(struct amdgpu_device *adev,
1629 					      int xcc_id)
1630 {
1631 	const struct rlc_firmware_header_v2_0 *hdr;
1632 	const __le32 *fw_data;
1633 	unsigned i, fw_size;
1634 
1635 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1636 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1637 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1638 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1639 
1640 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR,
1641 		     RLCG_UCODE_LOADING_START_ADDRESS);
1642 
1643 	for (i = 0; i < fw_size; i++)
1644 		WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1645 			     regRLC_GPM_UCODE_DATA,
1646 			     le32_to_cpup(fw_data++));
1647 
1648 	WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1649 		     regRLC_GPM_UCODE_ADDR,
1650 		     adev->gfx.rlc_fw_version);
1651 }
1652 
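/*
 * Backdoor-load the RLC LX6 IRAM and DRAM images, then release the LX6
 * core from reset (BRESET = 0) with PDEBUG enabled.
 */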
1653 static void gfx_v12_1_xcc_load_rlc_iram_dram_microcode(struct amdgpu_device *adev,
1654 						       int xcc_id)
1655 {
1656 	const struct rlc_firmware_header_v2_2 *hdr;
1657 	const __le32 *fw_data;
1658 	unsigned i, fw_size;
1659 	u32 tmp;
1660 
1661 	hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1662 
1663 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1664 			le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1665 	fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1666 
1667 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_IRAM_ADDR, 0);
1668 
1669 	for (i = 0; i < fw_size; i++) {
1670 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1671 			msleep(1);
1672 		WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1673 			     regRLC_LX6_IRAM_DATA,
1674 			     le32_to_cpup(fw_data++));
1675 	}
1676 
1677 	WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1678 		     regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1679 
1680 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1681 			le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1682 	fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1683 
1684 	WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1685 		     regRLC_LX6_DRAM_ADDR, 0);
1686 	for (i = 0; i < fw_size; i++) {
1687 		if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1688 			msleep(1);
1689 		WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1690 			     regRLC_LX6_DRAM_DATA,
1691 			     le32_to_cpup(fw_data++));
1692 	}
1693 
1694 	WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1695 		     regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1696 
1697 	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_CNTL);
1698 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1699 	tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1700 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_CNTL, tmp);
1701 }
1702 
1703 static int gfx_v12_1_xcc_rlc_load_microcode(struct amdgpu_device *adev,
1704 					    int xcc_id)
1705 {
1706 	const struct rlc_firmware_header_v2_0 *hdr;
1707 	uint16_t version_major;
1708 	uint16_t version_minor;
1709 
1710 	if (!adev->gfx.rlc_fw)
1711 		return -EINVAL;
1712 
1713 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1714 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
1715 
1716 	version_major = le16_to_cpu(hdr->header.header_version_major);
1717 	version_minor = le16_to_cpu(hdr->header.header_version_minor);
1718 
1719 	if (version_major == 2) {
1720 		gfx_v12_1_xcc_load_rlcg_microcode(adev, xcc_id);
1721 		if (amdgpu_dpm == 1) {
1722 			if (version_minor >= 2)
1723 				gfx_v12_1_xcc_load_rlc_iram_dram_microcode(adev, xcc_id);
1724 		}
1725 
1726 		return 0;
1727 	}
1728 
1729 	return -EINVAL;
1730 }
1731 
1732 static int gfx_v12_1_xcc_rlc_resume(struct amdgpu_device *adev,
1733 				    int xcc_id)
1734 {
1735 	int r;
1736 
1737 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1738 		gfx_v12_1_xcc_init_csb(adev, xcc_id);
1739 
1740 		if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
1741 			gfx_v12_1_xcc_rlc_enable_srm(adev, xcc_id);
1742 	} else {
1743 		if (amdgpu_sriov_vf(adev)) {
1744 			gfx_v12_1_xcc_init_csb(adev, xcc_id);
1745 			return 0;
1746 		}
1747 
1748 		gfx_v12_1_xcc_rlc_stop(adev, xcc_id);
1749 
1750 		/* disable CG */
1751 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0);
1752 
1753 		/* disable PG */
1754 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL, 0);
1755 
1756 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1757 			/* legacy rlc firmware loading */
1758 			r = gfx_v12_1_xcc_rlc_load_microcode(adev, xcc_id);
1759 			if (r)
1760 				return r;
1761 		}
1762 
1763 		gfx_v12_1_xcc_init_csb(adev, xcc_id);
1764 
1765 		gfx_v12_1_xcc_rlc_start(adev, xcc_id);
1766 	}
1767 
1768 	return 0;
1769 }
1770 
1771 static int gfx_v12_1_rlc_resume(struct amdgpu_device *adev)
1772 {
1773 	int r, i, num_xcc;
1774 
1775 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1776 	for (i = 0; i < num_xcc; i++) {
1777 		r = gfx_v12_1_xcc_rlc_resume(adev, i);
1778 		if (r)
1779 			return r;
1780 	}
1781 
1782 	return 0;
1783 }
1784 
1785 static void gfx_v12_1_xcc_config_gfx_rs64(struct amdgpu_device *adev,
1786 					  int xcc_id)
1787 {
1788 	const struct gfx_firmware_header_v2_0 *mec_hdr;
1789 	uint32_t pipe_id, tmp;
1790 
1791 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)
1792 		adev->gfx.mec_fw->data;
1793 
1794 	/* config mec program start addr */
1795 	for (pipe_id = 0; pipe_id < 4; pipe_id++) {
1796 		soc_v1_0_grbm_select(adev, 1, pipe_id, 0, 0, GET_INST(GC, xcc_id));
1797 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START,
1798 					mec_hdr->ucode_start_addr_lo >> 2 |
1799 					mec_hdr->ucode_start_addr_hi << 30);
1800 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START_HI,
1801 					mec_hdr->ucode_start_addr_hi >> 2);
1802 	}
1803 	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1804 
1805 	/* reset mec pipe */
1806 	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL);
1807 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
1808 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
1809 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
1810 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
1811 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, tmp);
1812 
1813 	/* clear mec pipe reset */
1814 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
1815 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
1816 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
1817 	tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
1818 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, tmp);
1819 }
1820 
1821 static void gfx_v12_1_config_gfx_rs64(struct amdgpu_device *adev)
1822 {
1823 	int i, num_xcc;
1824 
1825 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1826 
1827 	for (i = 0; i < num_xcc; i++)
1828 		gfx_v12_1_xcc_config_gfx_rs64(adev, i);
1829 }
1830 
1831 static void gfx_v12_1_xcc_set_mec_ucode_start_addr(struct amdgpu_device *adev,
1832 						   int xcc_id)
1833 {
1834 	const struct gfx_firmware_header_v2_0 *cp_hdr;
1835 	unsigned pipe_id;
1836 
1837 	cp_hdr = (const struct gfx_firmware_header_v2_0 *)
1838 		adev->gfx.mec_fw->data;
1839 	mutex_lock(&adev->srbm_mutex);
1840 	for (pipe_id = 0; pipe_id < adev->gfx.mec.num_pipe_per_mec; pipe_id++) {
1841 		soc_v1_0_grbm_select(adev, 1, pipe_id, 0, 0, GET_INST(GC, xcc_id));
1842 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START,
1843 			     cp_hdr->ucode_start_addr_lo >> 2 |
1844 			     cp_hdr->ucode_start_addr_hi << 30);
1845 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START_HI,
1846 			     cp_hdr->ucode_start_addr_hi >> 2);
1847 	}
1848 	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1849 	mutex_unlock(&adev->srbm_mutex);
1850 }
1851 
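/*
 * Poll until the CP is idle (CP_STAT == 0) and the RLC reports
 * BOOTLOAD_COMPLETE for this XCC, i.e. the autoloaded GC ucode is in
 * place.
 */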
1852 static int gfx_v12_1_xcc_wait_for_rlc_autoload_complete(struct amdgpu_device *adev,
1853 							int xcc_id)
1854 {
1855 	uint32_t cp_status;
1856 	uint32_t bootload_status;
1857 	int i;
1858 
1859 	for (i = 0; i < adev->usec_timeout; i++) {
1860 		cp_status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_STAT);
1861 		bootload_status = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
1862 					       regRLC_RLCS_BOOTLOAD_STATUS);
1863 
1864 		if ((cp_status == 0) &&
1865 		    (REG_GET_FIELD(bootload_status,
1866 			RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
1867 			break;
1868 		}
1869 		udelay(1);
1870 		if (amdgpu_emu_mode)
1871 			msleep(10);
1872 	}
1873 
1874 	if (i >= adev->usec_timeout) {
1875 		dev_err(adev->dev,
1876 			"rlc autoload: xcc%d gc ucode autoload timeout\n", xcc_id);
1877 		return -ETIMEDOUT;
1878 	}
1879 
1880 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1881 		gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id);
1882 	}
1883 
1884 	return 0;
1885 }
1886 
1887 static int gfx_v12_1_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
1888 {
	int xcc_id, r;

	for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
		r = gfx_v12_1_xcc_wait_for_rlc_autoload_complete(adev, xcc_id);
		if (r)
			return r;
	}

	return 0;
1895 }
1896 
1897 static void gfx_v12_1_xcc_cp_compute_enable(struct amdgpu_device *adev,
1898 					    bool enable, int xcc_id)
1899 {
1900 	u32 data;
1901 
1902 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL);
1903 	data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
1904 						 enable ? 0 : 1);
1905 	data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
1906 						 enable ? 0 : 1);
1907 	data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
1908 						 enable ? 0 : 1);
1909 	data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
1910 						 enable ? 0 : 1);
1911 	data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
1912 						 enable ? 0 : 1);
1913 	data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
1914 						 enable ? 1 : 0);
1915 	data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
						 enable ? 1 : 0);
1917 	data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
1918 						 enable ? 1 : 0);
1919 	data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
1920 						 enable ? 1 : 0);
1921 	data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
1922 						 enable ? 0 : 1);
1923 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, data);
1924 
1925 	adev->gfx.kiq[xcc_id].ring.sched.ready = enable;
1926 
1927 	udelay(50);
1928 }
1929 
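/*
 * Allocate and fill the VRAM BOs used for direct MEC RS64 loading: one
 * shared instruction image plus a 64KB-aligned copy of the data image
 * for each pipe on each XCC.
 */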
1930 static int gfx_v12_1_init_cp_compute_microcode_bo(struct amdgpu_device *adev)
1931 {
1932 	const struct gfx_firmware_header_v2_0 *mec_hdr;
1933 	const __le32 *fw_ucode, *fw_data;
1934 	u32 fw_ucode_size, fw_data_size;
1935 	u32 *fw_ucode_ptr, *fw_data_ptr;
1936 	int i, r, xcc_id;
1937 
1938 	if (!adev->gfx.mec_fw)
1939 		return -EINVAL;
1940 
1941 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
1942 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
1943 
1944 	fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
1945 				le32_to_cpu(mec_hdr->ucode_offset_bytes));
1946 	fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
1947 
1948 	fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1949 				le32_to_cpu(mec_hdr->data_offset_bytes));
1950 	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
1951 
1952 	if (adev->gfx.mec.mec_fw_obj == NULL) {
1953 		r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
1954 					      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
1955 					      &adev->gfx.mec.mec_fw_obj,
1956 					      &adev->gfx.mec.mec_fw_gpu_addr,
1957 					      (void **)&fw_ucode_ptr);
1958 		if (r) {
1959 			dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
1960 			gfx_v12_1_mec_fini(adev);
1961 			return r;
1962 		}
1963 
1964 		memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
1965 
1966 		amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1967 		amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1968 	}
1969 
1970 	if (adev->gfx.mec.mec_fw_data_obj == NULL) {
1971 		r = amdgpu_bo_create_reserved(adev,
1972 					      ALIGN(fw_data_size, 64 * 1024) *
1973 					      adev->gfx.mec.num_pipe_per_mec * NUM_XCC(adev->gfx.xcc_mask),
1974 					      64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
1975 					      &adev->gfx.mec.mec_fw_data_obj,
1976 					      &adev->gfx.mec.mec_fw_data_gpu_addr,
1977 					      (void **)&fw_data_ptr);
1978 		if (r) {
1979 			dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
1980 			gfx_v12_1_mec_fini(adev);
1981 			return r;
1982 		}
1983 
1984 		for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
1985 			for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
1986 				u32 offset = (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
1987 					     ALIGN(fw_data_size, 64 * 1024) / 4;
1988 				memcpy(fw_data_ptr + offset, fw_data, fw_data_size);
1989 			}
1990 		}
1991 
1992 		amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
1993 		amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
1994 	}
1995 
1996 	return 0;
1997 }
1998 
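/*
 * Point each MEC pipe's instruction/data caches at the firmware BOs,
 * invalidate both caches and wait for the invalidations to complete
 * before programming the ucode start address.
 */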
1999 static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *adev,
2000 							int xcc_id)
2001 {
2002 	const struct gfx_firmware_header_v2_0 *mec_hdr;
2003 	u32 fw_data_size;
2004 	u32 tmp, i, usec_timeout = 50000; /* Wait for 50 ms */
2005 
2006 	if (!adev->gfx.mec_fw)
2007 		return -EINVAL;
2008 
2009 	mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
2010 	fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
2011 
2012 	gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id);
2013 
2014 	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL);
2015 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2016 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2017 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2018 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp);
2019 
2020 	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL);
2021 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2022 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2023 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL, tmp);
2024 
2025 	mutex_lock(&adev->srbm_mutex);
2026 	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2027 		soc_v1_0_grbm_select(adev, 1, i, 0, 0, GET_INST(GC, xcc_id));
2028 
2029 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_LO,
2030 			     lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
2031 					   (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
2032 					   ALIGN(fw_data_size, 64 * 1024)));
2033 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_HI,
2034 			     upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
2035 					   (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
2036 					   ALIGN(fw_data_size, 64 * 1024)));
2037 
2038 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO,
2039 				lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2040 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI,
2041 				upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2042 	}
	soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
	mutex_unlock(&adev->srbm_mutex);
2045 
	/* Trigger an invalidation of the MEC data cache */
2047 	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL);
2048 	tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2049 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL, tmp);
2050 
2051 	/* Wait for invalidation complete */
2052 	for (i = 0; i < usec_timeout; i++) {
2053 		tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL);
2054 		if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2055 				       INVALIDATE_DCACHE_COMPLETE))
2056 			break;
2057 		udelay(1);
2058 	}
2059 
2060 	if (i >= usec_timeout) {
2061 		dev_err(adev->dev, "failed to invalidate data cache\n");
2062 		return -EINVAL;
2063 	}
2064 
2065 	/* Trigger an invalidation of the L1 instruction caches */
2066 	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL);
2067 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2068 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL, tmp);
2069 
2070 	/* Wait for invalidation complete */
2071 	for (i = 0; i < usec_timeout; i++) {
2072 		tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL);
2073 		if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2074 				       INVALIDATE_CACHE_COMPLETE))
2075 			break;
2076 		udelay(1);
2077 	}
2078 
2079 	if (i >= usec_timeout) {
2080 		dev_err(adev->dev, "failed to invalidate instruction cache\n");
2081 		return -EINVAL;
2082 	}
2083 
2084 	gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id);
2085 
2086 	return 0;
2087 }
2088 
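/*
 * Tell the RLC which queue is the KIQ: the low byte of RLC_CP_SCHEDULERS
 * encodes me/pipe/queue, and the follow-up write sets bit 7, which
 * appears to mark the entry valid (sequence carried over from earlier
 * GC generations).
 */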
2089 static void gfx_v12_1_xcc_kiq_setting(struct amdgpu_ring *ring,
2090 				      int xcc_id)
2091 {
2092 	uint32_t tmp;
2093 	struct amdgpu_device *adev = ring->adev;
2094 
2095 	/* tell RLC which is KIQ queue */
2096 	tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
2097 	tmp &= 0xffffff00;
2098 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2099 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
2100 	tmp |= 0x80;
2101 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
2102 }
2103 
2104 static void gfx_v12_1_xcc_cp_set_doorbell_range(struct amdgpu_device *adev,
2105 						int xcc_id)
2106 {
2107 	/* disable gfx engine doorbell range */
2108 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_DOORBELL_RANGE_LOWER, 0);
2109 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_DOORBELL_RANGE_UPPER, 0);
2110 
2111 	/* set compute engine doorbell range */
2112 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_LOWER,
2113 		     ((adev->doorbell_index.kiq +
2114 		       xcc_id * adev->doorbell_index.xcc_doorbell_range) *
2115 		      2) << 2);
2116 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_UPPER,
2117 		     ((adev->doorbell_index.userqueue_end +
2118 		       xcc_id * adev->doorbell_index.xcc_doorbell_range) *
2119 		      2) << 2);
2120 }
2121 
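/*
 * Populate a compute memory queue descriptor (MQD) from the generic MQD
 * properties; the HQD registers are programmed from these values later,
 * either directly or via the KIQ.
 */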
2122 static int gfx_v12_1_compute_mqd_init(struct amdgpu_device *adev, void *m,
2123 				      struct amdgpu_mqd_prop *prop)
2124 {
2125 	struct v12_1_compute_mqd *mqd = m;
2126 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2127 	uint32_t tmp;
2128 
2129 	mqd->header = 0xC0310800;
2130 	mqd->compute_pipelinestat_enable = 0x00000001;
2131 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2132 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2133 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2134 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2135 	mqd->compute_misc_reserved = 0x00000007;
2136 
2137 	eop_base_addr = prop->eop_gpu_addr >> 8;
2138 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2139 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2140 
2141 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2142 	tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
2143 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2144 			(order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1));
2145 
2146 	mqd->cp_hqd_eop_control = tmp;
2147 
2148 	/* enable doorbell? */
2149 	tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
2150 
2151 	if (prop->use_doorbell) {
2152 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2153 				    DOORBELL_OFFSET, prop->doorbell_index);
2154 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2155 				    DOORBELL_EN, 1);
2156 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2157 				    DOORBELL_SOURCE, 0);
2158 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2159 				    DOORBELL_HIT, 0);
2160 	} else {
2161 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2162 				    DOORBELL_EN, 0);
2163 	}
2164 
2165 	mqd->cp_hqd_pq_doorbell_control = tmp;
2166 
2167 	/* disable the queue if it's active */
2168 	mqd->cp_hqd_dequeue_request = 0;
2169 	mqd->cp_hqd_pq_rptr = 0;
2170 	mqd->cp_hqd_pq_wptr_lo = 0;
2171 	mqd->cp_hqd_pq_wptr_hi = 0;
2172 
2173 	/* set the pointer to the MQD */
2174 	mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
2175 	mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
2176 
2177 	/* set MQD vmid to 0 */
2178 	tmp = regCP_MQD_CONTROL_DEFAULT;
2179 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2180 	mqd->cp_mqd_control = tmp;
2181 
	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2183 	hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
2184 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2185 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2186 
2187 	/* set up the HQD, this is similar to CP_RB0_CNTL */
2188 	tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
2189 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2190 			    (order_base_2(prop->queue_size / 4) - 1));
2191 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2192 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
2193 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2194 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
2195 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2196 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2197 	mqd->cp_hqd_pq_control = tmp;
2198 
2199 	/* set the wb address whether it's enabled or not */
2200 	wb_gpu_addr = prop->rptr_gpu_addr;
2201 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2202 	mqd->cp_hqd_pq_rptr_report_addr_hi =
2203 		upper_32_bits(wb_gpu_addr) & 0xffff;
2204 
2205 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2206 	wb_gpu_addr = prop->wptr_gpu_addr;
2207 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2208 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2209 
2210 	tmp = 0;
2211 	/* enable the doorbell if requested */
2212 	if (prop->use_doorbell) {
2213 		tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
2214 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2215 				DOORBELL_OFFSET, prop->doorbell_index);
2216 
2217 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2218 				    DOORBELL_EN, 1);
2219 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2220 				    DOORBELL_SOURCE, 0);
2221 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2222 				    DOORBELL_HIT, 0);
2223 	}
2224 
2225 	mqd->cp_hqd_pq_doorbell_control = tmp;
2226 
2227 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2228 	mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
2229 
2230 	/* set the vmid for the queue */
2231 	mqd->cp_hqd_vmid = 0;
2232 
2233 	tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
2234 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x63);
2235 	mqd->cp_hqd_persistent_state = tmp;
2236 
2237 	/* set MIN_IB_AVAIL_SIZE */
2238 	tmp = regCP_HQD_IB_CONTROL_DEFAULT;
2239 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 1);
2240 	mqd->cp_hqd_ib_control = tmp;
2241 
2242 	/* set static priority for a compute queue/ring */
2243 	mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
2244 	mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
2245 
2246 	mqd->cp_mqd_stride_size = prop->mqd_stride_size ? prop->mqd_stride_size :
2247 		AMDGPU_MQD_SIZE_ALIGN(adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size);
2248 
2249 	mqd->cp_hqd_active = prop->hqd_active;
2250 
2251 	return 0;
2252 }
2253 
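/*
 * Program the HQD registers for the KIQ directly from its MQD; callers
 * hold srbm_mutex with the target queue selected via GRBM.
 */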
2254 static int gfx_v12_1_xcc_kiq_init_register(struct amdgpu_ring *ring,
2255 					   int xcc_id)
2256 {
2257 	struct amdgpu_device *adev = ring->adev;
2258 	struct v12_1_compute_mqd *mqd = ring->mqd_ptr;
2259 	int j;
2260 
2261 	/* inactivate the queue */
2262 	if (amdgpu_sriov_vf(adev))
2263 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 0);
2264 
2265 	/* disable wptr polling */
2266 	WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0);
2267 
2268 	/* write the EOP addr */
2269 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR,
2270 	       mqd->cp_hqd_eop_base_addr_lo);
2271 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR_HI,
2272 	       mqd->cp_hqd_eop_base_addr_hi);
2273 
2274 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2275 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL,
2276 	       mqd->cp_hqd_eop_control);
2277 
2278 	/* enable doorbell? */
2279 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
2280 	       mqd->cp_hqd_pq_doorbell_control);
2281 
2282 	/* disable the queue if it's active */
2283 	if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
2284 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
2285 		for (j = 0; j < adev->usec_timeout; j++) {
2286 			if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
2287 				break;
2288 			udelay(1);
2289 		}
2290 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST,
2291 		       mqd->cp_hqd_dequeue_request);
2292 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR,
2293 		       mqd->cp_hqd_pq_rptr);
2294 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
2295 		       mqd->cp_hqd_pq_wptr_lo);
2296 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
2297 		       mqd->cp_hqd_pq_wptr_hi);
2298 	}
2299 
2300 	/* set the pointer to the MQD */
2301 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR,
2302 	       mqd->cp_mqd_base_addr_lo);
2303 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI,
2304 	       mqd->cp_mqd_base_addr_hi);
2305 
2306 	/* set MQD vmid to 0 */
2307 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL,
2308 	       mqd->cp_mqd_control);
2309 
	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
2311 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE,
2312 	       mqd->cp_hqd_pq_base_lo);
2313 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI,
2314 	       mqd->cp_hqd_pq_base_hi);
2315 
2316 	/* set up the HQD, this is similar to CP_RB0_CNTL */
2317 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL,
2318 	       mqd->cp_hqd_pq_control);
2319 
2320 	/* set the wb address whether it's enabled or not */
2321 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
2322 		mqd->cp_hqd_pq_rptr_report_addr_lo);
2323 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
2324 		mqd->cp_hqd_pq_rptr_report_addr_hi);
2325 
2326 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2327 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR,
2328 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
2329 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
2330 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
2331 
2332 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
2333 	       mqd->cp_hqd_pq_doorbell_control);
2334 
2335 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2336 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
2337 	       mqd->cp_hqd_pq_wptr_lo);
2338 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
2339 	       mqd->cp_hqd_pq_wptr_hi);
2340 
2341 	/* set the vmid for the queue */
2342 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, mqd->cp_hqd_vmid);
2343 
2344 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE,
2345 	       mqd->cp_hqd_persistent_state);
2346 
2347 	/* activate the queue */
2348 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE,
2349 	       mqd->cp_hqd_active);
2350 
2351 	if (ring->use_doorbell)
2352 		WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_STATUS, DOORBELL_ENABLE, 1);
2353 
2354 	return 0;
2355 }
2356 
2357 static int gfx_v12_1_xcc_kiq_init_queue(struct amdgpu_ring *ring,
2358 					int xcc_id)
2359 {
2360 	struct amdgpu_device *adev = ring->adev;
2361 	struct v12_1_compute_mqd *mqd = ring->mqd_ptr;
2362 
2363 	gfx_v12_1_xcc_kiq_setting(ring, xcc_id);
2364 
2365 	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
2366 		/* reset MQD to a clean status */
2367 		if (adev->gfx.kiq[xcc_id].mqd_backup)
2368 			memcpy(mqd, adev->gfx.kiq[xcc_id].mqd_backup, sizeof(*mqd));
2369 
2370 		/* reset ring buffer */
2371 		ring->wptr = 0;
2372 		amdgpu_ring_clear_ring(ring);
2373 
2374 		mutex_lock(&adev->srbm_mutex);
2375 		soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
2376 		gfx_v12_1_xcc_kiq_init_register(ring, xcc_id);
2377 		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
2378 		mutex_unlock(&adev->srbm_mutex);
2379 	} else {
2380 		memset((void *)mqd, 0, sizeof(*mqd));
2381 		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
2382 			amdgpu_ring_clear_ring(ring);
2383 		mutex_lock(&adev->srbm_mutex);
2384 		soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
2385 		amdgpu_ring_init_mqd(ring);
2386 		gfx_v12_1_xcc_kiq_init_register(ring, xcc_id);
2387 		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
2388 		mutex_unlock(&adev->srbm_mutex);
2389 
2390 		if (adev->gfx.kiq[xcc_id].mqd_backup)
2391 			memcpy(adev->gfx.kiq[xcc_id].mqd_backup, mqd, sizeof(*mqd));
2392 	}
2393 
2394 	return 0;
2395 }
2396 
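/*
 * Initialize a KCQ MQD: on first init, build the MQD and stash a backup;
 * on reset/resume, restore the backup and reset the ring state instead.
 */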
2397 static int gfx_v12_1_xcc_kcq_init_queue(struct amdgpu_ring *ring,
2398 					int xcc_id)
2399 {
2400 	struct amdgpu_device *adev = ring->adev;
2401 	struct v12_1_compute_mqd *mqd = ring->mqd_ptr;
2402 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
2403 
2404 	if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
2405 		memset((void *)mqd, 0, sizeof(*mqd));
2406 		mutex_lock(&adev->srbm_mutex);
2407 		soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
2408 		amdgpu_ring_init_mqd(ring);
2409 		soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
2410 		mutex_unlock(&adev->srbm_mutex);
2411 
2412 		if (adev->gfx.mec.mqd_backup[mqd_idx])
2413 			memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
2414 	} else {
2415 		/* restore MQD to a clean status */
2416 		if (adev->gfx.mec.mqd_backup[mqd_idx])
2417 			memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
2418 		/* reset ring buffer */
2419 		ring->wptr = 0;
2420 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
2421 		amdgpu_ring_clear_ring(ring);
2422 	}
2423 
2424 	return 0;
2425 }
2426 
2427 static int gfx_v12_1_xcc_kiq_resume(struct amdgpu_device *adev,
2428 				    int xcc_id)
2429 {
2430 	struct amdgpu_ring *ring;
2431 	int r;
2432 
2433 	ring = &adev->gfx.kiq[xcc_id].ring;
2434 
2435 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
2436 	if (unlikely(r != 0))
2437 		return r;
2438 
2439 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2440 	if (unlikely(r != 0)) {
2441 		amdgpu_bo_unreserve(ring->mqd_obj);
2442 		return r;
2443 	}
2444 
2445 	gfx_v12_1_xcc_kiq_init_queue(ring, xcc_id);
2446 	amdgpu_bo_kunmap(ring->mqd_obj);
2447 	ring->mqd_ptr = NULL;
2448 	amdgpu_bo_unreserve(ring->mqd_obj);
2449 	ring->sched.ready = true;
2450 	return 0;
2451 }
2452 
2453 static int gfx_v12_1_xcc_kcq_resume(struct amdgpu_device *adev,
2454 				    int xcc_id)
2455 {
2456 	struct amdgpu_ring *ring = NULL;
2457 	int r = 0, i;
2458 
2459 	if (!amdgpu_async_gfx_ring)
2460 		gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id);
2461 
2462 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2463 		ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
2464 
2465 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
2466 		if (unlikely(r != 0))
2467 			goto done;
2468 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2469 		if (!r) {
2470 			r = gfx_v12_1_xcc_kcq_init_queue(ring, xcc_id);
2471 			amdgpu_bo_kunmap(ring->mqd_obj);
2472 			ring->mqd_ptr = NULL;
2473 		}
2474 		amdgpu_bo_unreserve(ring->mqd_obj);
2475 		if (r)
2476 			goto done;
2477 	}
2478 
2479 	r = amdgpu_gfx_enable_kcq(adev, xcc_id);
2480 done:
2481 	return r;
2482 }
2483 
2484 static int gfx_v12_1_xcc_cp_resume(struct amdgpu_device *adev, uint16_t xcc_mask)
2485 {
2486 	int r, i, xcc_id;
2487 	struct amdgpu_ring *ring;
2488 
2489 	for_each_inst(xcc_id, xcc_mask) {
2490 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2491 			/* legacy firmware loading */
2492 			r = gfx_v12_1_xcc_cp_compute_load_microcode_rs64(adev, xcc_id);
2493 			if (r)
2494 				return r;
2495 		}
2496 
2497 		/* GFX CGCG and LS is set by default */
2498 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
2499 			gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, true, xcc_id);
2500 
2501 		gfx_v12_1_xcc_cp_set_doorbell_range(adev, xcc_id);
2502 
2503 		gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id);
2504 
2505 		if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
2506 			r = amdgpu_mes_kiq_hw_init(adev, xcc_id);
2507 		else
2508 			r = gfx_v12_1_xcc_kiq_resume(adev, xcc_id);
2509 		if (r)
2510 			return r;
2511 
2512 		r = gfx_v12_1_xcc_kcq_resume(adev, xcc_id);
2513 		if (r)
2514 			return r;
2515 
2516 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2517 			ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
2518 			r = amdgpu_ring_test_helper(ring);
2519 			if (r)
2520 				return r;
2521 		}
2522 	}
2523 
2524 	return 0;
2525 }
2526 
2527 static int gfx_v12_1_cp_resume(struct amdgpu_device *adev)
2528 {
2529 	int num_xcc, num_xcp, num_xcc_per_xcp;
2530 	uint16_t xcc_mask;
2531 	int r = 0;
2532 
2533 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
2534 	if (amdgpu_sriov_vf(adev)) {
2535 		enum amdgpu_gfx_partition mode;
2536 
2537 		mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
2538 						       AMDGPU_XCP_FL_NONE);
2539 		if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
2540 			return -EINVAL;
2541 		if (adev->gfx.funcs &&
2542 		    adev->gfx.funcs->get_xccs_per_xcp) {
2543 			num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev);
2544 			adev->gfx.num_xcc_per_xcp = num_xcc_per_xcp;
2545 			num_xcp = num_xcc / num_xcc_per_xcp;
2546 		} else {
2547 			return -EINVAL;
2548 		}
2549 		r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode);
2550 
2551 	} else {
2552 		if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
2553 						    AMDGPU_XCP_FL_NONE) ==
2554 		    AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
2555 			r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr,
2556 							     amdgpu_user_partt_mode);
2557 	}
2558 
2559 	if (r)
2560 		return r;
2561 
2562 	xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
2563 
2564 	return gfx_v12_1_xcc_cp_resume(adev, xcc_mask);
2565 }
2566 
2567 static int gfx_v12_1_gfxhub_enable(struct amdgpu_device *adev)
2568 {
2569 	int r, i;
2570 	bool value;
2571 
2572 	r = adev->gfxhub.funcs->gart_enable(adev);
2573 	if (r)
2574 		return r;
2575 
	value = (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS);
2578 
2579 	adev->gfxhub.funcs->set_fault_enable_default(adev, value);
	/* TODO: investigate why this TLB flush is needed;
	 * are we missing a flush somewhere else? */
2582 	for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
2583 		if (AMDGPU_IS_GFXHUB(i))
2584 			adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(i), 0);
2585 	}
2586 
2587 	return 0;
2588 }
2589 
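/*
 * Decode GB_ADDR_CONFIG_READ into the discrete gfx config fields (pipes,
 * packers, RBs, SEs, compressed fragments, pipe interleave size).
 */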
2590 static int get_gb_addr_config(struct amdgpu_device *adev)
2591 {
2592 	u32 gb_addr_config;
2593 
2594 	gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG_READ);
2595 	if (gb_addr_config == 0)
2596 		return -EINVAL;
2597 
2598 	adev->gfx.config.gb_addr_config_fields.num_pkrs =
2599 		1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG_READ, NUM_PKRS);
2600 
2601 	adev->gfx.config.gb_addr_config = gb_addr_config;
2602 
2603 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2604 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
2605 				      GB_ADDR_CONFIG_READ, NUM_PIPES);
2606 
2607 	adev->gfx.config.max_tile_pipes =
2608 		adev->gfx.config.gb_addr_config_fields.num_pipes;
2609 
2610 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2611 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
2612 				      GB_ADDR_CONFIG_READ, MAX_COMPRESSED_FRAGS);
2613 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2614 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
2615 				      GB_ADDR_CONFIG_READ, NUM_RB_PER_SE);
2616 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2617 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
2618 				      GB_ADDR_CONFIG_READ, NUM_SHADER_ENGINES);
2619 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2620 			REG_GET_FIELD(adev->gfx.config.gb_addr_config,
2621 				      GB_ADDR_CONFIG_READ, PIPE_INTERLEAVE_SIZE));
2622 
2623 	return 0;
2624 }
2625 
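/*
 * Disable GPA (guest physical address) mode for CP fetches by setting
 * the GPA_OVERRIDE bits; used with backdoor (non-PSP) firmware loading.
 */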
2626 static void gfx_v12_1_xcc_disable_gpa_mode(struct amdgpu_device *adev,
2627 					   int xcc_id)
2628 {
2629 	uint32_t data;
2630 
2631 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG);
2632 	data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
2633 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG, data);
2634 
2635 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPG_PSP_DEBUG);
2636 	data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
2637 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPG_PSP_DEBUG, data);
2638 }
2639 
2640 static void gfx_v12_1_xcc_enable_atomics(struct amdgpu_device *adev,
2641 					 int xcc_id)
2642 {
2643 	uint32_t data;
2644 
2645 	/* Set the TCP UTCL0 register to enable atomics */
2646 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_UTCL0_CNTL1);
2647 	data = REG_SET_FIELD(data, TCP_UTCL0_CNTL1, ATOMIC_REQUESTER_EN, 0x1);
2648 
2649 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_UTCL0_CNTL1, data);
2650 }
2651 
2652 static void gfx_v12_1_xcc_disable_burst(struct amdgpu_device *adev,
2653 					int xcc_id)
2654 {
2655 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGL1_DRAM_BURST_CTRL, 0xf);
2656 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGLARB_DRAM_BURST_CTRL, 0xf);
2657 }
2658 
2659 static void gfx_v12_1_xcc_disable_early_write_ack(struct amdgpu_device *adev,
2660 					int xcc_id)
2661 {
2662 	uint32_t data;
2663 
2664 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL3);
2665 	data = REG_SET_FIELD(data, TCP_CNTL3, DISABLE_EARLY_WRITE_ACK, 0x1);
2666 
2667 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL3, data);
2668 }
2669 
2670 static void gfx_v12_1_xcc_disable_tcp_spill_cache(struct amdgpu_device *adev,
2671 					int xcc_id)
2672 {
2673 	uint32_t data;
2674 
2675 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL);
2676 	data = REG_SET_FIELD(data, TCP_CNTL, TCP_SPILL_CACHE_DISABLE, 0x1);
2677 
2678 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL, data);
2679 }
2680 
2681 static void gfx_v12_1_init_golden_registers(struct amdgpu_device *adev)
2682 {
2683 	int i;
2684 
2685 	for (i = 0; i < NUM_XCC(adev->gfx.xcc_mask); i++) {
2686 		gfx_v12_1_xcc_disable_burst(adev, i);
2687 		gfx_v12_1_xcc_enable_atomics(adev, i);
2688 		gfx_v12_1_xcc_disable_early_write_ack(adev, i);
2689 		gfx_v12_1_xcc_disable_tcp_spill_cache(adev, i);
2690 	}
2691 }
2692 
2693 static int gfx_v12_1_hw_init(struct amdgpu_ip_block *ip_block)
2694 {
2695 	int r, i, num_xcc;
2696 	struct amdgpu_device *adev = ip_block->adev;
2697 
2698 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2699 		/* rlc autoload firmware */
2700 		r = gfx_v12_1_rlc_backdoor_autoload_enable(adev);
2701 		if (r)
2702 			return r;
2703 	} else {
2704 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2705 			num_xcc = NUM_XCC(adev->gfx.xcc_mask);
2706 
2707 			if (adev->gfx.imu.funcs) {
2708 				if (adev->gfx.imu.funcs->load_microcode)
2709 					adev->gfx.imu.funcs->load_microcode(adev);
2710 			}
2711 
2712 			for (i = 0; i < num_xcc; i++) {
2713 				/* disable gpa mode in backdoor loading */
2714 				gfx_v12_1_xcc_disable_gpa_mode(adev, i);
2715 			}
2716 		}
2717 	}
2718 
2719 	if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
2720 	    (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
2721 		r = gfx_v12_1_wait_for_rlc_autoload_complete(adev);
2722 		if (r) {
2723 			dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
2724 			return r;
2725 		}
2726 	}
2727 
2728 	adev->gfx.is_poweron = true;
2729 
2730 	if (get_gb_addr_config(adev))
		DRM_WARN("Invalid gb_addr_config!\n");
2732 
2733 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
2734 		gfx_v12_1_config_gfx_rs64(adev);
2735 
2736 	r = gfx_v12_1_gfxhub_enable(adev);
2737 	if (r)
2738 		return r;
2739 
2740 	gfx_v12_1_init_golden_registers(adev);
2741 
2742 	gfx_v12_1_constants_init(adev);
2743 
2744 	if (adev->nbio.funcs->gc_doorbell_init)
2745 		adev->nbio.funcs->gc_doorbell_init(adev);
2746 
2747 	r = gfx_v12_1_rlc_resume(adev);
2748 	if (r)
2749 		return r;
2750 
2751 	/*
	 * golden register init and RLC resume may override some registers,
	 * so reconfigure them here
2754 	 */
2755 	gfx_v12_1_tcp_harvest(adev);
2756 
2757 	r = gfx_v12_1_cp_resume(adev);
2758 	if (r)
2759 		return r;
2760 
2761 	return r;
2762 }
2763 
2764 static void gfx_v12_1_xcc_fini(struct amdgpu_device *adev,
2765 			      int xcc_id)
2766 {
2767 	uint32_t tmp;
2768 
2769 	if (!adev->no_hw_access) {
2770 		if (amdgpu_gfx_disable_kcq(adev, xcc_id))
2771 			DRM_ERROR("KCQ disable failed\n");
2772 
2773 		amdgpu_mes_kiq_hw_fini(adev, xcc_id);
2774 	}
2775 
2776 	if (amdgpu_sriov_vf(adev)) {
2777 		/* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
2778 		tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
2779 		tmp &= 0xffffff00;
2780 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
2781 	}
2782 	gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id);
2783 	gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
2784 }
2785 
2786 static int gfx_v12_1_set_userq_eop_interrupts(struct amdgpu_device *adev,
2787 					      bool enable)
2788 {
2789 	unsigned int irq_type;
2790 	int m, p, r;
2791 
2792 	if (adev->gfx.disable_kq) {
2793 		for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
2794 			for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
2795 				irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2796 					+ (m * adev->gfx.mec.num_pipe_per_mec)
2797 					+ p;
2798 				if (enable)
2799 					r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
2800 							   irq_type);
2801 				else
2802 					r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
2803 							   irq_type);
2804 				if (r)
2805 					return r;
2806 			}
2807 		}
2808 	}
2809 
2810 	return 0;
2811 }
2812 
2813 static int gfx_v12_1_hw_fini(struct amdgpu_ip_block *ip_block)
2814 {
2815 	struct amdgpu_device *adev = ip_block->adev;
2816 	int i, num_xcc;
2817 
2818 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
2819 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
2820 	gfx_v12_1_set_userq_eop_interrupts(adev, false);
2821 
2822 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
2823 	for (i = 0; i < num_xcc; i++) {
2824 		gfx_v12_1_xcc_fini(adev, i);
2825 	}
2826 
2827 	adev->gfxhub.funcs->gart_disable(adev);
2828 
2829 	adev->gfx.is_poweron = false;
2830 
2831 	return 0;
2832 }
2833 
2834 static int gfx_v12_1_suspend(struct amdgpu_ip_block *ip_block)
2835 {
2836 	return gfx_v12_1_hw_fini(ip_block);
2837 }
2838 
2839 static int gfx_v12_1_resume(struct amdgpu_ip_block *ip_block)
2840 {
2841 	return gfx_v12_1_hw_init(ip_block);
2842 }
2843 
2844 static bool gfx_v12_1_is_idle(struct amdgpu_ip_block *ip_block)
2845 {
2846 	struct amdgpu_device *adev = ip_block->adev;
2847 	int i, num_xcc;
2848 
2849 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
2850 	for (i = 0; i < num_xcc; i++) {
2851 		if (REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, i),
2852 				regGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
2853 			return false;
2854 	}
2855 	return true;
2856 }
2857 
2858 static int gfx_v12_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
2859 {
2860 	unsigned i;
2861 	struct amdgpu_device *adev = ip_block->adev;
2862 
2863 	for (i = 0; i < adev->usec_timeout; i++) {
2864 		if (gfx_v12_1_is_idle(ip_block))
2865 			return 0;
2866 		udelay(1);
2867 	}
2868 	return -ETIMEDOUT;
2869 }
2870 
2871 static uint64_t gfx_v12_1_get_gpu_clock_counter(struct amdgpu_device *adev)
2872 {
2873 	uint64_t clock = 0;
2874 
2875 	if (adev->smuio.funcs &&
2876 	    adev->smuio.funcs->get_gpu_clock_counter)
2877 		clock = adev->smuio.funcs->get_gpu_clock_counter(adev);
2878 	else
2879 		dev_warn(adev->dev, "query gpu clock counter is not supported\n");
2880 
2881 	return clock;
2882 }
2883 
2884 static int gfx_v12_1_early_init(struct amdgpu_ip_block *ip_block)
2885 {
2886 	struct amdgpu_device *adev = ip_block->adev;
2887 
2889 	switch (amdgpu_user_queue) {
2890 	case -1:
2891 	default:
2892 		adev->gfx.disable_kq = true;
2893 		adev->gfx.disable_uq = true;
2894 		break;
2895 	case 0:
2896 		adev->gfx.disable_kq = false;
2897 		adev->gfx.disable_uq = true;
2898 		break;
2899 	}
2900 
2901 	adev->gfx.funcs = &gfx_v12_1_gfx_funcs;
2902 
2903 	if (adev->gfx.disable_kq)
2904 		adev->gfx.num_compute_rings = 0;
2905 	else
2906 		adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
2907 						  AMDGPU_MAX_COMPUTE_RINGS);
2908 
2909 	gfx_v12_1_set_kiq_pm4_funcs(adev);
2910 	gfx_v12_1_set_ring_funcs(adev);
2911 	gfx_v12_1_set_irq_funcs(adev);
2912 	gfx_v12_1_set_rlc_funcs(adev);
2913 	gfx_v12_1_set_mqd_funcs(adev);
2914 	gfx_v12_1_set_imu_funcs(adev);
2915 
2916 	gfx_v12_1_init_rlcg_reg_access_ctrl(adev);
2917 
2918 	return gfx_v12_1_init_microcode(adev);
2919 }
2920 
2921 static int gfx_v12_1_late_init(struct amdgpu_ip_block *ip_block)
2922 {
2923 	struct amdgpu_device *adev = ip_block->adev;
2924 	int r;
2925 
2926 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
2927 	if (r)
2928 		return r;
2929 
2930 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
2931 	if (r)
2932 		return r;
2933 
2934 	r = gfx_v12_1_set_userq_eop_interrupts(adev, true);
2935 	if (r)
2936 		return r;
2937 
2938 	return 0;
2939 }
2940 
2941 static bool gfx_v12_1_is_rlc_enabled(struct amdgpu_device *adev)
2942 {
2943 	uint32_t rlc_cntl;
2944 
2945 	/* if RLC is not enabled, do nothing */
2946 	rlc_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CNTL);
2947 	return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
2948 }
2949 
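/*
 * Request RLC safe mode: write CMD with MESSAGE = 1 (enter), then poll
 * until the RLC clears the CMD bit to acknowledge the request.
 */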
2950 static void gfx_v12_1_xcc_set_safe_mode(struct amdgpu_device *adev,
2951 					int xcc_id)
2952 {
2953 	uint32_t data;
2954 	unsigned i;
2955 
2956 	data = RLC_SAFE_MODE__CMD_MASK;
2957 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
2958 
2959 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data);
2960 
2961 	/* wait for RLC_SAFE_MODE */
2962 	for (i = 0; i < adev->usec_timeout; i++) {
2963 		if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id),
2964 						regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
2965 			break;
2966 		udelay(1);
2967 	}
2968 }
2969 
2970 static void gfx_v12_1_xcc_unset_safe_mode(struct amdgpu_device *adev,
2971 					  int xcc_id)
2972 {
2973 	WREG32_SOC15(GC, GET_INST(GC, xcc_id),
2974 		     regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
2975 }
2976 
2977 static void gfx_v12_1_update_perf_clk(struct amdgpu_device *adev,
2978 				      bool enable)
2979 {
2980 	int i, num_xcc;
2981 
2982 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
2983 	for (i = 0; i < num_xcc; i++)
2984 		gfx_v12_1_xcc_update_perf_clk(adev, enable, i);
2985 }
2986 
2987 static void gfx_v12_1_update_spm_vmid(struct amdgpu_device *adev,
2988 				      int xcc_id,
2989 				      struct amdgpu_ring *ring,
2990 				      unsigned vmid)
2991 {
2992 	u32 reg, data;
2993 
2994 	reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL);
2995 	if (amdgpu_sriov_is_pp_one_vf(adev))
2996 		data = RREG32_NO_KIQ(reg);
2997 	else
2998 		data = RREG32(reg);
2999 
3000 	data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
3001 	data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
3002 
3003 	if (amdgpu_sriov_is_pp_one_vf(adev))
3004 		WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL, data);
3005 	else
3006 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL, data);
3007 
3008 	if (ring
3009 	    && amdgpu_sriov_is_pp_one_vf(adev)
3010 	    && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
3011 		|| (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
		/* reg still holds the RLC_SPM_MC_CNTL offset computed above */
		amdgpu_ring_emit_wreg(ring, reg, data);
3014 	}
3015 }
3016 
3017 static const struct amdgpu_rlc_funcs gfx_v12_1_rlc_funcs = {
3018 	.is_rlc_enabled = gfx_v12_1_is_rlc_enabled,
3019 	.set_safe_mode = gfx_v12_1_xcc_set_safe_mode,
3020 	.unset_safe_mode = gfx_v12_1_xcc_unset_safe_mode,
3021 	.init = gfx_v12_1_rlc_init,
3022 	.get_csb_size = gfx_v12_1_get_csb_size,
3023 	.get_csb_buffer = gfx_v12_1_get_csb_buffer,
3024 	.resume = gfx_v12_1_rlc_resume,
3025 	.stop = gfx_v12_1_rlc_stop,
3026 	.reset = gfx_v12_1_rlc_reset,
3027 	.start = gfx_v12_1_rlc_start,
3028 	.update_spm_vmid = gfx_v12_1_update_spm_vmid,
3029 };
3030 
3031 #if 0
3032 static void gfx_v12_cntl_power_gating(struct amdgpu_device *adev, bool enable)
3033 {
3034 	/* TODO */
3035 }
3036 
3037 static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable)
3038 {
3039 	/* TODO */
3040 }
3041 #endif
3042 
3043 static int gfx_v12_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
3044 					   enum amd_powergating_state state)
3045 {
3046 	struct amdgpu_device *adev = ip_block->adev;
3047 	bool enable = (state == AMD_PG_STATE_GATE);
3048 
3049 	if (amdgpu_sriov_vf(adev))
3050 		return 0;
3051 
3052 	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3053 	case IP_VERSION(12, 1, 0):
3054 		amdgpu_gfx_off_ctrl(adev, enable);
3055 		break;
3056 	default:
3057 		break;
3058 	}
3059 
3060 	return 0;
3061 }
3062 
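/*
 * Coarse grain clock gating (CGCG/CGLS): when enabling, clear the RLC
 * override bits and program the CGCG FSM thresholds and idle poll count;
 * when disabling, clear the CGCG/CGLS enable bits.
 */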
3063 static void gfx_v12_1_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
3064 							   bool enable, int xcc_id)
3065 {
3066 	uint32_t def, data;
3067 
3068 	if (!(adev->cg_flags &
3069 	      (AMD_CG_SUPPORT_GFX_CGCG |
3070 	      AMD_CG_SUPPORT_GFX_CGLS |
3071 	      AMD_CG_SUPPORT_GFX_3D_CGCG |
3072 	      AMD_CG_SUPPORT_GFX_3D_CGLS)))
3073 		return;
3074 
3075 	if (enable) {
3076 		def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
3077 					  regRLC_CGTT_MGCG_OVERRIDE);
3078 
3079 		/* unset CGCG override */
3080 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
3081 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
3082 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3083 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
3084 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
3085 		    adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
3086 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
3087 
3088 		/* update CGCG override bits */
3089 		if (def != data)
3090 			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
3091 				     regRLC_CGTT_MGCG_OVERRIDE, data);
3092 
3093 		/* enable cgcg FSM(0x0000363F) */
3094 		def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
3095 
3096 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
3097 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
3098 			data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3099 				 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
3100 		}
3101 
3102 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
3103 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
3104 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3105 				 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3106 		}
3107 
3108 		if (def != data)
3109 			WREG32_SOC15(GC, GET_INST(GC, xcc_id),
3110 				     regRLC_CGCG_CGLS_CTRL, data);
3111 
3112 		/* set IDLE_POLL_COUNT(0x00900100) */
3113 		def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL);
3114 
3115 		data &= ~CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK;
3116 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3117 		data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
3118 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3119 
3120 		if (def != data)
3121 			WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data);
3122 
3123 		data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL);
3124 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
3125 		data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
3126 		data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
3127 		data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
3128 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL, data);
3129 	} else {
3130 		/* Program RLC_CGCG_CGLS_CTRL */
3131 		def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
3132 
3133 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
3134 			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
3135 
3136 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3137 			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3138 
3139 		if (def != data)
3140 			WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data);
3141 	}
3142 }
3143 
3144 static void gfx_v12_1_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3145 							   bool enable, int xcc_id)
3146 {
	uint32_t data, def;

	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
3149 		return;
3150 
3151 	/* It is disabled by HW by default */
3152 	if (enable) {
3153 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
3154 			/* 1 - RLC_CGTT_MGCG_OVERRIDE */
3155 			def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
3156 
3157 			data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
3158 				  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
3159 				  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
3160 
3161 			if (def != data)
3162 				WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
3163 		}
3164 	} else {
3165 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
3166 			def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
3167 
3168 			data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
3169 				 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
3170 				 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
3171 
3172 			if (def != data)
3173 				WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
3174 		}
3175 	}
3176 }
3177 
3178 static void gfx_v12_1_xcc_update_repeater_fgcg(struct amdgpu_device *adev,
3179 					       bool enable, int xcc_id)
3180 {
3181 	uint32_t def, data;
3182 
3183 	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
3184 		return;
3185 
3186 	def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
3187 
3188 	if (enable)
3189 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
3190 			  RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK);
3191 	else
3192 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
3193 			RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK;
3194 
3195 	if (def != data)
3196 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
3197 }
3198 
3199 static void gfx_v12_1_xcc_update_sram_fgcg(struct amdgpu_device *adev,
3200 					   bool enable, int xcc_id)
3201 {
3202 	uint32_t def, data;
3203 
3204 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
3205 		return;
3206 
3207 	def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
3208 
3209 	if (enable)
3210 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
3211 	else
3212 		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
3213 
3214 	if (def != data)
3215 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
3216 }
3217 
3218 static void gfx_v12_1_xcc_update_perf_clk(struct amdgpu_device *adev,
3219 					  bool enable, int xcc_id)
3220 {
3221 	uint32_t def, data;
3222 
3223 	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
3224 		return;
3225 
3226 	def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
3227 
3228 	if (enable)
3229 		data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
3230 	else
3231 		data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
3232 
3233 	if (def != data)
3234 		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
3235 }
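
/*
 * The five updaters above share one read-modify-write idiom: snapshot the
 * register ("def"), flip override bits in "data", and write back only when
 * something changed, to avoid redundant register traffic. A minimal sketch
 * of that idiom as a generic helper (illustrative only; this hypothetical
 * helper is not used by the driver, which keeps the per-feature functions
 * explicit):
 */
static void __maybe_unused
gfx_v12_1_xcc_update_mgcg_override(struct amdgpu_device *adev, int xcc_id,
				   uint32_t mask, bool clear)
{
	uint32_t def, data;

	def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);

	if (clear)
		data &= ~mask;
	else
		data |= mask;

	if (def != data)
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
}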
3236 
3237 static int gfx_v12_1_xcc_update_gfx_clock_gating(struct amdgpu_device *adev,
3238 					     bool enable, int xcc_id)
3239 {
3240 	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
3241 
3242 	gfx_v12_1_xcc_update_coarse_grain_clock_gating(adev, enable, xcc_id);
3243 
3244 	gfx_v12_1_xcc_update_medium_grain_clock_gating(adev, enable, xcc_id);
3245 
3246 	gfx_v12_1_xcc_update_repeater_fgcg(adev, enable, xcc_id);
3247 
3248 	gfx_v12_1_xcc_update_sram_fgcg(adev, enable, xcc_id);
3249 
3250 	gfx_v12_1_xcc_update_perf_clk(adev, enable, xcc_id);
3251 
3252 	if (adev->cg_flags &
3253 	    (AMD_CG_SUPPORT_GFX_MGCG |
3254 	     AMD_CG_SUPPORT_GFX_CGLS |
3255 	     AMD_CG_SUPPORT_GFX_CGCG |
3256 	     AMD_CG_SUPPORT_GFX_3D_CGCG |
3257 	     AMD_CG_SUPPORT_GFX_3D_CGLS))
3258 		gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, enable, xcc_id);
3259 
3260 	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
3261 
3262 	return 0;
3263 }
3264 
3265 static int gfx_v12_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
3266 					   enum amd_clockgating_state state)
3267 {
3268 	struct amdgpu_device *adev = ip_block->adev;
3269 	int i, num_xcc;
3270 
3271 	if (amdgpu_sriov_vf(adev))
3272 		return 0;
3273 
3274 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
3275 	switch (adev->ip_versions[GC_HWIP][0]) {
3276 	case IP_VERSION(12, 1, 0):
3277 		for (i = 0; i < num_xcc; i++)
3278 			gfx_v12_1_xcc_update_gfx_clock_gating(adev,
3279 				  state == AMD_CG_STATE_GATE, i);
3280 		break;
3281 	default:
3282 		break;
3283 	}
3284 
3285 	return 0;
3286 }
3287 
3288 static void gfx_v12_1_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
3289 {
3290 	struct amdgpu_device *adev = ip_block->adev;
3291 	int data;
3292 
3293 	/* AMD_CG_SUPPORT_GFX_MGCG */
3294 	data = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CGTT_MGCG_OVERRIDE);
3295 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
3296 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
3297 
3298 	/* AMD_CG_SUPPORT_REPEATER_FGCG */
3299 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
3300 		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;
3301 
3302 	/* AMD_CG_SUPPORT_GFX_FGCG */
3303 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
3304 		*flags |= AMD_CG_SUPPORT_GFX_FGCG;
3305 
3306 	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
3307 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
3308 		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;
3309 
3310 	/* AMD_CG_SUPPORT_GFX_CGCG */
3311 	data = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CGCG_CGLS_CTRL);
3312 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
3313 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
3314 
3315 	/* AMD_CG_SUPPORT_GFX_CGLS */
3316 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
3317 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
3318 }
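
/*
 * Usage sketch (assuming the standard amd_ip_funcs flow): amdgpu's pm code
 * ORs the gating state of every IP block into a single flags word. The
 * hypothetical helper below shows the equivalent for this block alone:
 */
static u64 __maybe_unused gfx_v12_1_query_cg_flags(struct amdgpu_ip_block *ip_block)
{
	u64 flags = 0;

	gfx_v12_1_get_clockgating_state(ip_block, &flags);
	return flags;
}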
3319 
3320 static u64 gfx_v12_1_ring_get_rptr_compute(struct amdgpu_ring *ring)
3321 {
3322 	/* the gfx12 hardware uses a 32-bit rptr */
3323 	return *(uint32_t *)ring->rptr_cpu_addr;
3324 }
3325 
3326 static u64 gfx_v12_1_ring_get_wptr_compute(struct amdgpu_ring *ring)
3327 {
3328 	u64 wptr;
3329 
3330 	/* XXX check if swapping is necessary on BE */
3331 	if (ring->use_doorbell)
3332 		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
3333 	else
3334 		BUG();
3335 	return wptr;
3336 }
3337 
3338 static void gfx_v12_1_ring_set_wptr_compute(struct amdgpu_ring *ring)
3339 {
3340 	struct amdgpu_device *adev = ring->adev;
3341 
3342 	/* XXX check if swapping is necessary on BE */
3343 	if (ring->use_doorbell) {
3344 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
3345 			     ring->wptr);
3346 		WDOORBELL64(ring->doorbell_index, ring->wptr);
3347 	} else {
3348 		BUG(); /* only DOORBELL method supported on gfx12 now */
3349 	}
3350 }
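
/*
 * Typical producer flow for the rptr/wptr helpers above (a sketch; real
 * submissions arrive through the GPU scheduler): reserve ring space, write
 * packets, then commit, which pads the ring and calls ->set_wptr to ring
 * the doorbell. The example helper below is hypothetical.
 */
static void __maybe_unused gfx_v12_1_ring_nop_example(struct amdgpu_ring *ring)
{
	if (amdgpu_ring_alloc(ring, 16))
		return;

	/* single-dword type-3 NOP, same encoding as ring->funcs->nop */
	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 0x3FFF));
	amdgpu_ring_commit(ring);
}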
3351 
3352 static void gfx_v12_1_ring_emit_ib_compute(struct amdgpu_ring *ring,
3353 					   struct amdgpu_job *job,
3354 					   struct amdgpu_ib *ib,
3355 					   uint32_t flags)
3356 {
3357 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
3358 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
3359 
3360 	/* Currently, there is a high probability of a wave ID mismatch
3361 	 * between ME and GDS, leading to a HW deadlock, because ME generates
3362 	 * different wave IDs than the GDS expects. This happens randomly when
3363 	 * at least 5 compute pipes use GDS ordered append. The wave IDs
3364 	 * generated by ME are also wrong after suspend/resume. Those are
3365 	 * probably bugs somewhere else in the kernel driver.
3366 	 *
3367 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
3368 	 * GDS to 0 for this ring (me/pipe).
3369 	 */
3370 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		/* SET_CONFIG_REG with count 1 carries a register offset plus
		 * one data dword; both must be written or the packet is
		 * malformed.
		 */
3371 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3372 		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
3373 	}
3374 
3375 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
3376 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
3377 	amdgpu_ring_write(ring,
3378 #ifdef __BIG_ENDIAN
3379 				(2 << 0) |
3380 #endif
3381 				lower_32_bits(ib->gpu_addr));
3382 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
3383 	amdgpu_ring_write(ring, control);
3384 }
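
/*
 * Sketch of a direct, kernel-owned IB submission that would exercise
 * gfx_v12_1_ring_emit_ib_compute() above, modelled on the IB test path.
 * The helper is hypothetical and abbreviates error handling; no job is
 * attached, so the IB runs with vmid 0:
 */
static int __maybe_unused
gfx_v12_1_submit_ib_example(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	struct dma_fence *f = NULL;
	long r;

	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
	if (r)
		return r;

	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(100));
	dma_fence_put(f);
	if (r == 0)
		return -ETIMEDOUT;
	return r < 0 ? r : 0;
}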
3385 
3386 static void gfx_v12_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
3387 				     u64 seq, unsigned flags)
3388 {
3389 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
3390 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
3391 
3392 	/* RELEASE_MEM - flush caches, send int */
3393 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
3394 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ(1) |
3395 				 PACKET3_RELEASE_MEM_GCR_GLV_WB |
3396 				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
3397 				 PACKET3_RELEASE_MEM_GCR_GL2_SCOPE(2) |
3398 				 PACKET3_RELEASE_MEM_TEMPORAL(3) |
3399 				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3400 				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
3401 	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
3402 				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
3403 
3404 	/*
3405 	 * The address must be Qword aligned for a 64-bit write and Dword
3406 	 * aligned when only the low 32 bits are sent (data high discarded).
3407 	 */
3408 	if (write64bit)
3409 		BUG_ON(addr & 0x7);
3410 	else
3411 		BUG_ON(addr & 0x3);
3412 	amdgpu_ring_write(ring, lower_32_bits(addr));
3413 	amdgpu_ring_write(ring, upper_32_bits(addr));
3414 	amdgpu_ring_write(ring, lower_32_bits(seq));
3415 	amdgpu_ring_write(ring, upper_32_bits(seq));
3416 	amdgpu_ring_write(ring, 0);
3417 }
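
/*
 * CPU-side counterpart (a sketch modelled on amdgpu_fence_read(); the
 * helper is hypothetical): the RELEASE_MEM above lands the sequence number
 * in the fence driver's writeback slot, which the IRQ path compares
 * against the last emitted seq.
 */
static u32 __maybe_unused gfx_v12_1_fence_read_seq_example(struct amdgpu_ring *ring)
{
	if (!ring->fence_drv.cpu_addr)
		return 0;

	return le32_to_cpu(*ring->fence_drv.cpu_addr);
}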
3418 
3419 static void gfx_v12_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
3420 {
3421 	uint32_t seq = ring->fence_drv.sync_seq;
3422 	uint64_t addr = ring->fence_drv.gpu_addr;
3423 
3424 	gfx_v12_1_wait_reg_mem(ring, 0, 1, 0, lower_32_bits(addr),
3425 			       upper_32_bits(addr), seq, 0xffffffff, 4);
3426 }
3427 
3428 static void gfx_v12_1_ring_invalidate_tlbs(struct amdgpu_ring *ring,
3429 				   uint16_t pasid, uint32_t flush_type,
3430 				   bool all_hub, uint8_t dst_sel)
3431 {
3432 	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
3433 	amdgpu_ring_write(ring,
3434 			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
3435 			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
3436 			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
3437 			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
3438 }
3439 
3440 static void gfx_v12_1_ring_emit_vm_flush(struct amdgpu_ring *ring,
3441 					 unsigned vmid, uint64_t pd_addr)
3442 {
3443 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
3444 
3445 	/* compute doesn't have PFP */
3446 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
3447 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
3448 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3449 		amdgpu_ring_write(ring, 0x0);
3450 	}
3451 }
3452 
3453 static void gfx_v12_1_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
3454 					  u64 seq, unsigned int flags)
3455 {
3456 	struct amdgpu_device *adev = ring->adev;
3457 
3458 	/* we only allocate 32 bits for each seq writeback address */
3459 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
3460 
3461 	/* write fence seq to the "addr" */
3462 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3463 	amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
3464 	amdgpu_ring_write(ring, lower_32_bits(addr));
3465 	amdgpu_ring_write(ring, upper_32_bits(addr));
3466 	amdgpu_ring_write(ring, lower_32_bits(seq));
3467 
3468 	if (flags & AMDGPU_FENCE_FLAG_INT) {
3469 		/* set register to trigger INT */
3470 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3471 		amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
3472 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS));
3473 		amdgpu_ring_write(ring, 0);
3474 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
3475 	}
3476 }
3477 
3478 static void gfx_v12_1_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
3479 				     uint32_t reg_val_offs)
3480 {
3481 	struct amdgpu_device *adev = ring->adev;
3482 
3483 	reg = soc_v1_0_normalize_xcc_reg_offset(reg);
3484 
3485 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
3486 	amdgpu_ring_write(ring, 0 |	/* src: register*/
3487 				(5 << 8) |	/* dst: memory */
3488 				(1 << 20));	/* write confirm */
3489 	amdgpu_ring_write(ring, reg);
3490 	amdgpu_ring_write(ring, 0);
3491 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
3492 				reg_val_offs * 4));
3493 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
3494 				reg_val_offs * 4));
3495 }
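
/*
 * Consumer-side sketch for the KIQ register read above, loosely modelled
 * on amdgpu_kiq_rreg(). The helper is hypothetical: locking, the fence
 * wait and error handling are elided, so it illustrates the data flow
 * rather than being usable as-is.
 */
static uint32_t __maybe_unused
gfx_v12_1_kiq_rreg_example(struct amdgpu_device *adev, uint32_t reg)
{
	struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring;
	uint32_t reg_val_offs = 0, val;

	if (amdgpu_device_wb_get(adev, &reg_val_offs))
		return ~0;

	amdgpu_ring_alloc(ring, 32);
	gfx_v12_1_ring_emit_rreg(ring, reg, reg_val_offs);
	amdgpu_ring_commit(ring);

	/* the real path waits on a KIQ fence before trusting the slot */
	val = adev->wb.wb[reg_val_offs];
	amdgpu_device_wb_free(adev, reg_val_offs);

	return val;
}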
3496 
3497 static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring,
3498 				     uint32_t reg,
3499 				     uint32_t val)
3500 {
3501 	uint32_t cmd = 0;
3502 
3503 	reg = soc_v1_0_normalize_xcc_reg_offset(reg);
3504 
3505 	switch (ring->funcs->type) {
3506 	case AMDGPU_RING_TYPE_KIQ:
3507 		cmd = (1 << 16); /* no inc addr */
3508 		break;
3509 	default:
3510 		cmd = WR_CONFIRM;
3511 		break;
3512 	}
3513 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3514 	amdgpu_ring_write(ring, cmd);
3515 	amdgpu_ring_write(ring, reg);
3516 	amdgpu_ring_write(ring, 0);
3517 	amdgpu_ring_write(ring, val);
3518 }
3519 
3520 static void gfx_v12_1_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
3521 					uint32_t val, uint32_t mask)
3522 {
3523 	gfx_v12_1_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
3524 }
3525 
3526 static void gfx_v12_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
3527 						   uint32_t reg0, uint32_t reg1,
3528 						   uint32_t ref, uint32_t mask)
3529 {
3530 	gfx_v12_1_wait_reg_mem(ring, 0, 0, 1, reg0, reg1,
3531 			       ref, mask, 0x20);
3532 }
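
/*
 * Assuming gfx_v12_1_wait_reg_mem() mirrors the gfx9/gfx10 helpers of the
 * same name, its arguments are (eng_sel, mem_space, opt, addr0, addr1,
 * ref, mask, poll interval); the trailing 0x20 above is then the
 * WAIT_REG_MEM poll interval, not a mask.
 */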
3533 
3534 static void gfx_v12_1_xcc_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
3535 							int me, int pipe,
3536 							enum amdgpu_interrupt_state state,
3537 							int xcc_id)
3538 {
3539 	u32 mec_int_cntl, mec_int_cntl_reg;
3540 
3541 	/*
3542 	 * amdgpu controls only the first MEC. That's why this function only
3543 	 * handles the setting of interrupts for this specific MEC. All other
3544 	 * pipes' interrupts are set by amdkfd.
3545 	 */
3546 
3547 	if (me == 1) {
3548 		switch (pipe) {
3549 		case 0:
3550 			mec_int_cntl_reg = SOC15_REG_OFFSET(
3551 					GC, GET_INST(GC, xcc_id),
3552 					regCP_ME1_PIPE0_INT_CNTL);
3553 			break;
3554 		case 1:
3555 			mec_int_cntl_reg = SOC15_REG_OFFSET(
3556 					GC, GET_INST(GC, xcc_id),
3557 					regCP_ME1_PIPE1_INT_CNTL);
3558 			break;
3559 		case 2:
3560 			mec_int_cntl_reg = SOC15_REG_OFFSET(
3561 					GC, GET_INST(GC, xcc_id),
3562 					regCP_ME1_PIPE2_INT_CNTL);
3563 			break;
3564 		case 3:
3565 			mec_int_cntl_reg = SOC15_REG_OFFSET(
3566 					GC, GET_INST(GC, xcc_id),
3567 					regCP_ME1_PIPE3_INT_CNTL);
3568 			break;
3569 		default:
3570 			DRM_DEBUG("invalid pipe %d\n", pipe);
3571 			return;
3572 		}
3573 	} else {
3574 		DRM_DEBUG("invalid me %d\n", me);
3575 		return;
3576 	}
3577 
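	/*
	 * All CP_ME1_PIPEn_INT_CNTL registers share one field layout, so the
	 * CP_ME1_PIPE0_INT_CNTL REG_SET_FIELD template below is valid for
	 * every pipe selected above.
	 */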
3578 	switch (state) {
3579 	case AMDGPU_IRQ_STATE_DISABLE:
3580 		mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id);
3581 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
3582 					     TIME_STAMP_INT_ENABLE, 0);
3583 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
3584 					     GENERIC0_INT_ENABLE, 0);
3585 		WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
3586 		break;
3587 	case AMDGPU_IRQ_STATE_ENABLE:
3588 		mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id);
3589 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
3590 					     TIME_STAMP_INT_ENABLE, 1);
3591 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
3592 					     GENERIC0_INT_ENABLE, 1);
3593 		WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
3594 		break;
3595 	default:
3596 		break;
3597 	}
3598 }
3599 
3600 static int gfx_v12_1_set_eop_interrupt_state(struct amdgpu_device *adev,
3601 					    struct amdgpu_irq_src *src,
3602 					    unsigned type,
3603 					    enum amdgpu_interrupt_state state)
3604 {
3605 	int i, num_xcc;
3606 
3607 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
3608 	for (i = 0; i < num_xcc; i++) {
3609 		switch (type) {
3610 		case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
3611 			gfx_v12_1_xcc_set_compute_eop_interrupt_state(
3612 					adev, 1, 0, state, i);
3613 			break;
3614 		case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
3615 			gfx_v12_1_xcc_set_compute_eop_interrupt_state(
3616 					adev, 1, 1, state, i);
3617 			break;
3618 		case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
3619 			gfx_v12_1_xcc_set_compute_eop_interrupt_state(
3620 					adev, 1, 2, state, i);
3621 			break;
3622 		case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
3623 			gfx_v12_1_xcc_set_compute_eop_interrupt_state(
3624 					adev, 1, 3, state, i);
3625 			break;
3626 		default:
3627 			break;
3628 		}
3629 	}
3630 
3631 	return 0;
3632 }
3633 
3634 static int gfx_v12_1_eop_irq(struct amdgpu_device *adev,
3635 			     struct amdgpu_irq_src *source,
3636 			     struct amdgpu_iv_entry *entry)
3637 {
3638 	u32 doorbell_offset = entry->src_data[0];
3639 	u8 me_id, pipe_id, queue_id;
3640 	struct amdgpu_ring *ring;
3641 	int i, xcc_id;
3642 
3643 	DRM_DEBUG("IH: CP EOP\n");
3644 
3645 	if (adev->enable_mes && doorbell_offset) {
3646 		struct amdgpu_userq_fence_driver *fence_drv = NULL;
3647 		struct xarray *xa = &adev->userq_xa;
3648 		unsigned long flags;
3649 
3650 		xa_lock_irqsave(xa, flags);
3651 		fence_drv = xa_load(xa, doorbell_offset);
3652 		if (fence_drv)
3653 			amdgpu_userq_fence_driver_process(fence_drv);
3654 		xa_unlock_irqrestore(xa, flags);
3655 	} else {
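		/* ring_id layout: pipe in [1:0], me in [3:2], queue in [6:4] */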
3656 		me_id = (entry->ring_id & 0x0c) >> 2;
3657 		pipe_id = (entry->ring_id & 0x03) >> 0;
3658 		queue_id = (entry->ring_id & 0x70) >> 4;
3659 		xcc_id = gfx_v12_1_ih_to_xcc_inst(adev, entry->node_id);
3660 
3661 		if (xcc_id == -EINVAL)
3662 			return -EINVAL;
3663 
3664 		switch (me_id) {
3665 		case 1:
3666 		case 2:
3667 			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3668 				ring = &adev->gfx.compute_ring
3669 						[i +
3670 						 xcc_id * adev->gfx.num_compute_rings];
3671 				/* Per-queue interrupt is supported for MEC starting from VI.
3672 				 * The interrupt can only be enabled/disabled per pipe instead
3673 				 * of per queue.
3674 				 */
3675 				if ((ring->me == me_id) &&
3676 				    (ring->pipe == pipe_id) &&
3677 				    (ring->queue == queue_id))
3678 					amdgpu_fence_process(ring);
3679 			}
3680 			break;
3681 		default:
3682 			dev_dbg(adev->dev, "Unexpected me %d in eop_irq\n", me_id);
3683 			break;
3684 		}
3685 	}
3686 
3687 	return 0;
3688 }
3689 
3690 static int gfx_v12_1_set_priv_reg_fault_state(struct amdgpu_device *adev,
3691 					      struct amdgpu_irq_src *source,
3692 					      unsigned type,
3693 					      enum amdgpu_interrupt_state state)
3694 {
3695 	int i, num_xcc;
3696 
3697 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
3698 	switch (state) {
3699 	case AMDGPU_IRQ_STATE_DISABLE:
3700 	case AMDGPU_IRQ_STATE_ENABLE:
3701 		for (i = 0; i < num_xcc; i++)
3702 			WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0,
3703 					      PRIV_REG_INT_ENABLE,
3704 					      state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
3705 		break;
3706 	default:
3707 		break;
3708 	}
3709 
3710 	return 0;
3711 }
3712 
3713 static int gfx_v12_1_set_priv_inst_fault_state(struct amdgpu_device *adev,
3714 					       struct amdgpu_irq_src *source,
3715 					       unsigned type,
3716 					       enum amdgpu_interrupt_state state)
3717 {
3718 	int i, num_xcc;
3719 
3720 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
3721 	switch (state) {
3722 	case AMDGPU_IRQ_STATE_DISABLE:
3723 	case AMDGPU_IRQ_STATE_ENABLE:
3724 		for (i = 0; i < num_xcc; i++)
3725 			WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0,
3726 				       PRIV_INSTR_INT_ENABLE,
3727 				       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
3728 		break;
3729 	default:
3730 		break;
3731 	}
3732 
3733 	return 0;
3734 }
3735 
3736 static void gfx_v12_1_handle_priv_fault(struct amdgpu_device *adev,
3737 					struct amdgpu_iv_entry *entry)
3738 {
3739 	u8 me_id, pipe_id, queue_id;
3740 	struct amdgpu_ring *ring;
3741 	int i, xcc_id;
3742 
3743 	me_id = (entry->ring_id & 0x0c) >> 2;
3744 	pipe_id = (entry->ring_id & 0x03) >> 0;
3745 	queue_id = (entry->ring_id & 0x70) >> 4;
3746 	xcc_id = gfx_v12_1_ih_to_xcc_inst(adev, entry->node_id);
3747 
3748 	if (xcc_id == -EINVAL)
3749 		return;
3750 
3751 	if (!adev->gfx.disable_kq) {
3752 		switch (me_id) {
3753 		case 1:
3754 		case 2:
3755 			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3756 				ring = &adev->gfx.compute_ring
3757 					[i +
3758 					 xcc_id * adev->gfx.num_compute_rings];
3759 				if (ring->me == me_id && ring->pipe == pipe_id &&
3760 				    ring->queue == queue_id)
3761 					drm_sched_fault(&ring->sched);
3762 			}
3763 			break;
3764 		default:
3765 			dev_dbg(adev->dev, "Unexpected me %d in priv_fault\n", me_id);
3766 			break;
3767 		}
3768 	}
3769 }
3770 
3771 static int gfx_v12_1_priv_reg_irq(struct amdgpu_device *adev,
3772 				  struct amdgpu_irq_src *source,
3773 				  struct amdgpu_iv_entry *entry)
3774 {
3775 	DRM_ERROR("Illegal register access in command stream\n");
3776 	gfx_v12_1_handle_priv_fault(adev, entry);
3777 	return 0;
3778 }
3779 
3780 static int gfx_v12_1_priv_inst_irq(struct amdgpu_device *adev,
3781 				   struct amdgpu_irq_src *source,
3782 				   struct amdgpu_iv_entry *entry)
3783 {
3784 	DRM_ERROR("Illegal instruction in command stream\n");
3785 	gfx_v12_1_handle_priv_fault(adev, entry);
3786 	return 0;
3787 }
3788 
3789 static int gfx_v12_1_rlc_poison_irq(struct amdgpu_device *adev,
3790 				  struct amdgpu_irq_src *source,
3791 				  struct amdgpu_iv_entry *entry)
3792 {
3793 	uint32_t rlc_fed_status = 0;
3794 	uint32_t ras_blk = RAS_BLOCK_ID__GFX;
3795 	struct ras_ih_info ih_info = {0};
3796 	int i, num_xcc;
3797 
3798 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
3799 	for (i = 0; i < num_xcc; i++)
3800 		rlc_fed_status |= RREG32(SOC15_REG_OFFSET(GC,
3801 					GET_INST(GC, i), regRLC_RLCS_FED_STATUS));
3802 
3803 	if (!rlc_fed_status)
3804 		return 0;
3805 
3806 	if (REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA0_FED_ERR) ||
3807 	    REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA1_FED_ERR))
3808 		ras_blk = RAS_BLOCK_ID__SDMA;
3809 
3810 	dev_warn(adev->dev, "RLC %d FED IRQ\n", ras_blk);
3811 
3812 	ih_info.block = ras_blk;
3813 	ih_info.reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
3814 	amdgpu_ras_mgr_dispatch_interrupt(adev, &ih_info);
3815 	return 0;
3816 }
3817 
3818 static void gfx_v12_1_emit_mem_sync(struct amdgpu_ring *ring)
3819 {
3820 	const unsigned int gcr_cntl =
3821 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
3822 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
3823 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
3824 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
3825 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1) |
3826 			PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_SCOPE(2);
3827 
3828 	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
3829 	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
3830 	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
3831 	amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
3832 	amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
3833 	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
3834 	amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
3835 	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
3836 	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
3837 }
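
/*
 * Note: CP_COHER_SIZE/CP_COHER_SIZE_HI of 0xffffffff/0xffffff above select
 * the full address range, so this ACQUIRE_MEM behaves as a global flush
 * and invalidate for the cache scopes chosen in gcr_cntl.
 */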
3838 
3839 static const struct amd_ip_funcs gfx_v12_1_ip_funcs = {
3840 	.name = "gfx_v12_1",
3841 	.early_init = gfx_v12_1_early_init,
3842 	.late_init = gfx_v12_1_late_init,
3843 	.sw_init = gfx_v12_1_sw_init,
3844 	.sw_fini = gfx_v12_1_sw_fini,
3845 	.hw_init = gfx_v12_1_hw_init,
3846 	.hw_fini = gfx_v12_1_hw_fini,
3847 	.suspend = gfx_v12_1_suspend,
3848 	.resume = gfx_v12_1_resume,
3849 	.is_idle = gfx_v12_1_is_idle,
3850 	.wait_for_idle = gfx_v12_1_wait_for_idle,
3851 	.set_clockgating_state = gfx_v12_1_set_clockgating_state,
3852 	.set_powergating_state = gfx_v12_1_set_powergating_state,
3853 	.get_clockgating_state = gfx_v12_1_get_clockgating_state,
3854 };
3855 
3856 static const struct amdgpu_ring_funcs gfx_v12_1_ring_funcs_compute = {
3857 	.type = AMDGPU_RING_TYPE_COMPUTE,
3858 	.align_mask = 0xff,
3859 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
3860 	.support_64bit_ptrs = true,
3861 	.get_rptr = gfx_v12_1_ring_get_rptr_compute,
3862 	.get_wptr = gfx_v12_1_ring_get_wptr_compute,
3863 	.set_wptr = gfx_v12_1_ring_set_wptr_compute,
3864 	.emit_frame_size =
3865 		7 + /* gfx_v12_1_ring_emit_pipeline_sync */
3866 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
3867 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
3868 		2 + /* gfx_v12_1_ring_emit_vm_flush */
3869 		8 + 8 + 8 + /* gfx_v12_1_ring_emit_fence x3 for user fence, vm fence */
3870 		8, /* gfx_v12_1_emit_mem_sync */
3871 	.emit_ib_size =	7, /* gfx_v12_1_ring_emit_ib_compute */
3872 	.emit_ib = gfx_v12_1_ring_emit_ib_compute,
3873 	.emit_fence = gfx_v12_1_ring_emit_fence,
3874 	.emit_pipeline_sync = gfx_v12_1_ring_emit_pipeline_sync,
3875 	.emit_vm_flush = gfx_v12_1_ring_emit_vm_flush,
3876 	.test_ring = gfx_v12_1_ring_test_ring,
3877 	.test_ib = gfx_v12_1_ring_test_ib,
3878 	.insert_nop = amdgpu_ring_insert_nop,
3879 	.pad_ib = amdgpu_ring_generic_pad_ib,
3880 	.emit_wreg = gfx_v12_1_ring_emit_wreg,
3881 	.emit_reg_wait = gfx_v12_1_ring_emit_reg_wait,
3882 	.emit_reg_write_reg_wait = gfx_v12_1_ring_emit_reg_write_reg_wait,
3883 	.emit_mem_sync = gfx_v12_1_emit_mem_sync,
3884 };
3885 
3886 static const struct amdgpu_ring_funcs gfx_v12_1_ring_funcs_kiq = {
3887 	.type = AMDGPU_RING_TYPE_KIQ,
3888 	.align_mask = 0xff,
3889 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
3890 	.support_64bit_ptrs = true,
3891 	.get_rptr = gfx_v12_1_ring_get_rptr_compute,
3892 	.get_wptr = gfx_v12_1_ring_get_wptr_compute,
3893 	.set_wptr = gfx_v12_1_ring_set_wptr_compute,
3894 	.emit_frame_size =
3895 		7 + /* gfx_v12_1_ring_emit_pipeline_sync */
3896 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
3897 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
3898 		2 + /* gfx_v12_1_ring_emit_vm_flush */
3899 		8 + 8 + 8, /* gfx_v12_1_ring_emit_fence_kiq x3 for user fence, vm fence */
3900 	.emit_ib_size =	7, /* gfx_v12_1_ring_emit_ib_compute */
3901 	.emit_ib = gfx_v12_1_ring_emit_ib_compute,
3902 	.emit_fence = gfx_v12_1_ring_emit_fence_kiq,
3903 	.test_ring = gfx_v12_1_ring_test_ring,
3904 	.test_ib = gfx_v12_1_ring_test_ib,
3905 	.insert_nop = amdgpu_ring_insert_nop,
3906 	.pad_ib = amdgpu_ring_generic_pad_ib,
3907 	.emit_rreg = gfx_v12_1_ring_emit_rreg,
3908 	.emit_wreg = gfx_v12_1_ring_emit_wreg,
3909 	.emit_reg_wait = gfx_v12_1_ring_emit_reg_wait,
3910 	.emit_reg_write_reg_wait = gfx_v12_1_ring_emit_reg_write_reg_wait,
3911 };
3912 
3913 static void gfx_v12_1_set_ring_funcs(struct amdgpu_device *adev)
3914 {
3915 	int i, j, num_xcc;
3916 
3917 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
3918 	for (i = 0; i < num_xcc; i++) {
3919 		adev->gfx.kiq[i].ring.funcs = &gfx_v12_1_ring_funcs_kiq;
3920 
3921 		for (j = 0; j < adev->gfx.num_compute_rings; j++)
3922 			adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings].funcs =
3923 						&gfx_v12_1_ring_funcs_compute;
3924 	}
3925 }
3926 
3927 static const struct amdgpu_irq_src_funcs gfx_v12_1_eop_irq_funcs = {
3928 	.set = gfx_v12_1_set_eop_interrupt_state,
3929 	.process = gfx_v12_1_eop_irq,
3930 };
3931 
3932 static const struct amdgpu_irq_src_funcs gfx_v12_1_priv_reg_irq_funcs = {
3933 	.set = gfx_v12_1_set_priv_reg_fault_state,
3934 	.process = gfx_v12_1_priv_reg_irq,
3935 };
3936 
3937 static const struct amdgpu_irq_src_funcs gfx_v12_1_priv_inst_irq_funcs = {
3938 	.set = gfx_v12_1_set_priv_inst_fault_state,
3939 	.process = gfx_v12_1_priv_inst_irq,
3940 };
3941 
3942 static const struct amdgpu_irq_src_funcs gfx_v12_1_rlc_poison_irq_funcs = {
3943 	.process = gfx_v12_1_rlc_poison_irq,
3944 };
3945 
3946 static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev)
3947 {
3948 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
3949 	adev->gfx.eop_irq.funcs = &gfx_v12_1_eop_irq_funcs;
3950 
3951 	adev->gfx.priv_reg_irq.num_types = 1;
3952 	adev->gfx.priv_reg_irq.funcs = &gfx_v12_1_priv_reg_irq_funcs;
3953 
3954 	adev->gfx.priv_inst_irq.num_types = 1;
3955 	adev->gfx.priv_inst_irq.funcs = &gfx_v12_1_priv_inst_irq_funcs;
3956 
3957 	adev->gfx.rlc_poison_irq.num_types = 1;
3958 	adev->gfx.rlc_poison_irq.funcs = &gfx_v12_1_rlc_poison_irq_funcs;
3959 }
3960 
3961 static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev)
3962 {
3963 	if (adev->flags & AMD_IS_APU)
3964 		adev->gfx.imu.mode = MISSION_MODE;
3965 	else
3966 		adev->gfx.imu.mode = DEBUG_MODE;
3967 	if (!amdgpu_sriov_vf(adev))
3968 		adev->gfx.imu.funcs = &gfx_v12_1_imu_funcs;
3969 }
3970 
3971 static void gfx_v12_1_set_rlc_funcs(struct amdgpu_device *adev)
3972 {
3973 	adev->gfx.rlc.funcs = &gfx_v12_1_rlc_funcs;
3974 }
3975 
3976 static void gfx_v12_1_set_mqd_funcs(struct amdgpu_device *adev)
3977 {
3978 	/* set compute eng mqd */
3979 	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
3980 		sizeof(struct v12_1_compute_mqd);
3981 	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
3982 		gfx_v12_1_compute_mqd_init;
3983 }
3984 
3985 static void gfx_v12_1_set_user_cu_inactive_bitmap_per_sh(struct amdgpu_device *adev,
3986 							  u32 bitmap, int xcc_id)
3987 {
3988 	u32 data;
3989 
3990 	if (!bitmap)
3991 		return;
3992 
3993 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
3994 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
3995 
3996 	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data);
3997 }
3998 
3999 static u32 gfx_v12_1_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev,
4000 						 int xcc_id)
4001 {
4002 	u32 data, mask;
4003 
4004 	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG);
4005 	data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);
4006 
4007 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
4008 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
4009 
4010 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
4011 
4012 	return (~data) & mask;
4013 }
4014 
4015 static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
4016 				 struct amdgpu_cu_info *cu_info)
4017 {
4018 	int i, j, k, counter, xcc_id, active_cu_number = 0;
4019 	u32 mask, bitmap;
4020 	unsigned int disable_masks[2 * 2];
4021 
4022 	if (!adev || !cu_info)
4023 		return -EINVAL;
4024 
4025 	if (adev->gfx.config.max_shader_engines > 2 ||
4026 	    adev->gfx.config.max_sh_per_se > 2) {
4027 		dev_err(adev->dev,
4028 			"Max SE (%d) and Max SA per SE (%d) is greater than expected\n",
4029 			adev->gfx.config.max_shader_engines,
4030 			adev->gfx.config.max_sh_per_se);
4031 		return -EINVAL;
4032 	}
4033 
4034 	amdgpu_gfx_parse_disable_cu(adev, disable_masks,
4035 				    adev->gfx.config.max_shader_engines,
4036 				    adev->gfx.config.max_sh_per_se);
4037 
4038 	mutex_lock(&adev->grbm_idx_mutex);
4039 	for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
4040 		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4041 			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
4042 				bitmap = i * adev->gfx.config.max_sh_per_se + j;
4043 				if (!((gfx_v12_1_get_sa_active_bitmap(adev, xcc_id) >> bitmap) & 1))
4044 					continue;
4045 				mask = 1;
4046 				counter = 0;
4047 				gfx_v12_1_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
4048 				gfx_v12_1_set_user_cu_inactive_bitmap_per_sh(
4049 					adev,
4050 					disable_masks[i * adev->gfx.config.max_sh_per_se + j],
4051 					xcc_id);
4052 				bitmap = gfx_v12_1_get_cu_active_bitmap_per_sh(adev, xcc_id);
4053 
4054 				cu_info->bitmap[xcc_id][i][j] = bitmap;
4055 
4056 				for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
4057 					if (bitmap & mask)
4058 						counter++;
4059 
4060 					mask <<= 1;
4061 				}
4062 				active_cu_number += counter;
4063 			}
4064 		}
4065 		gfx_v12_1_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id);
4066 	}
4067 	mutex_unlock(&adev->grbm_idx_mutex);
4068 
4069 	cu_info->number = active_cu_number;
4070 	cu_info->simd_per_cu = NUM_SIMD_PER_CU_GFX12_1;
4071 	cu_info->lds_size = 320;
4072 
4073 	return 0;
4074 }
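
/*
 * An equivalent, shorter form of the bit-count loop in
 * gfx_v12_1_get_cu_info() (a sketch with a hypothetical helper; the
 * explicit mask walk above matches the style of the other gfx IP files):
 */
static int __maybe_unused gfx_v12_1_count_active_cus(u32 bitmap, u32 max_cu_per_sh)
{
	return hweight32(bitmap & GENMASK(max_cu_per_sh - 1, 0));
}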
4075 
4076 const struct amdgpu_ip_block_version gfx_v12_1_ip_block = {
4077 	.type = AMD_IP_BLOCK_TYPE_GFX,
4078 	.major = 12,
4079 	.minor = 1,
4080 	.rev = 0,
4081 	.funcs = &gfx_v12_1_ip_funcs,
4082 };
4083 
4084 static int gfx_v12_1_xcp_resume(void *handle, uint32_t inst_mask)
4085 {
4086 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4087 	uint32_t tmp_mask;
4088 	int i, r;
4089 
4090 	/* TODO: Initialize golden regs */
4091 	/* gfx_v12_1_init_golden_registers(adev); */
4092 
4093 	tmp_mask = inst_mask;
4094 	for_each_inst(i, tmp_mask)
4095 		gfx_v12_1_xcc_constants_init(adev, i);
4096 
4097 	if (!amdgpu_sriov_vf(adev)) {
4098 		tmp_mask = inst_mask;
4099 		for_each_inst(i, tmp_mask) {
4100 			r = gfx_v12_1_xcc_rlc_resume(adev, i);
4101 			if (r)
4102 				return r;
4103 		}
4104 	}
4105 
4106 	r = gfx_v12_1_xcc_cp_resume(adev, inst_mask);
4107 
4108 	return r;
4109 }
4110 
4111 static int gfx_v12_1_xcp_suspend(void *handle, uint32_t inst_mask)
4112 {
4113 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4114 	int i;
4115 
4116 	for_each_inst(i, inst_mask)
4117 		gfx_v12_1_xcc_fini(adev, i);
4118 
4119 	return 0;
4120 }
4121 
4122 struct amdgpu_xcp_ip_funcs gfx_v12_1_xcp_funcs = {
4123 	.suspend = &gfx_v12_1_xcp_suspend,
4124 	.resume = &gfx_v12_1_xcp_resume
4125 };
4126