xref: /linux/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c (revision c17ee635fd3a482b2ad2bf5e269755c2eae5f25e)
14ed5116aSLikun Gao /*
24ed5116aSLikun Gao  * Copyright 2025 Advanced Micro Devices, Inc.
34ed5116aSLikun Gao  *
44ed5116aSLikun Gao  * Permission is hereby granted, free of charge, to any person obtaining a
54ed5116aSLikun Gao  * copy of this software and associated documentation files (the "Software"),
64ed5116aSLikun Gao  * to deal in the Software without restriction, including without limitation
74ed5116aSLikun Gao  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
84ed5116aSLikun Gao  * and/or sell copies of the Software, and to permit persons to whom the
94ed5116aSLikun Gao  * Software is furnished to do so, subject to the following conditions:
104ed5116aSLikun Gao  *
114ed5116aSLikun Gao  * The above copyright notice and this permission notice shall be included in
124ed5116aSLikun Gao  * all copies or substantial portions of the Software.
134ed5116aSLikun Gao  *
144ed5116aSLikun Gao  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
154ed5116aSLikun Gao  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
164ed5116aSLikun Gao  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
174ed5116aSLikun Gao  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
184ed5116aSLikun Gao  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
194ed5116aSLikun Gao  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
204ed5116aSLikun Gao  * OTHER DEALINGS IN THE SOFTWARE.
214ed5116aSLikun Gao  *
224ed5116aSLikun Gao  */
234ed5116aSLikun Gao 
244ed5116aSLikun Gao #include <linux/delay.h>
254ed5116aSLikun Gao #include <linux/firmware.h>
264ed5116aSLikun Gao #include <linux/module.h>
274ed5116aSLikun Gao #include <linux/pci.h>
284ed5116aSLikun Gao 
294ed5116aSLikun Gao #include "amdgpu.h"
304ed5116aSLikun Gao #include "amdgpu_ucode.h"
314ed5116aSLikun Gao #include "amdgpu_trace.h"
324ed5116aSLikun Gao 
334ed5116aSLikun Gao #include "gc/gc_12_1_0_offset.h"
344ed5116aSLikun Gao #include "gc/gc_12_1_0_sh_mask.h"
35e50a6eceSHawking Zhang #include "ivsrcid/gfx/irqsrcs_gfx_12_1_0.h"
364ed5116aSLikun Gao 
374ed5116aSLikun Gao #include "soc15_common.h"
384ed5116aSLikun Gao #include "soc15.h"
394ed5116aSLikun Gao #include "sdma_v7_1_0_pkt_open.h"
404ed5116aSLikun Gao #include "nbio_v4_3.h"
414ed5116aSLikun Gao #include "sdma_common.h"
424ed5116aSLikun Gao #include "sdma_v7_1.h"
434ed5116aSLikun Gao #include "v12_structs.h"
444ed5116aSLikun Gao #include "mes_userqueue.h"
45fcc4fc75SLikun Gao #include "soc_v1_0.h"
464ed5116aSLikun Gao 
474ed5116aSLikun Gao MODULE_FIRMWARE("amdgpu/sdma_7_1_0.bin");
484ed5116aSLikun Gao 
/* Per-instance register aperture strides used by sdma_v7_1_get_reg_offset():
 * offsets below SDMA0_SDMA_IDX_0_END live in the first GC segment and are
 * spaced SDMA1_REG_OFFSET apart per instance; offsets at or above it live in
 * the second segment with an SDMA1_HYP_DEC_REG_OFFSET stride.
 */
#define SDMA1_REG_OFFSET 0x600
#define SDMA0_SDMA_IDX_0_END 0x450
#define SDMA1_HYP_DEC_REG_OFFSET 0x30
524ed5116aSLikun Gao 
/* GC-block SDMA registers dumped for debugging (status, ucode rev, UTCL1
 * state and the ring-buffer/IB state of queues 0-2).
 */
static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_1[] = {
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS1_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS2_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS3_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS4_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS5_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS6_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UCODE_REV),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK1),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK1),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_INT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_VM_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_CHICKEN_BITS),
};
1104ed5116aSLikun Gao 
/* Forward declarations for functions defined later in this file */
static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev);
static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
				uint32_t inst_mask);
1174ed5116aSLikun Gao 
1184ed5116aSLikun Gao static u32 sdma_v7_1_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
1194ed5116aSLikun Gao {
1204ed5116aSLikun Gao 	u32 base;
1214ed5116aSLikun Gao 	u32 dev_inst = GET_INST(SDMA0, instance);
1224ed5116aSLikun Gao 	int xcc_id = adev->sdma.instance[instance].xcc_id;
1234ed5116aSLikun Gao 	int xcc_inst = dev_inst % adev->sdma.num_inst_per_xcc;
1244ed5116aSLikun Gao 
1254ed5116aSLikun Gao 	if (internal_offset >= SDMA0_SDMA_IDX_0_END) {
1264ed5116aSLikun Gao 		base = adev->reg_offset[GC_HWIP][xcc_id][1];
1274ed5116aSLikun Gao 		if (xcc_inst != 0)
1284ed5116aSLikun Gao 			internal_offset += SDMA1_HYP_DEC_REG_OFFSET * xcc_inst;
1294ed5116aSLikun Gao 	} else {
1304ed5116aSLikun Gao 		base = adev->reg_offset[GC_HWIP][xcc_id][0];
1314ed5116aSLikun Gao 		if (xcc_inst != 0)
1324ed5116aSLikun Gao 			internal_offset += SDMA1_REG_OFFSET * xcc_inst;
1334ed5116aSLikun Gao 	}
1344ed5116aSLikun Gao 
1354ed5116aSLikun Gao 	return base + internal_offset;
1364ed5116aSLikun Gao }
1374ed5116aSLikun Gao 
1384ed5116aSLikun Gao static unsigned sdma_v7_1_ring_init_cond_exec(struct amdgpu_ring *ring,
1394ed5116aSLikun Gao 					      uint64_t addr)
1404ed5116aSLikun Gao {
1414ed5116aSLikun Gao 	unsigned ret;
1424ed5116aSLikun Gao 
1434ed5116aSLikun Gao 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
1444ed5116aSLikun Gao 	amdgpu_ring_write(ring, lower_32_bits(addr));
1454ed5116aSLikun Gao 	amdgpu_ring_write(ring, upper_32_bits(addr));
1464ed5116aSLikun Gao 	amdgpu_ring_write(ring, 1);
1474ed5116aSLikun Gao 	/* this is the offset we need patch later */
1484ed5116aSLikun Gao 	ret = ring->wptr & ring->buf_mask;
1494ed5116aSLikun Gao 	/* insert dummy here and patch it later */
1504ed5116aSLikun Gao 	amdgpu_ring_write(ring, 0);
1514ed5116aSLikun Gao 
1524ed5116aSLikun Gao 	return ret;
1534ed5116aSLikun Gao }
1544ed5116aSLikun Gao 
1554ed5116aSLikun Gao /**
1564ed5116aSLikun Gao  * sdma_v7_1_ring_get_rptr - get the current read pointer
1574ed5116aSLikun Gao  *
1584ed5116aSLikun Gao  * @ring: amdgpu ring pointer
1594ed5116aSLikun Gao  *
1604ed5116aSLikun Gao  * Get the current rptr from the hardware.
1614ed5116aSLikun Gao  */
1624ed5116aSLikun Gao static uint64_t sdma_v7_1_ring_get_rptr(struct amdgpu_ring *ring)
1634ed5116aSLikun Gao {
1644ed5116aSLikun Gao 	u64 *rptr;
1654ed5116aSLikun Gao 
1664ed5116aSLikun Gao 	/* XXX check if swapping is necessary on BE */
1674ed5116aSLikun Gao 	rptr = (u64 *)ring->rptr_cpu_addr;
1684ed5116aSLikun Gao 
1694ed5116aSLikun Gao 	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
1704ed5116aSLikun Gao 	return ((*rptr) >> 2);
1714ed5116aSLikun Gao }
1724ed5116aSLikun Gao 
1734ed5116aSLikun Gao /**
1744ed5116aSLikun Gao  * sdma_v7_1_ring_get_wptr - get the current write pointer
1754ed5116aSLikun Gao  *
1764ed5116aSLikun Gao  * @ring: amdgpu ring pointer
1774ed5116aSLikun Gao  *
1784ed5116aSLikun Gao  * Get the current wptr from the hardware.
1794ed5116aSLikun Gao  */
1804ed5116aSLikun Gao static uint64_t sdma_v7_1_ring_get_wptr(struct amdgpu_ring *ring)
1814ed5116aSLikun Gao {
1824ed5116aSLikun Gao 	u64 wptr = 0;
1834ed5116aSLikun Gao 
1844ed5116aSLikun Gao 	if (ring->use_doorbell) {
1854ed5116aSLikun Gao 		/* XXX check if swapping is necessary on BE */
1864ed5116aSLikun Gao 		wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
1874ed5116aSLikun Gao 		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
1884ed5116aSLikun Gao 	}
1894ed5116aSLikun Gao 
1904ed5116aSLikun Gao 	return wptr >> 2;
1914ed5116aSLikun Gao }
1924ed5116aSLikun Gao 
/**
 * sdma_v7_1_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware.  With a doorbell, the wptr shadow
 * in memory is updated first (atomically) and the doorbell is rung with
 * the same value; without one, the QUEUE0 RB_WPTR registers are written
 * directly.  ring->wptr is shifted left by 2 in both paths to match the
 * hardware encoding (the get_rptr/get_wptr paths shift right by 2).
 */
static void sdma_v7_1_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	DRM_DEBUG("Setting write pointer\n");

	if (ring->use_doorbell) {
		DRM_DEBUG("Using doorbell -- "
			  "wptr_offs == 0x%08x "
			  "lower_32_bits(ring->wptr) << 2 == 0x%08x "
			  "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
			  ring->wptr_offs,
			  lower_32_bits(ring->wptr << 2),
			  upper_32_bits(ring->wptr << 2));
		/* XXX check if swapping is necessary on BE */
		/* update the shadow before ringing the doorbell so the
		 * engine's wptr poll sees a consistent value
		 */
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr << 2);
		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
			  ring->doorbell_index, ring->wptr << 2);
		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
	} else {
		DRM_DEBUG("Not using doorbell -- "
			  "regSDMA%i_GFX_RB_WPTR == 0x%08x "
			  "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
			  ring->me,
			  lower_32_bits(ring->wptr << 2),
			  ring->me,
			  upper_32_bits(ring->wptr << 2));
		/* program low dword first, then high */
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
							     ring->me,
							     regSDMA0_SDMA_QUEUE0_RB_WPTR),
				lower_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
							     ring->me,
							     regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
				upper_32_bits(ring->wptr << 2));
	}
}
2384ed5116aSLikun Gao 
2394ed5116aSLikun Gao static void sdma_v7_1_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
2404ed5116aSLikun Gao {
2414ed5116aSLikun Gao 	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
2424ed5116aSLikun Gao 	int i;
2434ed5116aSLikun Gao 
2444ed5116aSLikun Gao 	for (i = 0; i < count; i++)
2454ed5116aSLikun Gao 		if (sdma && sdma->burst_nop && (i == 0))
2464ed5116aSLikun Gao 			amdgpu_ring_write(ring, ring->funcs->nop |
2474ed5116aSLikun Gao 				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
2484ed5116aSLikun Gao 		else
2494ed5116aSLikun Gao 			amdgpu_ring_write(ring, ring->funcs->nop);
2504ed5116aSLikun Gao }
2514ed5116aSLikun Gao 
/**
 * sdma_v7_1_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @job: job to retrieve vmid from
 * @ib: IB object to schedule
 * @flags: unused
 *
 * Schedule an IB in the DMA ring: pad to an 8-dword boundary, then emit
 * a 6-dword INDIRECT packet referencing the IB and its CSA address.
 */
static void sdma_v7_1_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_job *job,
				   struct amdgpu_ib *ib,
				   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);

	/* An IB packet must end on a 8 DW boundary--the next dword
	 * must be on a 8-dword boundary. Our IB packet below is 6
	 * dwords long, thus add x number of NOPs, such that, in
	 * modular arithmetic,
	 * wptr + 6 + x = 8k, k >= 0, which in C is,
	 * (wptr + 6 + x) % 8 = 0.
	 * The expression below, is a solution of x.
	 */
	sdma_v7_1_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);

	/* INDIRECT packet header carries the VMID (low 4 bits) */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	/* CSA address for context save/restore of this IB */
	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
}
2894ed5116aSLikun Gao 
/**
 * sdma_v7_1_ring_emit_mem_sync - flush the IB by graphics cache rinse
 *
 * @ring: amdgpu ring pointer
 *
 * Flush the IB by graphics cache rinse: emit a GCR_REQ packet that
 * invalidates/writes back GL2 and invalidates GLM/GL1/GLV/GLK/GLI.
 * Base and limit VAs are zero with VMID 0, i.e. the whole cache range.
 */
static void sdma_v7_1_ring_emit_mem_sync(struct amdgpu_ring *ring)
{
	uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
		SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
		SDMA_GCR_GLI_INV(1);

	/* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_56_32(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_0(gcr_cntl) |
			  SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_15_7(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_16(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD5_LIMIT_VA_56_48(0) |
			  SDMA_PKT_GCR_REQ_PAYLOAD5_VMID(0));
}
3134ed5116aSLikun Gao 
3144ed5116aSLikun Gao 
/**
 * sdma_v7_1_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address
 * @seq: fence seq number
 * @flags: fence flags
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed.  A 64-bit fence is emitted as two 32-bit
 * FENCE packets (low dword at @addr, high dword at @addr + 4).
 */
static void sdma_v7_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
			  SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
	/* zero in first two bits */
	BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
				  SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
		/* zero in first two bits */
		BUG_ON(addr & 0x3);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* generate an interrupt */
		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
	}
}
3584ed5116aSLikun Gao 
3594ed5116aSLikun Gao /**
3604ed5116aSLikun Gao  * sdma_v7_1_inst_gfx_stop - stop the gfx async dma engines
3614ed5116aSLikun Gao  *
3624ed5116aSLikun Gao  * @adev: amdgpu_device pointer
3634ed5116aSLikun Gao  * @inst_mask: mask of dma engine instances to be disabled
3644ed5116aSLikun Gao  *
3654ed5116aSLikun Gao  * Stop the gfx async dma ring buffers.
3664ed5116aSLikun Gao  */
3674ed5116aSLikun Gao static void sdma_v7_1_inst_gfx_stop(struct amdgpu_device *adev,
3684ed5116aSLikun Gao 				    uint32_t inst_mask)
3694ed5116aSLikun Gao {
3704ed5116aSLikun Gao 	u32 rb_cntl, ib_cntl;
3714ed5116aSLikun Gao 	int i;
3724ed5116aSLikun Gao 
37305282873SLikun Gao 	for_each_inst(i, inst_mask) {
3744ed5116aSLikun Gao 		rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL));
3754ed5116aSLikun Gao 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 0);
3764ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);
3774ed5116aSLikun Gao 		ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL));
3784ed5116aSLikun Gao 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 0);
3794ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl);
3804ed5116aSLikun Gao 	}
3814ed5116aSLikun Gao }
3824ed5116aSLikun Gao 
/**
 * sdma_v7_1_inst_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be disabled
 *
 * Stop the compute async dma queues.  Currently a no-op stub; kept so
 * sdma_v7_1_inst_enable() can call it symmetrically with gfx_stop.
 */
static void sdma_v7_1_inst_rlc_stop(struct amdgpu_device *adev,
				    uint32_t inst_mask)
{
	/* XXX todo */
}
3964ed5116aSLikun Gao 
/**
 * sdma_v7_1_inst_ctx_switch_enable - stop the async dma engines context switch
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs context switch.
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Halt or unhalt the async dma engines context switch.
 *
 * NOTE(review): despite the name and @enable parameter, the body only
 * programs SDMA_UTCL1_TIMEOUT to 0x80 for each selected instance and
 * never reads @enable -- confirm whether context-switch programming is
 * intentionally omitted on this IP version.
 */
static void sdma_v7_1_inst_ctx_switch_enable(struct amdgpu_device *adev,
					     bool enable, uint32_t inst_mask)
{
	int i;

	for_each_inst(i, inst_mask) {
		WREG32_SOC15_IP(GC,
			sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_TIMEOUT), 0x80);
	}
}
4164ed5116aSLikun Gao 
/**
 * sdma_v7_1_inst_enable - stop the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs.
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Halt or unhalt the async dma engines.  When disabling, the gfx and
 * rlc queues are stopped first.  The MCU_CNTL.HALT bit is set when
 * disabling and cleared when enabling; under SR-IOV the host owns
 * these registers, so VFs return before touching them.
 */
static void sdma_v7_1_inst_enable(struct amdgpu_device *adev,
				  bool enable, uint32_t inst_mask)
{
	u32 mcu_cntl;
	int i;

	if (!enable) {
		sdma_v7_1_inst_gfx_stop(adev, inst_mask);
		sdma_v7_1_inst_rlc_stop(adev, inst_mask);
	}

	if (amdgpu_sriov_vf(adev))
		return;

	for_each_inst(i, inst_mask) {
		mcu_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
		/* HALT = 0 to run, HALT = 1 to stop the microcontroller */
		mcu_cntl = REG_SET_FIELD(mcu_cntl, SDMA0_SDMA_MCU_CNTL, HALT, enable ? 0 : 1);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), mcu_cntl);
	}
}
4464ed5116aSLikun Gao 
4474ed5116aSLikun Gao /**
4484ed5116aSLikun Gao  * sdma_v7_1_gfx_resume_instance - start/restart a certain sdma engine
4494ed5116aSLikun Gao  *
4504ed5116aSLikun Gao  * @adev: amdgpu_device pointer
4514ed5116aSLikun Gao  * @i: instance
4524ed5116aSLikun Gao  * @restore: used to restore wptr when restart
4534ed5116aSLikun Gao  *
4544ed5116aSLikun Gao  * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
4554ed5116aSLikun Gao  * Return 0 for success.
4564ed5116aSLikun Gao  */
4574ed5116aSLikun Gao static int sdma_v7_1_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
4584ed5116aSLikun Gao {
4594ed5116aSLikun Gao 	struct amdgpu_ring *ring;
4604ed5116aSLikun Gao 	u32 rb_cntl, ib_cntl;
4614ed5116aSLikun Gao 	u32 rb_bufsz;
4624ed5116aSLikun Gao 	u32 doorbell;
4634ed5116aSLikun Gao 	u32 doorbell_offset;
4644ed5116aSLikun Gao 	u32 temp;
4654ed5116aSLikun Gao 	u64 wptr_gpu_addr;
4664ed5116aSLikun Gao 	int r;
4674ed5116aSLikun Gao 
4684ed5116aSLikun Gao 	ring = &adev->sdma.instance[i].ring;
4694ed5116aSLikun Gao 
4704ed5116aSLikun Gao 	/* Set ring buffer size in dwords */
4714ed5116aSLikun Gao 	rb_bufsz = order_base_2(ring->ring_size / 4);
4724ed5116aSLikun Gao 	rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL));
4734ed5116aSLikun Gao 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
4744ed5116aSLikun Gao #ifdef __BIG_ENDIAN
4754ed5116aSLikun Gao 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
4764ed5116aSLikun Gao 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL,
4774ed5116aSLikun Gao 				RPTR_WRITEBACK_SWAP_ENABLE, 1);
4784ed5116aSLikun Gao #endif
4794ed5116aSLikun Gao 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_PRIV, 1);
4804ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);
4814ed5116aSLikun Gao 
4824ed5116aSLikun Gao 	/* Initialize the ring buffer's read and write pointers */
4834ed5116aSLikun Gao 	if (restore) {
4844ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
4854ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
4864ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
4874ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
4884ed5116aSLikun Gao 	} else {
4894ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), 0);
4904ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), 0);
4914ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), 0);
4924ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), 0);
4934ed5116aSLikun Gao 	}
4944ed5116aSLikun Gao 	/* setup the wptr shadow polling */
4954ed5116aSLikun Gao 	wptr_gpu_addr = ring->wptr_gpu_addr;
4964ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_LO),
4974ed5116aSLikun Gao 	       lower_32_bits(wptr_gpu_addr));
4984ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_HI),
4994ed5116aSLikun Gao 	       upper_32_bits(wptr_gpu_addr));
5004ed5116aSLikun Gao 
5014ed5116aSLikun Gao 	/* set the wb address whether it's enabled or not */
5024ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_HI),
5034ed5116aSLikun Gao 	       upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
5044ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_LO),
5054ed5116aSLikun Gao 	       lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
5064ed5116aSLikun Gao 
5074ed5116aSLikun Gao 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
5084ed5116aSLikun Gao 	if (amdgpu_sriov_vf(adev))
5094ed5116aSLikun Gao 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
5104ed5116aSLikun Gao 	else
5114ed5116aSLikun Gao 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
5124ed5116aSLikun Gao 
5134ed5116aSLikun Gao 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);
5144ed5116aSLikun Gao 
5154ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
5164ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
5174ed5116aSLikun Gao 
5184ed5116aSLikun Gao 	if (!restore)
5194ed5116aSLikun Gao 		ring->wptr = 0;
5204ed5116aSLikun Gao 
5214ed5116aSLikun Gao 	/* before programing wptr to a less value, need set minor_ptr_update first */
5224ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 1);
5234ed5116aSLikun Gao 
5244ed5116aSLikun Gao 	if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
5254ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
5264ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
5274ed5116aSLikun Gao 	}
5284ed5116aSLikun Gao 
5294ed5116aSLikun Gao 	doorbell = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL));
5304ed5116aSLikun Gao 	doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET));
5314ed5116aSLikun Gao 
5324ed5116aSLikun Gao 	if (ring->use_doorbell) {
5334ed5116aSLikun Gao 		doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);
5344ed5116aSLikun Gao 		doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET,
5354ed5116aSLikun Gao 				OFFSET, ring->doorbell_index);
5364ed5116aSLikun Gao 	} else {
5374ed5116aSLikun Gao 		doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 0);
5384ed5116aSLikun Gao 	}
5394ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL), doorbell);
5404ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
5414ed5116aSLikun Gao 
5424ed5116aSLikun Gao 	if (i == 0)
5434ed5116aSLikun Gao 		adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
5444ed5116aSLikun Gao 					      ring->doorbell_index,
5454ed5116aSLikun Gao 					      adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
5464ed5116aSLikun Gao 
5474ed5116aSLikun Gao 	if (amdgpu_sriov_vf(adev))
5484ed5116aSLikun Gao 		sdma_v7_1_ring_set_wptr(ring);
5494ed5116aSLikun Gao 
5504ed5116aSLikun Gao 	/* set minor_ptr_update to 0 after wptr programed */
5514ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 0);
5524ed5116aSLikun Gao 
5534ed5116aSLikun Gao 	/* Set up sdma hang watchdog */
5544ed5116aSLikun Gao 	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL));
5554ed5116aSLikun Gao 	/* 100ms per unit */
5564ed5116aSLikun Gao 	temp = REG_SET_FIELD(temp, SDMA0_SDMA_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
5574ed5116aSLikun Gao 			     max(adev->usec_timeout/100000, 1));
5584ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL), temp);
5594ed5116aSLikun Gao 
5604ed5116aSLikun Gao 	/* Set up RESP_MODE to non-copy addresses */
5614ed5116aSLikun Gao 	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL));
5624ed5116aSLikun Gao 	temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, RESP_MODE, 3);
5634ed5116aSLikun Gao 	temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, REDO_DELAY, 9);
5644ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL), temp);
5654ed5116aSLikun Gao 
5664ed5116aSLikun Gao 	/* program default cache read and write policy */
5674ed5116aSLikun Gao 	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE));
5684ed5116aSLikun Gao 	/* clean read policy and write policy bits */
5694ed5116aSLikun Gao 	temp &= 0xFF0FFF;
5704ed5116aSLikun Gao 	temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
5714ed5116aSLikun Gao 		 (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
5724ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE), temp);
5734ed5116aSLikun Gao 
5744ed5116aSLikun Gao 	if (!amdgpu_sriov_vf(adev)) {
5754ed5116aSLikun Gao 		/* unhalt engine */
5764ed5116aSLikun Gao 		temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
5774ed5116aSLikun Gao 		temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, HALT, 0);
5784ed5116aSLikun Gao 		temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, RESET, 0);
5794ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), temp);
5804ed5116aSLikun Gao 	}
5814ed5116aSLikun Gao 
5824ed5116aSLikun Gao 	/* enable DMA RB */
5834ed5116aSLikun Gao 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 1);
5844ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);
5854ed5116aSLikun Gao 
5864ed5116aSLikun Gao 	ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL));
5874ed5116aSLikun Gao 	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 1);
5884ed5116aSLikun Gao #ifdef __BIG_ENDIAN
5894ed5116aSLikun Gao 	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
5904ed5116aSLikun Gao #endif
5914ed5116aSLikun Gao 	/* enable DMA IBs */
5924ed5116aSLikun Gao 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl);
5934ed5116aSLikun Gao 	ring->sched.ready = true;
5944ed5116aSLikun Gao 
5954ed5116aSLikun Gao 	if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
5964ed5116aSLikun Gao 		sdma_v7_1_inst_ctx_switch_enable(adev, true, i);
5974ed5116aSLikun Gao 		sdma_v7_1_inst_enable(adev, true, i);
5984ed5116aSLikun Gao 	}
5994ed5116aSLikun Gao 
6004ed5116aSLikun Gao 	r = amdgpu_ring_test_helper(ring);
6014ed5116aSLikun Gao 	if (r)
6024ed5116aSLikun Gao 		ring->sched.ready = false;
6034ed5116aSLikun Gao 
6044ed5116aSLikun Gao 	return r;
6054ed5116aSLikun Gao }
6064ed5116aSLikun Gao 
6074ed5116aSLikun Gao /**
6084ed5116aSLikun Gao  * sdma_v7_1_inst_gfx_resume - setup and start the async dma engines
6094ed5116aSLikun Gao  *
6104ed5116aSLikun Gao  * @adev: amdgpu_device pointer
61197b2e10eSSrinivasan Shanmugam  * @inst_mask: mask of dma engine instances to be enabled
6124ed5116aSLikun Gao  *
6134ed5116aSLikun Gao  * Set up the gfx DMA ring buffers and enable them.
6144ed5116aSLikun Gao  * Returns 0 for success, error for failure.
6154ed5116aSLikun Gao  */
6164ed5116aSLikun Gao static int sdma_v7_1_inst_gfx_resume(struct amdgpu_device *adev,
6174ed5116aSLikun Gao 				     uint32_t inst_mask)
6184ed5116aSLikun Gao {
6194ed5116aSLikun Gao 	int i, r;
6204ed5116aSLikun Gao 
62105282873SLikun Gao 	for_each_inst(i, inst_mask) {
6224ed5116aSLikun Gao 		r = sdma_v7_1_gfx_resume_instance(adev, i, false);
6234ed5116aSLikun Gao 		if (r)
6244ed5116aSLikun Gao 			return r;
6254ed5116aSLikun Gao 	}
6264ed5116aSLikun Gao 
6274ed5116aSLikun Gao 	return 0;
6284ed5116aSLikun Gao 
6294ed5116aSLikun Gao }
6304ed5116aSLikun Gao 
6314ed5116aSLikun Gao /**
6324ed5116aSLikun Gao  * sdma_v7_1_inst_rlc_resume - setup and start the async dma engines
6334ed5116aSLikun Gao  *
6344ed5116aSLikun Gao  * @adev: amdgpu_device pointer
6354ed5116aSLikun Gao  * @inst_mask: mask of dma engine instances to be enabled
6364ed5116aSLikun Gao  *
6374ed5116aSLikun Gao  * Set up the compute DMA queues and enable them.
6384ed5116aSLikun Gao  * Returns 0 for success, error for failure.
6394ed5116aSLikun Gao  */
6404ed5116aSLikun Gao static int sdma_v7_1_inst_rlc_resume(struct amdgpu_device *adev,
6414ed5116aSLikun Gao 				     uint32_t inst_mask)
6424ed5116aSLikun Gao {
6434ed5116aSLikun Gao 	return 0;
6444ed5116aSLikun Gao }
6454ed5116aSLikun Gao 
6464ed5116aSLikun Gao static void sdma_v7_1_inst_free_ucode_buffer(struct amdgpu_device *adev,
6474ed5116aSLikun Gao 					     uint32_t inst_mask)
6484ed5116aSLikun Gao {
6494ed5116aSLikun Gao 	int i;
6504ed5116aSLikun Gao 
65105282873SLikun Gao 	for_each_inst(i, inst_mask) {
6524ed5116aSLikun Gao 		amdgpu_bo_free_kernel(&adev->sdma.instance[i].sdma_fw_obj,
6534ed5116aSLikun Gao 				      &adev->sdma.instance[i].sdma_fw_gpu_addr,
6544ed5116aSLikun Gao 				      (void **)&adev->sdma.instance[i].sdma_fw_ptr);
6554ed5116aSLikun Gao 	}
6564ed5116aSLikun Gao }
6574ed5116aSLikun Gao 
6584ed5116aSLikun Gao /**
6594ed5116aSLikun Gao  * sdma_v7_1_inst_load_microcode - load the sDMA ME ucode
6604ed5116aSLikun Gao  *
6614ed5116aSLikun Gao  * @adev: amdgpu_device pointer
6624ed5116aSLikun Gao  * @inst_mask: mask of dma engine instances to be enabled
6634ed5116aSLikun Gao  *
6644ed5116aSLikun Gao  * Loads the sDMA0/1 ucode.
6654ed5116aSLikun Gao  * Returns 0 for success, -EINVAL if the ucode is not available.
6664ed5116aSLikun Gao  */
6674ed5116aSLikun Gao static int sdma_v7_1_inst_load_microcode(struct amdgpu_device *adev,
6684ed5116aSLikun Gao 					 uint32_t inst_mask)
6694ed5116aSLikun Gao {
6704ed5116aSLikun Gao 	const struct sdma_firmware_header_v3_0 *hdr;
6714ed5116aSLikun Gao 	const __le32 *fw_data;
6724ed5116aSLikun Gao 	u32 fw_size;
6734ed5116aSLikun Gao 	uint32_t tmp, sdma_status, ic_op_cntl;
6744ed5116aSLikun Gao 	int i, r, j;
6754ed5116aSLikun Gao 
6764ed5116aSLikun Gao 	/* halt the MEs */
6774ed5116aSLikun Gao 	sdma_v7_1_inst_enable(adev, false, inst_mask);
6784ed5116aSLikun Gao 
6794ed5116aSLikun Gao 	if (!adev->sdma.instance[0].fw)
6804ed5116aSLikun Gao 		return -EINVAL;
6814ed5116aSLikun Gao 
6824ed5116aSLikun Gao 	hdr = (const struct sdma_firmware_header_v3_0 *)
6834ed5116aSLikun Gao 		adev->sdma.instance[0].fw->data;
6844ed5116aSLikun Gao 	amdgpu_ucode_print_sdma_hdr(&hdr->header);
6854ed5116aSLikun Gao 
6864ed5116aSLikun Gao 	fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
6874ed5116aSLikun Gao 			le32_to_cpu(hdr->ucode_offset_bytes));
6884ed5116aSLikun Gao 	fw_size = le32_to_cpu(hdr->ucode_size_bytes);
6894ed5116aSLikun Gao 
69005282873SLikun Gao 	for_each_inst(i, inst_mask) {
6914ed5116aSLikun Gao 		r = amdgpu_bo_create_reserved(adev, fw_size,
6924ed5116aSLikun Gao 					      PAGE_SIZE,
6934ed5116aSLikun Gao 					      AMDGPU_GEM_DOMAIN_VRAM,
6944ed5116aSLikun Gao 					      &adev->sdma.instance[i].sdma_fw_obj,
6954ed5116aSLikun Gao 					      &adev->sdma.instance[i].sdma_fw_gpu_addr,
6964ed5116aSLikun Gao 					      (void **)&adev->sdma.instance[i].sdma_fw_ptr);
6974ed5116aSLikun Gao 		if (r) {
6984ed5116aSLikun Gao 			dev_err(adev->dev, "(%d) failed to create sdma ucode bo\n", r);
6994ed5116aSLikun Gao 			return r;
7004ed5116aSLikun Gao 		}
7014ed5116aSLikun Gao 
7024ed5116aSLikun Gao 		memcpy(adev->sdma.instance[i].sdma_fw_ptr, fw_data, fw_size);
7034ed5116aSLikun Gao 
7044ed5116aSLikun Gao 		amdgpu_bo_kunmap(adev->sdma.instance[i].sdma_fw_obj);
7054ed5116aSLikun Gao 		amdgpu_bo_unreserve(adev->sdma.instance[i].sdma_fw_obj);
7064ed5116aSLikun Gao 
7074ed5116aSLikun Gao 		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL));
7084ed5116aSLikun Gao 		tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_CNTL, GPA, 0);
7094ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL), tmp);
7104ed5116aSLikun Gao 
7114ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_LO),
7124ed5116aSLikun Gao 			lower_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
7134ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_HI),
7144ed5116aSLikun Gao 			upper_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
7154ed5116aSLikun Gao 
7164ed5116aSLikun Gao 		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
7174ed5116aSLikun Gao 		tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_OP_CNTL, PRIME_ICACHE, 1);
7184ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL), tmp);
7194ed5116aSLikun Gao 
7204ed5116aSLikun Gao 		/* Wait for sdma ucode init complete */
7214ed5116aSLikun Gao 		for (j = 0; j < adev->usec_timeout; j++) {
7224ed5116aSLikun Gao 			ic_op_cntl = RREG32_SOC15_IP(GC,
7234ed5116aSLikun Gao 					sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
7244ed5116aSLikun Gao 			sdma_status = RREG32_SOC15_IP(GC,
7254ed5116aSLikun Gao 					sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG));
7264ed5116aSLikun Gao 			if ((REG_GET_FIELD(ic_op_cntl, SDMA0_SDMA_IC_OP_CNTL, ICACHE_PRIMED) == 1) &&
7274ed5116aSLikun Gao 			    (REG_GET_FIELD(sdma_status, SDMA0_SDMA_STATUS_REG, UCODE_INIT_DONE) == 1))
7284ed5116aSLikun Gao 				break;
7294ed5116aSLikun Gao 			udelay(1);
7304ed5116aSLikun Gao 		}
7314ed5116aSLikun Gao 
7324ed5116aSLikun Gao 		if (j >= adev->usec_timeout) {
7334ed5116aSLikun Gao 			dev_err(adev->dev, "failed to init sdma ucode\n");
7344ed5116aSLikun Gao 			return -EINVAL;
7354ed5116aSLikun Gao 		}
7364ed5116aSLikun Gao 	}
7374ed5116aSLikun Gao 
7384ed5116aSLikun Gao 	return 0;
7394ed5116aSLikun Gao }
7404ed5116aSLikun Gao 
7414ed5116aSLikun Gao static int sdma_v7_1_soft_reset(struct amdgpu_ip_block *ip_block)
7424ed5116aSLikun Gao {
7434ed5116aSLikun Gao 	struct amdgpu_device *adev = ip_block->adev;
7444ed5116aSLikun Gao 	uint32_t inst_mask;
7454ed5116aSLikun Gao 	u32 tmp;
7464ed5116aSLikun Gao 	int i;
7474ed5116aSLikun Gao 
74805282873SLikun Gao 	inst_mask = GENMASK(NUM_XCC(adev->sdma.sdma_mask) - 1, 0);
7494ed5116aSLikun Gao 	sdma_v7_1_inst_gfx_stop(adev, inst_mask);
7504ed5116aSLikun Gao 
75105282873SLikun Gao 	for_each_inst(i, inst_mask) {
7524ed5116aSLikun Gao 		//tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE));
7534ed5116aSLikun Gao 		//tmp |= SDMA0_SDMA_FREEZE__FREEZE_MASK;
7544ed5116aSLikun Gao 		//WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE), tmp);
7554ed5116aSLikun Gao 		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
7564ed5116aSLikun Gao 		tmp |= SDMA0_SDMA_MCU_CNTL__HALT_MASK;
7574ed5116aSLikun Gao 		tmp |= SDMA0_SDMA_MCU_CNTL__RESET_MASK;
7584ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), tmp);
7594ed5116aSLikun Gao 
7604ed5116aSLikun Gao 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_PREEMPT), 0);
7614ed5116aSLikun Gao 
7624ed5116aSLikun Gao 		udelay(100);
7634ed5116aSLikun Gao 
7644ed5116aSLikun Gao 		tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
7654ed5116aSLikun Gao 		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
7664ed5116aSLikun Gao 		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
7674ed5116aSLikun Gao 
7684ed5116aSLikun Gao 		udelay(100);
7694ed5116aSLikun Gao 
7704ed5116aSLikun Gao 		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
7714ed5116aSLikun Gao 		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
7724ed5116aSLikun Gao 
7734ed5116aSLikun Gao 		udelay(100);
7744ed5116aSLikun Gao 	}
7754ed5116aSLikun Gao 
7764ed5116aSLikun Gao 	return sdma_v7_1_inst_start(adev, inst_mask);
7774ed5116aSLikun Gao }
7784ed5116aSLikun Gao 
7794ed5116aSLikun Gao static bool sdma_v7_1_check_soft_reset(struct amdgpu_ip_block *ip_block)
7804ed5116aSLikun Gao {
7814ed5116aSLikun Gao 	struct amdgpu_device *adev = ip_block->adev;
7824ed5116aSLikun Gao 	struct amdgpu_ring *ring;
7834ed5116aSLikun Gao 	int i, r;
7844ed5116aSLikun Gao 	long tmo = msecs_to_jiffies(1000);
7854ed5116aSLikun Gao 
7864ed5116aSLikun Gao 	for (i = 0; i < adev->sdma.num_instances; i++) {
7874ed5116aSLikun Gao 		ring = &adev->sdma.instance[i].ring;
7884ed5116aSLikun Gao 		r = amdgpu_ring_test_ib(ring, tmo);
7894ed5116aSLikun Gao 		if (r)
7904ed5116aSLikun Gao 			return true;
7914ed5116aSLikun Gao 	}
7924ed5116aSLikun Gao 
7934ed5116aSLikun Gao 	return false;
7944ed5116aSLikun Gao }
7954ed5116aSLikun Gao 
7964ed5116aSLikun Gao static int sdma_v7_1_reset_queue(struct amdgpu_ring *ring,
7974ed5116aSLikun Gao 				 unsigned int vmid,
7984ed5116aSLikun Gao 				 struct amdgpu_fence *timedout_fence)
7994ed5116aSLikun Gao {
8004ed5116aSLikun Gao 	struct amdgpu_device *adev = ring->adev;
8014ed5116aSLikun Gao 	int r;
8024ed5116aSLikun Gao 
8034ed5116aSLikun Gao 	if (ring->me >= adev->sdma.num_instances) {
8044ed5116aSLikun Gao 		dev_err(adev->dev, "sdma instance not found\n");
8054ed5116aSLikun Gao 		return -EINVAL;
8064ed5116aSLikun Gao 	}
8074ed5116aSLikun Gao 
8084ed5116aSLikun Gao 	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
8094ed5116aSLikun Gao 
8104ed5116aSLikun Gao 	r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true, 0);
8114ed5116aSLikun Gao 	if (r)
8124ed5116aSLikun Gao 		return r;
8134ed5116aSLikun Gao 
8144ed5116aSLikun Gao 	r = sdma_v7_1_gfx_resume_instance(adev, ring->me, true);
8154ed5116aSLikun Gao 	if (r)
8164ed5116aSLikun Gao 		return r;
8174ed5116aSLikun Gao 
8184ed5116aSLikun Gao 	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
8194ed5116aSLikun Gao }
8204ed5116aSLikun Gao 
8214ed5116aSLikun Gao /**
8224ed5116aSLikun Gao  * sdma_v7_1_inst_start - setup and start the async dma engines
8234ed5116aSLikun Gao  *
8244ed5116aSLikun Gao  * @adev: amdgpu_device pointer
8254ed5116aSLikun Gao  * @inst_mask: mask of dma engine instances to be enabled
8264ed5116aSLikun Gao  *
8274ed5116aSLikun Gao  * Set up the DMA engines and enable them.
8284ed5116aSLikun Gao  * Returns 0 for success, error for failure.
8294ed5116aSLikun Gao  */
8304ed5116aSLikun Gao static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
8314ed5116aSLikun Gao 				uint32_t inst_mask)
8324ed5116aSLikun Gao {
8334ed5116aSLikun Gao 	int r = 0;
8344ed5116aSLikun Gao 
8354ed5116aSLikun Gao 	if (amdgpu_sriov_vf(adev)) {
8364ed5116aSLikun Gao 		sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
8374ed5116aSLikun Gao 		sdma_v7_1_inst_enable(adev, false, inst_mask);
8384ed5116aSLikun Gao 
8394ed5116aSLikun Gao 		/* set RB registers */
8404ed5116aSLikun Gao 		r = sdma_v7_1_inst_gfx_resume(adev, inst_mask);
8414ed5116aSLikun Gao 		return r;
8424ed5116aSLikun Gao 	}
8434ed5116aSLikun Gao 
8444ed5116aSLikun Gao 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
8454ed5116aSLikun Gao 		r = sdma_v7_1_inst_load_microcode(adev, inst_mask);
8464ed5116aSLikun Gao 		if (r) {
8474ed5116aSLikun Gao 			sdma_v7_1_inst_free_ucode_buffer(adev, inst_mask);
8484ed5116aSLikun Gao 			return r;
8494ed5116aSLikun Gao 		}
8504ed5116aSLikun Gao 
8514ed5116aSLikun Gao 		if (amdgpu_emu_mode == 1)
8524ed5116aSLikun Gao 			msleep(1000);
8534ed5116aSLikun Gao 	}
8544ed5116aSLikun Gao 
8554ed5116aSLikun Gao 	/* unhalt the MEs */
8564ed5116aSLikun Gao 	sdma_v7_1_inst_enable(adev, true, inst_mask);
8574ed5116aSLikun Gao 	/* enable sdma ring preemption */
8584ed5116aSLikun Gao 	sdma_v7_1_inst_ctx_switch_enable(adev, true, inst_mask);
8594ed5116aSLikun Gao 
8604ed5116aSLikun Gao 	/* start the gfx rings and rlc compute queues */
8614ed5116aSLikun Gao 	r = sdma_v7_1_inst_gfx_resume(adev, inst_mask);
8624ed5116aSLikun Gao 	if (r)
8634ed5116aSLikun Gao 		return r;
8644ed5116aSLikun Gao 	r = sdma_v7_1_inst_rlc_resume(adev, inst_mask);
8654ed5116aSLikun Gao 
8664ed5116aSLikun Gao 	return r;
8674ed5116aSLikun Gao }
8684ed5116aSLikun Gao 
8694ed5116aSLikun Gao static int sdma_v7_1_mqd_init(struct amdgpu_device *adev, void *mqd,
8704ed5116aSLikun Gao 			      struct amdgpu_mqd_prop *prop)
8714ed5116aSLikun Gao {
8724ed5116aSLikun Gao 	struct v12_sdma_mqd *m = mqd;
8734ed5116aSLikun Gao 	uint64_t wb_gpu_addr;
8744ed5116aSLikun Gao 
8754ed5116aSLikun Gao 	m->sdmax_rlcx_rb_cntl =
8764ed5116aSLikun Gao 		order_base_2(prop->queue_size / 4) << SDMA0_SDMA_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
8774ed5116aSLikun Gao 		1 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
8784ed5116aSLikun Gao 		4 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
8794ed5116aSLikun Gao 		1 << SDMA0_SDMA_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT;
8804ed5116aSLikun Gao 
8814ed5116aSLikun Gao 	m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
8824ed5116aSLikun Gao 	m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
8834ed5116aSLikun Gao 
8844ed5116aSLikun Gao 	wb_gpu_addr = prop->wptr_gpu_addr;
8854ed5116aSLikun Gao 	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
8864ed5116aSLikun Gao 	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
8874ed5116aSLikun Gao 
8884ed5116aSLikun Gao 	wb_gpu_addr = prop->rptr_gpu_addr;
8894ed5116aSLikun Gao 	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
8904ed5116aSLikun Gao 	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
8914ed5116aSLikun Gao 
8924ed5116aSLikun Gao 	m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, 0,
8934ed5116aSLikun Gao 							regSDMA0_SDMA_QUEUE0_IB_CNTL));
8944ed5116aSLikun Gao 
8954ed5116aSLikun Gao 	m->sdmax_rlcx_doorbell_offset =
8964ed5116aSLikun Gao 		prop->doorbell_index << SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
8974ed5116aSLikun Gao 
8984ed5116aSLikun Gao 	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);
8994ed5116aSLikun Gao 
9004ed5116aSLikun Gao 	m->sdmax_rlcx_doorbell_log = 0;
9014ed5116aSLikun Gao 	m->sdmax_rlcx_rb_aql_cntl = 0x4000;	//regSDMA0_SDMA_QUEUE0_RB_AQL_CNTL_DEFAULT;
9024ed5116aSLikun Gao 	m->sdmax_rlcx_dummy_reg = 0xf;	//regSDMA0_SDMA_QUEUE0_DUMMY_REG_DEFAULT;
9034ed5116aSLikun Gao 
9044ed5116aSLikun Gao 	m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
9054ed5116aSLikun Gao 	m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
9064ed5116aSLikun Gao 
9074ed5116aSLikun Gao 	return 0;
9084ed5116aSLikun Gao }
9094ed5116aSLikun Gao 
9104ed5116aSLikun Gao static void sdma_v7_1_set_mqd_funcs(struct amdgpu_device *adev)
9114ed5116aSLikun Gao {
9124ed5116aSLikun Gao 	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v12_sdma_mqd);
9134ed5116aSLikun Gao 	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v7_1_mqd_init;
9144ed5116aSLikun Gao }
9154ed5116aSLikun Gao 
9164ed5116aSLikun Gao /**
9174ed5116aSLikun Gao  * sdma_v7_1_ring_test_ring - simple async dma engine test
9184ed5116aSLikun Gao  *
9194ed5116aSLikun Gao  * @ring: amdgpu_ring structure holding ring information
9204ed5116aSLikun Gao  *
9214ed5116aSLikun Gao  * Test the DMA engine by writing using it to write an
9224ed5116aSLikun Gao  * value to memory.
9234ed5116aSLikun Gao  * Returns 0 for success, error for failure.
9244ed5116aSLikun Gao  */
9254ed5116aSLikun Gao static int sdma_v7_1_ring_test_ring(struct amdgpu_ring *ring)
9264ed5116aSLikun Gao {
9274ed5116aSLikun Gao 	struct amdgpu_device *adev = ring->adev;
9284ed5116aSLikun Gao 	unsigned i;
9294ed5116aSLikun Gao 	unsigned index;
9304ed5116aSLikun Gao 	int r;
9314ed5116aSLikun Gao 	u32 tmp;
9324ed5116aSLikun Gao 	u64 gpu_addr;
9334ed5116aSLikun Gao 
9344ed5116aSLikun Gao 	tmp = 0xCAFEDEAD;
9354ed5116aSLikun Gao 
9364ed5116aSLikun Gao 	r = amdgpu_device_wb_get(adev, &index);
9374ed5116aSLikun Gao 	if (r) {
9384ed5116aSLikun Gao 		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
9394ed5116aSLikun Gao 		return r;
9404ed5116aSLikun Gao 	}
9414ed5116aSLikun Gao 
9424ed5116aSLikun Gao 	gpu_addr = adev->wb.gpu_addr + (index * 4);
9434ed5116aSLikun Gao 	adev->wb.wb[index] = cpu_to_le32(tmp);
9444ed5116aSLikun Gao 
9454ed5116aSLikun Gao 	r = amdgpu_ring_alloc(ring, 5);
9464ed5116aSLikun Gao 	if (r) {
9474ed5116aSLikun Gao 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
9484ed5116aSLikun Gao 		amdgpu_device_wb_free(adev, index);
9494ed5116aSLikun Gao 		return r;
9504ed5116aSLikun Gao 	}
9514ed5116aSLikun Gao 
9524ed5116aSLikun Gao 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
9534ed5116aSLikun Gao 			  SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
9544ed5116aSLikun Gao 	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
9554ed5116aSLikun Gao 	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
9564ed5116aSLikun Gao 	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
9574ed5116aSLikun Gao 	amdgpu_ring_write(ring, 0xDEADBEEF);
9584ed5116aSLikun Gao 	amdgpu_ring_commit(ring);
9594ed5116aSLikun Gao 
9604ed5116aSLikun Gao 	for (i = 0; i < adev->usec_timeout; i++) {
9614ed5116aSLikun Gao 		tmp = le32_to_cpu(adev->wb.wb[index]);
9624ed5116aSLikun Gao 		if (tmp == 0xDEADBEEF)
9634ed5116aSLikun Gao 			break;
9644ed5116aSLikun Gao 		if (amdgpu_emu_mode == 1)
9654ed5116aSLikun Gao 			msleep(1);
9664ed5116aSLikun Gao 		else
9674ed5116aSLikun Gao 			udelay(1);
9684ed5116aSLikun Gao 	}
9694ed5116aSLikun Gao 
9704ed5116aSLikun Gao 	if (i >= adev->usec_timeout)
9714ed5116aSLikun Gao 		r = -ETIMEDOUT;
9724ed5116aSLikun Gao 
9734ed5116aSLikun Gao 	amdgpu_device_wb_free(adev, index);
9744ed5116aSLikun Gao 
9754ed5116aSLikun Gao 	return r;
9764ed5116aSLikun Gao }
9774ed5116aSLikun Gao 
9784ed5116aSLikun Gao /**
9794ed5116aSLikun Gao  * sdma_v7_1_ring_test_ib - test an IB on the DMA engine
9804ed5116aSLikun Gao  *
9814ed5116aSLikun Gao  * @ring: amdgpu_ring structure holding ring information
9824ed5116aSLikun Gao  * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
9834ed5116aSLikun Gao  *
9844ed5116aSLikun Gao  * Test a simple IB in the DMA ring.
9854ed5116aSLikun Gao  * Returns 0 on success, error on failure.
9864ed5116aSLikun Gao  */
9874ed5116aSLikun Gao static int sdma_v7_1_ring_test_ib(struct amdgpu_ring *ring, long timeout)
9884ed5116aSLikun Gao {
9894ed5116aSLikun Gao 	struct amdgpu_device *adev = ring->adev;
9904ed5116aSLikun Gao 	struct amdgpu_ib ib;
9914ed5116aSLikun Gao 	struct dma_fence *f = NULL;
9924ed5116aSLikun Gao 	unsigned index;
9934ed5116aSLikun Gao 	long r;
9944ed5116aSLikun Gao 	u32 tmp = 0;
9954ed5116aSLikun Gao 	u64 gpu_addr;
9964ed5116aSLikun Gao 
9974ed5116aSLikun Gao 	tmp = 0xCAFEDEAD;
9984ed5116aSLikun Gao 	memset(&ib, 0, sizeof(ib));
9994ed5116aSLikun Gao 
10004ed5116aSLikun Gao 	r = amdgpu_device_wb_get(adev, &index);
10014ed5116aSLikun Gao 	if (r) {
10024ed5116aSLikun Gao 		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
10034ed5116aSLikun Gao 		return r;
10044ed5116aSLikun Gao 	}
10054ed5116aSLikun Gao 
10064ed5116aSLikun Gao 	gpu_addr = adev->wb.gpu_addr + (index * 4);
10074ed5116aSLikun Gao 	adev->wb.wb[index] = cpu_to_le32(tmp);
10084ed5116aSLikun Gao 
10094ed5116aSLikun Gao 	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
10104ed5116aSLikun Gao 	if (r) {
10114ed5116aSLikun Gao 		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
10124ed5116aSLikun Gao 		goto err0;
10134ed5116aSLikun Gao 	}
10144ed5116aSLikun Gao 
10154ed5116aSLikun Gao 	ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
10164ed5116aSLikun Gao 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
10174ed5116aSLikun Gao 	ib.ptr[1] = lower_32_bits(gpu_addr);
10184ed5116aSLikun Gao 	ib.ptr[2] = upper_32_bits(gpu_addr);
10194ed5116aSLikun Gao 	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
10204ed5116aSLikun Gao 	ib.ptr[4] = 0xDEADBEEF;
10214ed5116aSLikun Gao 	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
10224ed5116aSLikun Gao 	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
10234ed5116aSLikun Gao 	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
10244ed5116aSLikun Gao 	ib.length_dw = 8;
10254ed5116aSLikun Gao 
10264ed5116aSLikun Gao 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
10274ed5116aSLikun Gao 	if (r)
10284ed5116aSLikun Gao 		goto err1;
10294ed5116aSLikun Gao 
10304ed5116aSLikun Gao 	r = dma_fence_wait_timeout(f, false, timeout);
10314ed5116aSLikun Gao 	if (r == 0) {
10324ed5116aSLikun Gao 		DRM_ERROR("amdgpu: IB test timed out\n");
10334ed5116aSLikun Gao 		r = -ETIMEDOUT;
10344ed5116aSLikun Gao 		goto err1;
10354ed5116aSLikun Gao 	} else if (r < 0) {
10364ed5116aSLikun Gao 		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
10374ed5116aSLikun Gao 		goto err1;
10384ed5116aSLikun Gao 	}
10394ed5116aSLikun Gao 
10404ed5116aSLikun Gao 	tmp = le32_to_cpu(adev->wb.wb[index]);
10414ed5116aSLikun Gao 
10424ed5116aSLikun Gao 	if (tmp == 0xDEADBEEF)
10434ed5116aSLikun Gao 		r = 0;
10444ed5116aSLikun Gao 	else
10454ed5116aSLikun Gao 		r = -EINVAL;
10464ed5116aSLikun Gao 
10474ed5116aSLikun Gao err1:
10484ed5116aSLikun Gao 	amdgpu_ib_free(&ib, NULL);
10494ed5116aSLikun Gao 	dma_fence_put(f);
10504ed5116aSLikun Gao err0:
10514ed5116aSLikun Gao 	amdgpu_device_wb_free(adev, index);
10524ed5116aSLikun Gao 	return r;
10534ed5116aSLikun Gao }
10544ed5116aSLikun Gao 
10554ed5116aSLikun Gao 
10564ed5116aSLikun Gao /**
10574ed5116aSLikun Gao  * sdma_v7_1_vm_copy_pte - update PTEs by copying them from the GART
10584ed5116aSLikun Gao  *
10594ed5116aSLikun Gao  * @ib: indirect buffer to fill with commands
10604ed5116aSLikun Gao  * @pe: addr of the page entry
10614ed5116aSLikun Gao  * @src: src addr to copy from
10624ed5116aSLikun Gao  * @count: number of page entries to update
10634ed5116aSLikun Gao  *
10644ed5116aSLikun Gao  * Update PTEs by copying them from the GART using sDMA.
10654ed5116aSLikun Gao  */
10664ed5116aSLikun Gao static void sdma_v7_1_vm_copy_pte(struct amdgpu_ib *ib,
10674ed5116aSLikun Gao 				  uint64_t pe, uint64_t src,
10684ed5116aSLikun Gao 				  unsigned count)
10694ed5116aSLikun Gao {
10704ed5116aSLikun Gao 	unsigned bytes = count * 8;
10714ed5116aSLikun Gao 
10724ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
10734ed5116aSLikun Gao 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
10744ed5116aSLikun Gao 
10754ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = bytes - 1;
10764ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
10774ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(src);
10784ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(src);
10794ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
10804ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
10814ed5116aSLikun Gao 
10824ed5116aSLikun Gao }
10834ed5116aSLikun Gao 
10844ed5116aSLikun Gao /**
10854ed5116aSLikun Gao  * sdma_v7_1_vm_write_pte - update PTEs by writing them manually
10864ed5116aSLikun Gao  *
10874ed5116aSLikun Gao  * @ib: indirect buffer to fill with commands
10884ed5116aSLikun Gao  * @pe: addr of the page entry
10894ed5116aSLikun Gao  * @value: dst addr to write into pe
10904ed5116aSLikun Gao  * @count: number of page entries to update
10914ed5116aSLikun Gao  * @incr: increase next addr by incr bytes
10924ed5116aSLikun Gao  *
10934ed5116aSLikun Gao  * Update PTEs by writing them manually using sDMA.
10944ed5116aSLikun Gao  */
10954ed5116aSLikun Gao static void sdma_v7_1_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
10964ed5116aSLikun Gao 				   uint64_t value, unsigned count,
10974ed5116aSLikun Gao 				   uint32_t incr)
10984ed5116aSLikun Gao {
10994ed5116aSLikun Gao 	unsigned ndw = count * 2;
11004ed5116aSLikun Gao 
11014ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
11024ed5116aSLikun Gao 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
11034ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
11044ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
11054ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = ndw - 1;
11064ed5116aSLikun Gao 	for (; ndw > 0; ndw -= 2) {
11074ed5116aSLikun Gao 		ib->ptr[ib->length_dw++] = lower_32_bits(value);
11084ed5116aSLikun Gao 		ib->ptr[ib->length_dw++] = upper_32_bits(value);
11094ed5116aSLikun Gao 		value += incr;
11104ed5116aSLikun Gao 	}
11114ed5116aSLikun Gao }
11124ed5116aSLikun Gao 
11134ed5116aSLikun Gao /**
11144ed5116aSLikun Gao  * sdma_v7_1_vm_set_pte_pde - update the page tables using sDMA
11154ed5116aSLikun Gao  *
11164ed5116aSLikun Gao  * @ib: indirect buffer to fill with commands
11174ed5116aSLikun Gao  * @pe: addr of the page entry
11184ed5116aSLikun Gao  * @addr: dst addr to write into pe
11194ed5116aSLikun Gao  * @count: number of page entries to update
11204ed5116aSLikun Gao  * @incr: increase next addr by incr bytes
11214ed5116aSLikun Gao  * @flags: access flags
11224ed5116aSLikun Gao  *
11234ed5116aSLikun Gao  * Update the page tables using sDMA.
11244ed5116aSLikun Gao  */
11254ed5116aSLikun Gao static void sdma_v7_1_vm_set_pte_pde(struct amdgpu_ib *ib,
11264ed5116aSLikun Gao 				     uint64_t pe,
11274ed5116aSLikun Gao 				     uint64_t addr, unsigned count,
11284ed5116aSLikun Gao 				     uint32_t incr, uint64_t flags)
11294ed5116aSLikun Gao {
11304ed5116aSLikun Gao 	/* for physically contiguous pages (vram) */
11311c85f126SMukul Joshi 	u32 header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
11321c85f126SMukul Joshi 
11331c85f126SMukul Joshi 	if (amdgpu_mtype_local)
11341c85f126SMukul Joshi 		header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x3);
11351c85f126SMukul Joshi 	else
11361c85f126SMukul Joshi 		header |= (SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x2) |
11371c85f126SMukul Joshi 			   SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1) |
11381c85f126SMukul Joshi 			   SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(0x3));
11391c85f126SMukul Joshi 
11401c85f126SMukul Joshi 	ib->ptr[ib->length_dw++] = header;
11414ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
11424ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
11434ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
11444ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
11454ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
11464ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
11474ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = incr; /* increment size */
11484ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = 0;
11494ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
11504ed5116aSLikun Gao }
11514ed5116aSLikun Gao 
11524ed5116aSLikun Gao /**
11534ed5116aSLikun Gao  * sdma_v7_1_ring_pad_ib - pad the IB
11544ed5116aSLikun Gao  *
11554ed5116aSLikun Gao  * @ring: amdgpu ring pointer
11564ed5116aSLikun Gao  * @ib: indirect buffer to fill with padding
11574ed5116aSLikun Gao  *
11584ed5116aSLikun Gao  * Pad the IB with NOPs to a boundary multiple of 8.
11594ed5116aSLikun Gao  */
11604ed5116aSLikun Gao static void sdma_v7_1_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
11614ed5116aSLikun Gao {
11624ed5116aSLikun Gao 	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
11634ed5116aSLikun Gao 	u32 pad_count;
11644ed5116aSLikun Gao 	int i;
11654ed5116aSLikun Gao 
11664ed5116aSLikun Gao 	pad_count = (-ib->length_dw) & 0x7;
11674ed5116aSLikun Gao 	for (i = 0; i < pad_count; i++)
11684ed5116aSLikun Gao 		if (sdma && sdma->burst_nop && (i == 0))
11694ed5116aSLikun Gao 			ib->ptr[ib->length_dw++] =
11704ed5116aSLikun Gao 				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) |
11714ed5116aSLikun Gao 				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
11724ed5116aSLikun Gao 		else
11734ed5116aSLikun Gao 			ib->ptr[ib->length_dw++] =
11744ed5116aSLikun Gao 				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
11754ed5116aSLikun Gao }
11764ed5116aSLikun Gao 
11774ed5116aSLikun Gao /**
11784ed5116aSLikun Gao  * sdma_v7_1_ring_emit_pipeline_sync - sync the pipeline
11794ed5116aSLikun Gao  *
11804ed5116aSLikun Gao  * @ring: amdgpu_ring pointer
11814ed5116aSLikun Gao  *
11824ed5116aSLikun Gao  * Make sure all previous operations are completed (CIK).
11834ed5116aSLikun Gao  */
11844ed5116aSLikun Gao static void sdma_v7_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
11854ed5116aSLikun Gao {
11864ed5116aSLikun Gao 	uint32_t seq = ring->fence_drv.sync_seq;
11874ed5116aSLikun Gao 	uint64_t addr = ring->fence_drv.gpu_addr;
11884ed5116aSLikun Gao 
11894ed5116aSLikun Gao 	/* wait for idle */
11904ed5116aSLikun Gao 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
11914ed5116aSLikun Gao 			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
11924ed5116aSLikun Gao 			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
11934ed5116aSLikun Gao 	amdgpu_ring_write(ring, addr & 0xfffffffc);
11944ed5116aSLikun Gao 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
11954ed5116aSLikun Gao 	amdgpu_ring_write(ring, seq); /* reference */
11964ed5116aSLikun Gao 	amdgpu_ring_write(ring, 0xffffffff); /* mask */
11974ed5116aSLikun Gao 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
11984ed5116aSLikun Gao 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
11994ed5116aSLikun Gao }
12004ed5116aSLikun Gao 
12014ed5116aSLikun Gao /**
12024ed5116aSLikun Gao  * sdma_v7_1_ring_emit_vm_flush - vm flush using sDMA
12034ed5116aSLikun Gao  *
12044ed5116aSLikun Gao  * @ring: amdgpu_ring pointer
12054ed5116aSLikun Gao  * @vmid: vmid number to use
12064ed5116aSLikun Gao  * @pd_addr: address
12074ed5116aSLikun Gao  *
12084ed5116aSLikun Gao  * Update the page table base and flush the VM TLB
12094ed5116aSLikun Gao  * using sDMA.
12104ed5116aSLikun Gao  */
12114ed5116aSLikun Gao static void sdma_v7_1_ring_emit_vm_flush(struct amdgpu_ring *ring,
12124ed5116aSLikun Gao 					 unsigned vmid, uint64_t pd_addr)
12134ed5116aSLikun Gao {
12144ed5116aSLikun Gao 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
12154ed5116aSLikun Gao }
12164ed5116aSLikun Gao 
12174ed5116aSLikun Gao static void sdma_v7_1_ring_emit_wreg(struct amdgpu_ring *ring,
12184ed5116aSLikun Gao 				     uint32_t reg, uint32_t val)
12194ed5116aSLikun Gao {
12204ed5116aSLikun Gao 	/* SRBM WRITE command will not support on sdma v7.
12214ed5116aSLikun Gao 	 * Use Register WRITE command instead, which OPCODE is same as SRBM WRITE
12224ed5116aSLikun Gao 	 */
12234ed5116aSLikun Gao 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE));
1224fcc4fc75SLikun Gao 	amdgpu_ring_write(ring, soc_v1_0_normalize_xcc_reg_offset(reg) << 2);
12254ed5116aSLikun Gao 	amdgpu_ring_write(ring, val);
12264ed5116aSLikun Gao }
12274ed5116aSLikun Gao 
12284ed5116aSLikun Gao static void sdma_v7_1_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
12294ed5116aSLikun Gao 					 uint32_t val, uint32_t mask)
12304ed5116aSLikun Gao {
12314ed5116aSLikun Gao 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
12324ed5116aSLikun Gao 			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
1233fcc4fc75SLikun Gao 	amdgpu_ring_write(ring, soc_v1_0_normalize_xcc_reg_offset(reg) << 2);
12344ed5116aSLikun Gao 	amdgpu_ring_write(ring, 0);
12354ed5116aSLikun Gao 	amdgpu_ring_write(ring, val); /* reference */
12364ed5116aSLikun Gao 	amdgpu_ring_write(ring, mask); /* mask */
12374ed5116aSLikun Gao 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
12384ed5116aSLikun Gao 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
12394ed5116aSLikun Gao }
12404ed5116aSLikun Gao 
12414ed5116aSLikun Gao static void sdma_v7_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
12424ed5116aSLikun Gao 						   uint32_t reg0, uint32_t reg1,
12434ed5116aSLikun Gao 						   uint32_t ref, uint32_t mask)
12444ed5116aSLikun Gao {
12454ed5116aSLikun Gao 	amdgpu_ring_emit_wreg(ring, reg0, ref);
12464ed5116aSLikun Gao 	/* wait for a cycle to reset vm_inv_eng*_ack */
12474ed5116aSLikun Gao 	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
12484ed5116aSLikun Gao 	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
12494ed5116aSLikun Gao }
12504ed5116aSLikun Gao 
12514ed5116aSLikun Gao static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block)
12524ed5116aSLikun Gao {
12534ed5116aSLikun Gao 	struct amdgpu_device *adev = ip_block->adev;
12544ed5116aSLikun Gao 	int r;
12554ed5116aSLikun Gao 
12564ed5116aSLikun Gao 	r = amdgpu_sdma_init_microcode(adev, 0, true);
12574ed5116aSLikun Gao 	if (r) {
12584ed5116aSLikun Gao 		DRM_ERROR("Failed to init sdma firmware!\n");
12594ed5116aSLikun Gao 		return r;
12604ed5116aSLikun Gao 	}
12614ed5116aSLikun Gao 
12624ed5116aSLikun Gao 	sdma_v7_1_set_ring_funcs(adev);
12634ed5116aSLikun Gao 	sdma_v7_1_set_buffer_funcs(adev);
12644ed5116aSLikun Gao 	sdma_v7_1_set_vm_pte_funcs(adev);
12654ed5116aSLikun Gao 	sdma_v7_1_set_irq_funcs(adev);
12664ed5116aSLikun Gao 	sdma_v7_1_set_mqd_funcs(adev);
12674ed5116aSLikun Gao 
12684ed5116aSLikun Gao 	return 0;
12694ed5116aSLikun Gao }
12704ed5116aSLikun Gao 
12714ed5116aSLikun Gao static int sdma_v7_1_sw_init(struct amdgpu_ip_block *ip_block)
12724ed5116aSLikun Gao {
12734ed5116aSLikun Gao 	struct amdgpu_ring *ring;
12744ed5116aSLikun Gao 	int r, i;
12754ed5116aSLikun Gao 	struct amdgpu_device *adev = ip_block->adev;
12764ed5116aSLikun Gao 	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
12774ed5116aSLikun Gao 	uint32_t *ptr;
12784ed5116aSLikun Gao 	u32 xcc_id;
12794ed5116aSLikun Gao 
12804ed5116aSLikun Gao 	/* SDMA trap event */
1281db9ca58eSHawking Zhang 	r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GFX,
1282e50a6eceSHawking Zhang 			      GFX_12_1_0__SRCID__SDMA_TRAP,
12834ed5116aSLikun Gao 			      &adev->sdma.trap_irq);
12844ed5116aSLikun Gao 	if (r)
12854ed5116aSLikun Gao 		return r;
12864ed5116aSLikun Gao 
12874ed5116aSLikun Gao 	for (i = 0; i < adev->sdma.num_instances; i++) {
12884ed5116aSLikun Gao 		ring = &adev->sdma.instance[i].ring;
12894ed5116aSLikun Gao 		ring->ring_obj = NULL;
12904ed5116aSLikun Gao 		ring->use_doorbell = true;
12914ed5116aSLikun Gao 		ring->me = i;
129249f47cbfSLikun Gao 
129349f47cbfSLikun Gao 		for (xcc_id = 0; xcc_id < fls(adev->gfx.xcc_mask); xcc_id++) {
129449f47cbfSLikun Gao 			if (adev->sdma.instance[i].xcc_id == GET_INST(GC, xcc_id))
129549f47cbfSLikun Gao 				break;
129649f47cbfSLikun Gao 		}
12974ed5116aSLikun Gao 
12984ed5116aSLikun Gao 		DRM_DEBUG("SDMA%d.%d use_doorbell being set to: [%s]\n",
129949f47cbfSLikun Gao 				xcc_id, GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc,
13004ed5116aSLikun Gao 				ring->use_doorbell?"true":"false");
13014ed5116aSLikun Gao 
13024ed5116aSLikun Gao 		ring->doorbell_index =
13034ed5116aSLikun Gao 			(adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset
13044ed5116aSLikun Gao 
13054ed5116aSLikun Gao 		ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
13064ed5116aSLikun Gao 		sprintf(ring->name, "sdma%d.%d", xcc_id,
130749f47cbfSLikun Gao 				GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc);
13084ed5116aSLikun Gao 		r = amdgpu_ring_init(adev, ring, 1024,
13094ed5116aSLikun Gao 				     &adev->sdma.trap_irq,
13104ed5116aSLikun Gao 				     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
13114ed5116aSLikun Gao 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
13124ed5116aSLikun Gao 		if (r)
13134ed5116aSLikun Gao 			return r;
13144ed5116aSLikun Gao 	}
13154ed5116aSLikun Gao 
13164ed5116aSLikun Gao 	adev->sdma.supported_reset =
13174ed5116aSLikun Gao 		amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
13184ed5116aSLikun Gao 	if (!amdgpu_sriov_vf(adev) &&
13194ed5116aSLikun Gao 	    !adev->debug_disable_gpu_ring_reset)
13204ed5116aSLikun Gao 		adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
13214ed5116aSLikun Gao 
13224ed5116aSLikun Gao 	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
13234ed5116aSLikun Gao 	if (r)
13244ed5116aSLikun Gao 		return r;
13254ed5116aSLikun Gao 	/* Allocate memory for SDMA IP Dump buffer */
13264ed5116aSLikun Gao 	ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
13274ed5116aSLikun Gao 	if (ptr)
13284ed5116aSLikun Gao 		adev->sdma.ip_dump = ptr;
13294ed5116aSLikun Gao 	else
13304ed5116aSLikun Gao 		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
13314ed5116aSLikun Gao 
13324ed5116aSLikun Gao #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
13334ed5116aSLikun Gao 	adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
13344ed5116aSLikun Gao #endif
13354ed5116aSLikun Gao 
13364ed5116aSLikun Gao 	return r;
13374ed5116aSLikun Gao }
13384ed5116aSLikun Gao 
13394ed5116aSLikun Gao static int sdma_v7_1_sw_fini(struct amdgpu_ip_block *ip_block)
13404ed5116aSLikun Gao {
13414ed5116aSLikun Gao 	struct amdgpu_device *adev = ip_block->adev;
13424ed5116aSLikun Gao 	int i;
13434ed5116aSLikun Gao 
13444ed5116aSLikun Gao 	for (i = 0; i < adev->sdma.num_instances; i++)
13454ed5116aSLikun Gao 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
13464ed5116aSLikun Gao 
13474ed5116aSLikun Gao 	amdgpu_sdma_sysfs_reset_mask_fini(adev);
13484ed5116aSLikun Gao 	amdgpu_sdma_destroy_inst_ctx(adev, true);
13494ed5116aSLikun Gao 
13504ed5116aSLikun Gao 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
135149f47cbfSLikun Gao 		sdma_v7_1_inst_free_ucode_buffer(adev, adev->sdma.sdma_mask);
13524ed5116aSLikun Gao 
13534ed5116aSLikun Gao 	kfree(adev->sdma.ip_dump);
13544ed5116aSLikun Gao 
13554ed5116aSLikun Gao 	return 0;
13564ed5116aSLikun Gao }
13574ed5116aSLikun Gao 
13584ed5116aSLikun Gao static int sdma_v7_1_hw_init(struct amdgpu_ip_block *ip_block)
13594ed5116aSLikun Gao {
13604ed5116aSLikun Gao 	struct amdgpu_device *adev = ip_block->adev;
136105282873SLikun Gao 	uint32_t inst_mask;
13624ed5116aSLikun Gao 
136305282873SLikun Gao 	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
136405282873SLikun Gao 
136505282873SLikun Gao 	return sdma_v7_1_inst_start(adev, inst_mask);
13664ed5116aSLikun Gao }
13674ed5116aSLikun Gao 
13684ed5116aSLikun Gao static int sdma_v7_1_hw_fini(struct amdgpu_ip_block *ip_block)
13694ed5116aSLikun Gao {
13704ed5116aSLikun Gao 	struct amdgpu_device *adev = ip_block->adev;
13714ed5116aSLikun Gao 
13724ed5116aSLikun Gao 	if (amdgpu_sriov_vf(adev))
13734ed5116aSLikun Gao 		return 0;
13744ed5116aSLikun Gao 
137549f47cbfSLikun Gao 	sdma_v7_1_inst_ctx_switch_enable(adev, false, adev->sdma.sdma_mask);
137649f47cbfSLikun Gao 	sdma_v7_1_inst_enable(adev, false, adev->sdma.sdma_mask);
13774ed5116aSLikun Gao 
13784ed5116aSLikun Gao 	return 0;
13794ed5116aSLikun Gao }
13804ed5116aSLikun Gao 
13814ed5116aSLikun Gao static int sdma_v7_1_suspend(struct amdgpu_ip_block *ip_block)
13824ed5116aSLikun Gao {
13834ed5116aSLikun Gao 	return sdma_v7_1_hw_fini(ip_block);
13844ed5116aSLikun Gao }
13854ed5116aSLikun Gao 
13864ed5116aSLikun Gao static int sdma_v7_1_resume(struct amdgpu_ip_block *ip_block)
13874ed5116aSLikun Gao {
13884ed5116aSLikun Gao 	return sdma_v7_1_hw_init(ip_block);
13894ed5116aSLikun Gao }
13904ed5116aSLikun Gao 
13914ed5116aSLikun Gao static bool sdma_v7_1_is_idle(struct amdgpu_ip_block *ip_block)
13924ed5116aSLikun Gao {
13934ed5116aSLikun Gao 	struct amdgpu_device *adev = ip_block->adev;
13944ed5116aSLikun Gao 	u32 i;
13954ed5116aSLikun Gao 
13964ed5116aSLikun Gao 	for (i = 0; i < adev->sdma.num_instances; i++) {
13974ed5116aSLikun Gao 		u32 tmp = RREG32(sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG));
13984ed5116aSLikun Gao 
13994ed5116aSLikun Gao 		if (!(tmp & SDMA0_SDMA_STATUS_REG__IDLE_MASK))
14004ed5116aSLikun Gao 			return false;
14014ed5116aSLikun Gao 	}
14024ed5116aSLikun Gao 
14034ed5116aSLikun Gao 	return true;
14044ed5116aSLikun Gao }
14054ed5116aSLikun Gao 
14064ed5116aSLikun Gao static int sdma_v7_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
14074ed5116aSLikun Gao {
14084ed5116aSLikun Gao 	unsigned i, j;
14094ed5116aSLikun Gao 	u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
14104ed5116aSLikun Gao 	struct amdgpu_device *adev = ip_block->adev;
14114ed5116aSLikun Gao 
14124ed5116aSLikun Gao 	for (i = 0; i < adev->usec_timeout; i++) {
14134ed5116aSLikun Gao 		for (j = 0; j < adev->sdma.num_instances; j++) {
14144ed5116aSLikun Gao 			sdma[j] = RREG32(sdma_v7_1_get_reg_offset(adev,
14154ed5116aSLikun Gao 						j, regSDMA0_SDMA_STATUS_REG));
14164ed5116aSLikun Gao 			if (!(sdma[j] & SDMA0_SDMA_STATUS_REG__IDLE_MASK))
14174ed5116aSLikun Gao 				break;
14184ed5116aSLikun Gao 		}
14194ed5116aSLikun Gao 		if (j == adev->sdma.num_instances)
14204ed5116aSLikun Gao 			return 0;
14214ed5116aSLikun Gao 		udelay(1);
14224ed5116aSLikun Gao 	}
14234ed5116aSLikun Gao 	return -ETIMEDOUT;
14244ed5116aSLikun Gao }
14254ed5116aSLikun Gao 
14264ed5116aSLikun Gao static int sdma_v7_1_ring_preempt_ib(struct amdgpu_ring *ring)
14274ed5116aSLikun Gao {
14284ed5116aSLikun Gao 	int i, r = 0;
14294ed5116aSLikun Gao 	struct amdgpu_device *adev = ring->adev;
14304ed5116aSLikun Gao 	u32 index = 0;
14314ed5116aSLikun Gao 	u64 sdma_gfx_preempt;
14324ed5116aSLikun Gao 
14334ed5116aSLikun Gao 	amdgpu_sdma_get_index_from_ring(ring, &index);
14344ed5116aSLikun Gao 	sdma_gfx_preempt =
14354ed5116aSLikun Gao 		sdma_v7_1_get_reg_offset(adev, index, regSDMA0_SDMA_QUEUE0_PREEMPT);
14364ed5116aSLikun Gao 
14374ed5116aSLikun Gao 	/* assert preemption condition */
14384ed5116aSLikun Gao 	amdgpu_ring_set_preempt_cond_exec(ring, false);
14394ed5116aSLikun Gao 
14404ed5116aSLikun Gao 	/* emit the trailing fence */
14414ed5116aSLikun Gao 	ring->trail_seq += 1;
14424ed5116aSLikun Gao 	r = amdgpu_ring_alloc(ring, 10);
14434ed5116aSLikun Gao 	if (r) {
14444ed5116aSLikun Gao 		DRM_ERROR("ring %d failed to be allocated \n", ring->idx);
14454ed5116aSLikun Gao 		return r;
14464ed5116aSLikun Gao 	}
14474ed5116aSLikun Gao 	sdma_v7_1_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
14484ed5116aSLikun Gao 				  ring->trail_seq, 0);
14494ed5116aSLikun Gao 	amdgpu_ring_commit(ring);
14504ed5116aSLikun Gao 
14514ed5116aSLikun Gao 	/* assert IB preemption */
14524ed5116aSLikun Gao 	WREG32(sdma_gfx_preempt, 1);
14534ed5116aSLikun Gao 
14544ed5116aSLikun Gao 	/* poll the trailing fence */
14554ed5116aSLikun Gao 	for (i = 0; i < adev->usec_timeout; i++) {
14564ed5116aSLikun Gao 		if (ring->trail_seq ==
14574ed5116aSLikun Gao 		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
14584ed5116aSLikun Gao 			break;
14594ed5116aSLikun Gao 		udelay(1);
14604ed5116aSLikun Gao 	}
14614ed5116aSLikun Gao 
14624ed5116aSLikun Gao 	if (i >= adev->usec_timeout) {
14634ed5116aSLikun Gao 		r = -EINVAL;
14644ed5116aSLikun Gao 		DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
14654ed5116aSLikun Gao 	}
14664ed5116aSLikun Gao 
14674ed5116aSLikun Gao 	/* deassert IB preemption */
14684ed5116aSLikun Gao 	WREG32(sdma_gfx_preempt, 0);
14694ed5116aSLikun Gao 
14704ed5116aSLikun Gao 	/* deassert the preemption condition */
14714ed5116aSLikun Gao 	amdgpu_ring_set_preempt_cond_exec(ring, true);
14724ed5116aSLikun Gao 	return r;
14734ed5116aSLikun Gao }
14744ed5116aSLikun Gao 
14754ed5116aSLikun Gao static int sdma_v7_1_set_trap_irq_state(struct amdgpu_device *adev,
14764ed5116aSLikun Gao 					struct amdgpu_irq_src *source,
14774ed5116aSLikun Gao 					unsigned type,
14784ed5116aSLikun Gao 					enum amdgpu_interrupt_state state)
14794ed5116aSLikun Gao {
14804ed5116aSLikun Gao 	u32 sdma_cntl;
14814ed5116aSLikun Gao 
14824ed5116aSLikun Gao 	u32 reg_offset = sdma_v7_1_get_reg_offset(adev, type, regSDMA0_SDMA_CNTL);
14834ed5116aSLikun Gao 
14844ed5116aSLikun Gao 	sdma_cntl = RREG32(reg_offset);
14854ed5116aSLikun Gao 	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_SDMA_CNTL, TRAP_ENABLE,
14864ed5116aSLikun Gao 		       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
14874ed5116aSLikun Gao 	WREG32(reg_offset, sdma_cntl);
14884ed5116aSLikun Gao 
14894ed5116aSLikun Gao 	return 0;
14904ed5116aSLikun Gao }
14914ed5116aSLikun Gao 
14924ed5116aSLikun Gao static int sdma_v7_1_process_trap_irq(struct amdgpu_device *adev,
14934ed5116aSLikun Gao 				      struct amdgpu_irq_src *source,
14944ed5116aSLikun Gao 				      struct amdgpu_iv_entry *entry)
14954ed5116aSLikun Gao {
149649f47cbfSLikun Gao 	int inst, instances, queue, xcc_id = 0;
14974ed5116aSLikun Gao 
14984ed5116aSLikun Gao 	DRM_DEBUG("IH: SDMA trap\n");
14994ed5116aSLikun Gao 
1500*f7e06786STvrtko Ursulin 	if (drm_WARN_ON_ONCE(&adev->ddev,
1501*f7e06786STvrtko Ursulin 			     adev->enable_mes &&
1502*f7e06786STvrtko Ursulin 			     (entry->src_data[0] & AMDGPU_FENCE_MES_QUEUE_FLAG)))
15034ed5116aSLikun Gao 		return 0;
15044ed5116aSLikun Gao 
15054ed5116aSLikun Gao 	queue = entry->ring_id & 0xf;
15064ed5116aSLikun Gao 	if (adev->gfx.funcs && adev->gfx.funcs->ih_node_to_logical_xcc)
15074ed5116aSLikun Gao 		xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, entry->node_id);
15084ed5116aSLikun Gao 	else
15094ed5116aSLikun Gao 		dev_warn(adev->dev, "IH: SDMA may get wrong xcc id as gfx function not available\n");
151049f47cbfSLikun Gao 	inst = ((entry->ring_id & 0xf0) >> 4) +
151149f47cbfSLikun Gao 		GET_INST(GC, xcc_id) * adev->sdma.num_inst_per_xcc;
151249f47cbfSLikun Gao 	for (instances = 0; instances < adev->sdma.num_instances; instances++) {
151349f47cbfSLikun Gao 		if (inst == GET_INST(SDMA0, instances))
151449f47cbfSLikun Gao 			break;
151549f47cbfSLikun Gao 	}
15164ed5116aSLikun Gao 	if (instances > adev->sdma.num_instances - 1) {
15174ed5116aSLikun Gao 		DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
15184ed5116aSLikun Gao 		return -EINVAL;
15194ed5116aSLikun Gao 	}
15204ed5116aSLikun Gao 
15214ed5116aSLikun Gao 	switch (entry->client_id) {
1522db9ca58eSHawking Zhang 	case SOC_V1_0_IH_CLIENTID_GFX:
15234ed5116aSLikun Gao 		switch (queue) {
15244ed5116aSLikun Gao 		case 0:
15254ed5116aSLikun Gao 			amdgpu_fence_process(&adev->sdma.instance[instances].ring);
15264ed5116aSLikun Gao 			break;
15274ed5116aSLikun Gao 		default:
15284ed5116aSLikun Gao 			break;
15294ed5116aSLikun Gao 		}
15304ed5116aSLikun Gao 		break;
15314ed5116aSLikun Gao 	}
15324ed5116aSLikun Gao 	return 0;
15334ed5116aSLikun Gao }
15344ed5116aSLikun Gao 
15354ed5116aSLikun Gao static int sdma_v7_1_process_illegal_inst_irq(struct amdgpu_device *adev,
15364ed5116aSLikun Gao 					      struct amdgpu_irq_src *source,
15374ed5116aSLikun Gao 					      struct amdgpu_iv_entry *entry)
15384ed5116aSLikun Gao {
15394ed5116aSLikun Gao 	return 0;
15404ed5116aSLikun Gao }
15414ed5116aSLikun Gao 
15424ed5116aSLikun Gao static int sdma_v7_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
15434ed5116aSLikun Gao 					   enum amd_clockgating_state state)
15444ed5116aSLikun Gao {
15454ed5116aSLikun Gao 	return 0;
15464ed5116aSLikun Gao }
15474ed5116aSLikun Gao 
15484ed5116aSLikun Gao static int sdma_v7_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
15494ed5116aSLikun Gao 					  enum amd_powergating_state state)
15504ed5116aSLikun Gao {
15514ed5116aSLikun Gao 	return 0;
15524ed5116aSLikun Gao }
15534ed5116aSLikun Gao 
15544ed5116aSLikun Gao static void sdma_v7_1_get_clockgating_state(struct amdgpu_ip_block *ip_block,
15554ed5116aSLikun Gao 					    u64 *flags)
15564ed5116aSLikun Gao {
15574ed5116aSLikun Gao }
15584ed5116aSLikun Gao 
15594ed5116aSLikun Gao static void sdma_v7_1_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
15604ed5116aSLikun Gao {
15614ed5116aSLikun Gao 	struct amdgpu_device *adev = ip_block->adev;
15624ed5116aSLikun Gao 	int i, j;
15634ed5116aSLikun Gao 	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
15644ed5116aSLikun Gao 	uint32_t instance_offset;
15654ed5116aSLikun Gao 
15664ed5116aSLikun Gao 	if (!adev->sdma.ip_dump)
15674ed5116aSLikun Gao 		return;
15684ed5116aSLikun Gao 
15694ed5116aSLikun Gao 	drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
15704ed5116aSLikun Gao 	for (i = 0; i < adev->sdma.num_instances; i++) {
15714ed5116aSLikun Gao 		instance_offset = i * reg_count;
15724ed5116aSLikun Gao 		drm_printf(p, "\nInstance:%d\n", i);
15734ed5116aSLikun Gao 
15744ed5116aSLikun Gao 		for (j = 0; j < reg_count; j++)
15754ed5116aSLikun Gao 			drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_7_1[j].reg_name,
15764ed5116aSLikun Gao 				   adev->sdma.ip_dump[instance_offset + j]);
15774ed5116aSLikun Gao 	}
15784ed5116aSLikun Gao }
15794ed5116aSLikun Gao 
15804ed5116aSLikun Gao static void sdma_v7_1_dump_ip_state(struct amdgpu_ip_block *ip_block)
15814ed5116aSLikun Gao {
15824ed5116aSLikun Gao 	struct amdgpu_device *adev = ip_block->adev;
15834ed5116aSLikun Gao 	int i, j;
15844ed5116aSLikun Gao 	uint32_t instance_offset;
15854ed5116aSLikun Gao 	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
15864ed5116aSLikun Gao 
15874ed5116aSLikun Gao 	if (!adev->sdma.ip_dump)
15884ed5116aSLikun Gao 		return;
15894ed5116aSLikun Gao 
15904ed5116aSLikun Gao 	amdgpu_gfx_off_ctrl(adev, false);
15914ed5116aSLikun Gao 	for (i = 0; i < adev->sdma.num_instances; i++) {
15924ed5116aSLikun Gao 		instance_offset = i * reg_count;
15934ed5116aSLikun Gao 		for (j = 0; j < reg_count; j++)
15944ed5116aSLikun Gao 			adev->sdma.ip_dump[instance_offset + j] =
15954ed5116aSLikun Gao 				RREG32(sdma_v7_1_get_reg_offset(adev, i,
15964ed5116aSLikun Gao 				       sdma_reg_list_7_1[j].reg_offset));
15974ed5116aSLikun Gao 	}
15984ed5116aSLikun Gao 	amdgpu_gfx_off_ctrl(adev, true);
15994ed5116aSLikun Gao }
16004ed5116aSLikun Gao 
16014ed5116aSLikun Gao const struct amd_ip_funcs sdma_v7_1_ip_funcs = {
16024ed5116aSLikun Gao 	.name = "sdma_v7_1",
16034ed5116aSLikun Gao 	.early_init = sdma_v7_1_early_init,
16044ed5116aSLikun Gao 	.late_init = NULL,
16054ed5116aSLikun Gao 	.sw_init = sdma_v7_1_sw_init,
16064ed5116aSLikun Gao 	.sw_fini = sdma_v7_1_sw_fini,
16074ed5116aSLikun Gao 	.hw_init = sdma_v7_1_hw_init,
16084ed5116aSLikun Gao 	.hw_fini = sdma_v7_1_hw_fini,
16094ed5116aSLikun Gao 	.suspend = sdma_v7_1_suspend,
16104ed5116aSLikun Gao 	.resume = sdma_v7_1_resume,
16114ed5116aSLikun Gao 	.is_idle = sdma_v7_1_is_idle,
16124ed5116aSLikun Gao 	.wait_for_idle = sdma_v7_1_wait_for_idle,
16134ed5116aSLikun Gao 	.soft_reset = sdma_v7_1_soft_reset,
16144ed5116aSLikun Gao 	.check_soft_reset = sdma_v7_1_check_soft_reset,
16154ed5116aSLikun Gao 	.set_clockgating_state = sdma_v7_1_set_clockgating_state,
16164ed5116aSLikun Gao 	.set_powergating_state = sdma_v7_1_set_powergating_state,
16174ed5116aSLikun Gao 	.get_clockgating_state = sdma_v7_1_get_clockgating_state,
16184ed5116aSLikun Gao 	.dump_ip_state = sdma_v7_1_dump_ip_state,
16194ed5116aSLikun Gao 	.print_ip_state = sdma_v7_1_print_ip_state,
16204ed5116aSLikun Gao };
16214ed5116aSLikun Gao 
16224ed5116aSLikun Gao static const struct amdgpu_ring_funcs sdma_v7_1_ring_funcs = {
16234ed5116aSLikun Gao 	.type = AMDGPU_RING_TYPE_SDMA,
16244ed5116aSLikun Gao 	.align_mask = 0xf,
16254ed5116aSLikun Gao 	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
16264ed5116aSLikun Gao 	.support_64bit_ptrs = true,
16274ed5116aSLikun Gao 	.secure_submission_supported = true,
16284ed5116aSLikun Gao 	.get_rptr = sdma_v7_1_ring_get_rptr,
16294ed5116aSLikun Gao 	.get_wptr = sdma_v7_1_ring_get_wptr,
16304ed5116aSLikun Gao 	.set_wptr = sdma_v7_1_ring_set_wptr,
16314ed5116aSLikun Gao 	.emit_frame_size =
16324ed5116aSLikun Gao 		5 + /* sdma_v7_1_ring_init_cond_exec */
16334ed5116aSLikun Gao 		6 + /* sdma_v7_1_ring_emit_pipeline_sync */
16344ed5116aSLikun Gao 		/* sdma_v7_1_ring_emit_vm_flush */
16354ed5116aSLikun Gao 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
16364ed5116aSLikun Gao 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
16374ed5116aSLikun Gao 		10 + 10 + 10, /* sdma_v7_1_ring_emit_fence x3 for user fence, vm fence */
16384ed5116aSLikun Gao 	.emit_ib_size = 5 + 7 + 6, /* sdma_v7_1_ring_emit_ib */
16394ed5116aSLikun Gao 	.emit_ib = sdma_v7_1_ring_emit_ib,
16404ed5116aSLikun Gao 	.emit_mem_sync = sdma_v7_1_ring_emit_mem_sync,
16414ed5116aSLikun Gao 	.emit_fence = sdma_v7_1_ring_emit_fence,
16424ed5116aSLikun Gao 	.emit_pipeline_sync = sdma_v7_1_ring_emit_pipeline_sync,
16434ed5116aSLikun Gao 	.emit_vm_flush = sdma_v7_1_ring_emit_vm_flush,
16444ed5116aSLikun Gao 	.test_ring = sdma_v7_1_ring_test_ring,
16454ed5116aSLikun Gao 	.test_ib = sdma_v7_1_ring_test_ib,
16464ed5116aSLikun Gao 	.insert_nop = sdma_v7_1_ring_insert_nop,
16474ed5116aSLikun Gao 	.pad_ib = sdma_v7_1_ring_pad_ib,
16484ed5116aSLikun Gao 	.emit_wreg = sdma_v7_1_ring_emit_wreg,
16494ed5116aSLikun Gao 	.emit_reg_wait = sdma_v7_1_ring_emit_reg_wait,
16504ed5116aSLikun Gao 	.emit_reg_write_reg_wait = sdma_v7_1_ring_emit_reg_write_reg_wait,
16514ed5116aSLikun Gao 	.init_cond_exec = sdma_v7_1_ring_init_cond_exec,
16524ed5116aSLikun Gao 	.preempt_ib = sdma_v7_1_ring_preempt_ib,
16534ed5116aSLikun Gao 	.reset = sdma_v7_1_reset_queue,
16544ed5116aSLikun Gao };
16554ed5116aSLikun Gao 
16564ed5116aSLikun Gao static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev)
16574ed5116aSLikun Gao {
16584ed5116aSLikun Gao 	int i, dev_inst;
16594ed5116aSLikun Gao 
16604ed5116aSLikun Gao 	for (i = 0; i < adev->sdma.num_instances; i++) {
16614ed5116aSLikun Gao 		adev->sdma.instance[i].ring.funcs = &sdma_v7_1_ring_funcs;
16624ed5116aSLikun Gao 		adev->sdma.instance[i].ring.me = i;
16634ed5116aSLikun Gao 
16644ed5116aSLikun Gao 		dev_inst = GET_INST(SDMA0, i);
16654ed5116aSLikun Gao 		/* XCC to which SDMA belongs depends on physical instance */
16664ed5116aSLikun Gao 		adev->sdma.instance[i].xcc_id =
16674ed5116aSLikun Gao 			dev_inst / adev->sdma.num_inst_per_xcc;
16684ed5116aSLikun Gao 	}
16694ed5116aSLikun Gao }
16704ed5116aSLikun Gao 
16714ed5116aSLikun Gao static const struct amdgpu_irq_src_funcs sdma_v7_1_trap_irq_funcs = {
16724ed5116aSLikun Gao 	.set = sdma_v7_1_set_trap_irq_state,
16734ed5116aSLikun Gao 	.process = sdma_v7_1_process_trap_irq,
16744ed5116aSLikun Gao };
16754ed5116aSLikun Gao 
16764ed5116aSLikun Gao static const struct amdgpu_irq_src_funcs sdma_v7_1_illegal_inst_irq_funcs = {
16774ed5116aSLikun Gao 	.process = sdma_v7_1_process_illegal_inst_irq,
16784ed5116aSLikun Gao };
16794ed5116aSLikun Gao 
16804ed5116aSLikun Gao static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev)
16814ed5116aSLikun Gao {
16824ed5116aSLikun Gao 	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
16834ed5116aSLikun Gao 					adev->sdma.num_instances;
16844ed5116aSLikun Gao 	adev->sdma.trap_irq.funcs = &sdma_v7_1_trap_irq_funcs;
16854ed5116aSLikun Gao 	adev->sdma.illegal_inst_irq.funcs = &sdma_v7_1_illegal_inst_irq_funcs;
16864ed5116aSLikun Gao }
16874ed5116aSLikun Gao 
16884ed5116aSLikun Gao /**
16894ed5116aSLikun Gao  * sdma_v7_1_emit_copy_buffer - copy buffer using the sDMA engine
16904ed5116aSLikun Gao  *
16914ed5116aSLikun Gao  * @ib: indirect buffer to fill with commands
16924ed5116aSLikun Gao  * @src_offset: src GPU address
16934ed5116aSLikun Gao  * @dst_offset: dst GPU address
16944ed5116aSLikun Gao  * @byte_count: number of bytes to xfer
16954ed5116aSLikun Gao  * @copy_flags: copy flags for the buffers
16964ed5116aSLikun Gao  *
16974ed5116aSLikun Gao  * Copy GPU buffers using the DMA engine.
16984ed5116aSLikun Gao  * Used by the amdgpu ttm implementation to move pages if
16994ed5116aSLikun Gao  * registered as the asic copy callback.
17004ed5116aSLikun Gao  */
17014ed5116aSLikun Gao static void sdma_v7_1_emit_copy_buffer(struct amdgpu_ib *ib,
17024ed5116aSLikun Gao 				       uint64_t src_offset,
17034ed5116aSLikun Gao 				       uint64_t dst_offset,
17044ed5116aSLikun Gao 				       uint32_t byte_count,
17054ed5116aSLikun Gao 				       uint32_t copy_flags)
17064ed5116aSLikun Gao {
17074ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
17084ed5116aSLikun Gao 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
17094ed5116aSLikun Gao 		SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
17104ed5116aSLikun Gao 
17114ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = byte_count - 1;
17124ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
17134ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
17144ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
17154ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
17164ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
17174ed5116aSLikun Gao }
17184ed5116aSLikun Gao 
17194ed5116aSLikun Gao /**
17204ed5116aSLikun Gao  * sdma_v7_1_emit_fill_buffer - fill buffer using the sDMA engine
17214ed5116aSLikun Gao  *
17224ed5116aSLikun Gao  * @ib: indirect buffer to fill
17234ed5116aSLikun Gao  * @src_data: value to write to buffer
17244ed5116aSLikun Gao  * @dst_offset: dst GPU address
17254ed5116aSLikun Gao  * @byte_count: number of bytes to xfer
17264ed5116aSLikun Gao  *
17274ed5116aSLikun Gao  * Fill GPU buffers using the DMA engine.
17284ed5116aSLikun Gao  */
17294ed5116aSLikun Gao static void sdma_v7_1_emit_fill_buffer(struct amdgpu_ib *ib,
17304ed5116aSLikun Gao 				       uint32_t src_data,
17314ed5116aSLikun Gao 				       uint64_t dst_offset,
17324ed5116aSLikun Gao 				       uint32_t byte_count)
17334ed5116aSLikun Gao {
17344ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL);
17354ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
17364ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
17374ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = src_data;
17384ed5116aSLikun Gao 	ib->ptr[ib->length_dw++] = byte_count - 1;
17394ed5116aSLikun Gao }
17404ed5116aSLikun Gao 
17414ed5116aSLikun Gao static const struct amdgpu_buffer_funcs sdma_v7_1_buffer_funcs = {
17424ed5116aSLikun Gao 	.copy_max_bytes = 0x400000,
17434ed5116aSLikun Gao 	.copy_num_dw = 8,
17444ed5116aSLikun Gao 	.emit_copy_buffer = sdma_v7_1_emit_copy_buffer,
17454ed5116aSLikun Gao 	.fill_max_bytes = 0x400000,
17464ed5116aSLikun Gao 	.fill_num_dw = 5,
17474ed5116aSLikun Gao 	.emit_fill_buffer = sdma_v7_1_emit_fill_buffer,
17484ed5116aSLikun Gao };
17494ed5116aSLikun Gao 
17504ed5116aSLikun Gao static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev)
17514ed5116aSLikun Gao {
17524ed5116aSLikun Gao 	adev->mman.buffer_funcs = &sdma_v7_1_buffer_funcs;
17534ed5116aSLikun Gao 	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
17544ed5116aSLikun Gao }
17554ed5116aSLikun Gao 
17564ed5116aSLikun Gao static const struct amdgpu_vm_pte_funcs sdma_v7_1_vm_pte_funcs = {
17574ed5116aSLikun Gao 	.copy_pte_num_dw = 8,
17584ed5116aSLikun Gao 	.copy_pte = sdma_v7_1_vm_copy_pte,
17594ed5116aSLikun Gao 	.write_pte = sdma_v7_1_vm_write_pte,
17604ed5116aSLikun Gao 	.set_pte_pde = sdma_v7_1_vm_set_pte_pde,
17614ed5116aSLikun Gao };
17624ed5116aSLikun Gao 
17634ed5116aSLikun Gao static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev)
17644ed5116aSLikun Gao {
17654ed5116aSLikun Gao 	unsigned i;
17664ed5116aSLikun Gao 
17674ed5116aSLikun Gao 	adev->vm_manager.vm_pte_funcs = &sdma_v7_1_vm_pte_funcs;
17684ed5116aSLikun Gao 	for (i = 0; i < adev->sdma.num_instances; i++) {
17694ed5116aSLikun Gao 		adev->vm_manager.vm_pte_scheds[i] =
17704ed5116aSLikun Gao 			&adev->sdma.instance[i].ring.sched;
17714ed5116aSLikun Gao 	}
17724ed5116aSLikun Gao 	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
17734ed5116aSLikun Gao }
17744ed5116aSLikun Gao 
/* IP block descriptor for SDMA v7.1: version triple plus the common handler table. */
const struct amdgpu_ip_block_version sdma_v7_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 7,
	.minor = 1,
	.rev = 0,
	.funcs = &sdma_v7_1_ip_funcs,
};
17824ed5116aSLikun Gao 
17834ed5116aSLikun Gao static int sdma_v7_1_xcp_resume(void *handle, uint32_t inst_mask)
17844ed5116aSLikun Gao {
17854ed5116aSLikun Gao 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
17864ed5116aSLikun Gao 	int r;
17874ed5116aSLikun Gao 
17884ed5116aSLikun Gao 	r = sdma_v7_1_inst_start(adev, inst_mask);
17894ed5116aSLikun Gao 
17904ed5116aSLikun Gao 	return r;
17914ed5116aSLikun Gao }
17924ed5116aSLikun Gao 
/*
 * sdma_v7_1_xcp_suspend - suspend the sDMA instances of an XCP partition
 *
 * @handle: amdgpu_device pointer (opaque XCP callback handle)
 * @inst_mask: bitmask of sDMA instances to stop
 *
 * Disables context switching first, then the engines themselves, for the
 * selected instances.  Always returns 0.
 */
static int sdma_v7_1_xcp_suspend(void *handle, uint32_t inst_mask)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* Order matters: stop context switching before disabling the engines. */
	sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
	sdma_v7_1_inst_enable(adev, false, inst_mask);

	return 0;
}
18024ed5116aSLikun Gao 
18034ed5116aSLikun Gao struct amdgpu_xcp_ip_funcs sdma_v7_1_xcp_funcs = {
18044ed5116aSLikun Gao 	.suspend = &sdma_v7_1_xcp_suspend,
18054ed5116aSLikun Gao 	.resume = &sdma_v7_1_xcp_resume
18064ed5116aSLikun Gao };
1807