/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"

#include "gc/gc_12_1_0_offset.h"
#include "gc/gc_12_1_0_sh_mask.h"
#include "ivsrcid/gfx/irqsrcs_gfx_12_1_0.h"

#include "soc15_common.h"
#include "soc15.h"
#include "sdma_v7_1_0_pkt_open.h"
#include "nbio_v4_3.h"
#include "sdma_common.h"
#include "sdma_v7_1.h"
#include "v12_structs.h"
#include "mes_userqueue.h"
#include "soc_v1_0.h"

MODULE_FIRMWARE("amdgpu/sdma_7_1_0.bin");

#define SDMA1_REG_OFFSET 0x600
#define SDMA0_SDMA_IDX_0_END 0x450
#define SDMA1_HYP_DEC_REG_OFFSET 0x30

static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_1[] = {
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS1_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS2_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS3_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS4_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS5_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS6_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UCODE_REV),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK1),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK1),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_INT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_VM_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_CHICKEN_BITS),
};

static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev);
static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
				uint32_t inst_mask);

static u32 sdma_v7_1_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
{
	u32 base;
	u32 dev_inst = GET_INST(SDMA0, instance);
	int xcc_id = adev->sdma.instance[instance].xcc_id;
	int xcc_inst = dev_inst % adev->sdma.num_inst_per_xcc;

	if (internal_offset >= SDMA0_SDMA_IDX_0_END) {
		base = adev->reg_offset[GC_HWIP][xcc_id][1];
		if (xcc_inst != 0)
			internal_offset += SDMA1_HYP_DEC_REG_OFFSET * xcc_inst;
	} else {
		base = adev->reg_offset[GC_HWIP][xcc_id][0];
		if (xcc_inst != 0)
			internal_offset += SDMA1_REG_OFFSET * xcc_inst;
	}

	return base + internal_offset;
}

static unsigned sdma_v7_1_ring_init_cond_exec(struct amdgpu_ring *ring,
					      uint64_t addr)
{
	unsigned ret;

	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, 1);
	/* this is the offset we need to patch later */
	ret = ring->wptr & ring->buf_mask;
	/* insert dummy here and patch it later */
	amdgpu_ring_write(ring, 0);

	return ret;
}

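/*
 * Note on pointer units for the helpers below: the ring's rptr/wptr are
 * tracked in dwords, while the values kept in the writeback slots and
 * written through the doorbell are byte offsets, hence the >> 2 on read
 * and << 2 on write.
 */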
/**
 * sdma_v7_1_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the hardware.
 */
static uint64_t sdma_v7_1_ring_get_rptr(struct amdgpu_ring *ring)
{
	u64 *rptr;

	/* XXX check if swapping is necessary on BE */
	rptr = (u64 *)ring->rptr_cpu_addr;

	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
	return ((*rptr) >> 2);
}

/**
 * sdma_v7_1_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware.
 */
static uint64_t sdma_v7_1_ring_get_wptr(struct amdgpu_ring *ring)
{
	u64 wptr = 0;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
	}

	return wptr >> 2;
}

/**
 * sdma_v7_1_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware.
 */
static void sdma_v7_1_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	DRM_DEBUG("Setting write pointer\n");

	if (ring->use_doorbell) {
		DRM_DEBUG("Using doorbell -- "
			  "wptr_offs == 0x%08x "
			  "lower_32_bits(ring->wptr) << 2 == 0x%08x "
			  "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
			  ring->wptr_offs,
			  lower_32_bits(ring->wptr << 2),
			  upper_32_bits(ring->wptr << 2));
		/* XXX check if swapping is necessary on BE */
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr << 2);
		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
			  ring->doorbell_index, ring->wptr << 2);
		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
	} else {
		DRM_DEBUG("Not using doorbell -- "
			  "regSDMA%i_GFX_RB_WPTR == 0x%08x "
			  "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
			  ring->me,
			  lower_32_bits(ring->wptr << 2),
			  ring->me,
			  upper_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
				ring->me,
				regSDMA0_SDMA_QUEUE0_RB_WPTR),
				lower_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
				ring->me,
				regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
				upper_32_bits(ring->wptr << 2));
	}
}

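/*
 * NOP padding helper: when the instance supports burst NOPs, the first
 * dword written below is a NOP header whose COUNT field covers the
 * remaining (count - 1) dwords, so the engine consumes the padding as a
 * single packet.  sdma_v7_1_ring_emit_ib() relies on this to align its
 * 6-dword INDIRECT packet to an 8-dword boundary; for example, with
 * wptr == 5 it inserts (2 - 5) & 7 == 5 NOPs so that 5 + 5 + 6 == 16 is
 * again a multiple of 8.
 */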
static void sdma_v7_1_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
	int i;

	for (i = 0; i < count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			amdgpu_ring_write(ring, ring->funcs->nop |
				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
		else
			amdgpu_ring_write(ring, ring->funcs->nop);
}

/**
 * sdma_v7_1_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @job: job to retrieve vmid from
 * @ib: IB object to schedule
 * @flags: unused
 *
 * Schedule an IB in the DMA ring.
 */
static void sdma_v7_1_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_job *job,
				   struct amdgpu_ib *ib,
				   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);

	/* An IB packet must end on an 8 DW boundary--the next dword
	 * must be on an 8-dword boundary. Our IB packet below is 6
	 * dwords long, thus add x number of NOPs, such that, in
	 * modular arithmetic,
	 * wptr + 6 + x = 8k, k >= 0, which in C is,
	 * (wptr + 6 + x) % 8 = 0.
	 * The expression below is a solution for x.
	 */
	sdma_v7_1_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);

	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
}

/**
 * sdma_v7_1_ring_emit_mem_sync - flush the IB by graphics cache rinse
 *
 * @ring: amdgpu ring pointer
 *
 * flush the IB by graphics cache rinse.
 */
static void sdma_v7_1_ring_emit_mem_sync(struct amdgpu_ring *ring)
{
	uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
			    SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
			    SDMA_GCR_GLI_INV(1);

	/* flush entire cache L0/L1/L2; this can be optimized per performance requirements */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_56_32(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_0(gcr_cntl) |
			  SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_15_7(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_16(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD5_LIMIT_VA_56_48(0) |
			  SDMA_PKT_GCR_REQ_PAYLOAD5_VMID(0));
}


/**
 * sdma_v7_1_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address
 * @seq: fence seq number
 * @flags: fence flags
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed.
 */
static void sdma_v7_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
			  SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Uncached (UC) */
	/* zero in first two bits */
	BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
				  SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
		/* zero in first two bits */
		BUG_ON(addr & 0x3);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* generate an interrupt */
		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
	}
}

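/*
 * Engine teardown below is split in two: sdma_v7_1_inst_gfx_stop() only
 * clears RB_ENABLE/IB_ENABLE for the gfx queue, while the SDMA MCU itself
 * is halted separately through SDMA0_SDMA_MCU_CNTL.HALT in
 * sdma_v7_1_inst_enable().
 */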
/**
 * sdma_v7_1_inst_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be disabled
 *
 * Stop the gfx async dma ring buffers.
 */
static void sdma_v7_1_inst_gfx_stop(struct amdgpu_device *adev,
				    uint32_t inst_mask)
{
	u32 rb_cntl, ib_cntl;
	int i;

	for_each_inst(i, inst_mask) {
		rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL));
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);
		ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL));
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl);
	}
}

/**
 * sdma_v7_1_inst_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be disabled
 *
 * Stop the compute async dma queues.
 */
static void sdma_v7_1_inst_rlc_stop(struct amdgpu_device *adev,
				    uint32_t inst_mask)
{
	/* XXX todo */
}

/**
 * sdma_v7_1_inst_ctx_switch_enable - enable/disable the async dma engines context switch
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs context switch.
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Halt or unhalt the async dma engines context switch.
 */
static void sdma_v7_1_inst_ctx_switch_enable(struct amdgpu_device *adev,
					     bool enable, uint32_t inst_mask)
{
	int i;

	for_each_inst(i, inst_mask) {
		WREG32_SOC15_IP(GC,
			sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_TIMEOUT), 0x80);
	}
}

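/*
 * Note: the context-switch hook above currently only programs
 * SDMA0_SDMA_UTCL1_TIMEOUT (0x80); the @enable argument is not consulted
 * by it.
 */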
/**
 * sdma_v7_1_inst_enable - enable/disable the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs.
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Halt or unhalt the async dma engines.
 */
static void sdma_v7_1_inst_enable(struct amdgpu_device *adev,
				  bool enable, uint32_t inst_mask)
{
	u32 mcu_cntl;
	int i;

	if (!enable) {
		sdma_v7_1_inst_gfx_stop(adev, inst_mask);
		sdma_v7_1_inst_rlc_stop(adev, inst_mask);
	}

	if (amdgpu_sriov_vf(adev))
		return;

	for_each_inst(i, inst_mask) {
		mcu_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
		mcu_cntl = REG_SET_FIELD(mcu_cntl, SDMA0_SDMA_MCU_CNTL, HALT, enable ? 0 : 1);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), mcu_cntl);
	}
}

/**
 * sdma_v7_1_gfx_resume_instance - start/restart a certain sdma engine
 *
 * @adev: amdgpu_device pointer
 * @i: instance
 * @restore: whether to restore the wptr on restart
 *
 * Set up the gfx DMA ring buffers and enable them. On restart, the saved wptr and rptr are restored.
 * Return 0 for success.
 */
static int sdma_v7_1_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 doorbell;
	u32 doorbell_offset;
	u32 temp;
	u64 wptr_gpu_addr;
	int r;

	ring = &adev->sdma.instance[i].ring;

	/* Set ring buffer size in dwords */
	rb_bufsz = order_base_2(ring->ring_size / 4);
	rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL));
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL,
				RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_PRIV, 1);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);

	/* Initialize the ring buffer's read and write pointers */
	if (restore) {
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
	} else {
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), 0);
	}
	/* setup the wptr shadow polling */
	wptr_gpu_addr = ring->wptr_gpu_addr;
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_LO),
			lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_HI),
			upper_32_bits(wptr_gpu_addr));

	/* set the wb address whether it's enabled or not */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_HI),
			upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_LO),
			lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);

	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
	if (amdgpu_sriov_vf(adev))
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
	else
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);

	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);

	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);

	if (!restore)
		ring->wptr = 0;

	/* before programming wptr to a smaller value, minor_ptr_update needs to be set first */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 1);

	if (!amdgpu_sriov_vf(adev)) { /* only bare-metal uses register writes for wptr */
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
	}

	doorbell = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL));
	doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET));

	if (ring->use_doorbell) {
		doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);
		doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET,
						OFFSET, ring->doorbell_index);
	} else {
		doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 0);
	}
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL), doorbell);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET), doorbell_offset);

	if (i == 0)
		adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
						      ring->doorbell_index,
						      adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);

	if (amdgpu_sriov_vf(adev))
		sdma_v7_1_ring_set_wptr(ring);

	/* set minor_ptr_update to 0 after wptr is programmed */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 0);

	/* Set up sdma hang watchdog */
	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL));
	/* 100ms per unit */
	temp = REG_SET_FIELD(temp, SDMA0_SDMA_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
			     max(adev->usec_timeout/100000, 1));
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL), temp);

	/* Set up RESP_MODE to non-copy addresses */
	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL));
	temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, RESP_MODE, 3);
	temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, REDO_DELAY, 9);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL), temp);

	/* program default cache read and write policy */
	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE));
	/* clean read policy and write policy bits */
	temp &= 0xFF0FFF;
	temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
		 (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE), temp);

	if (!amdgpu_sriov_vf(adev)) {
		/* unhalt engine */
		temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
		temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, HALT, 0);
		temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, RESET, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), temp);
	}

	/* enable DMA RB */
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 1);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);

	ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL));
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
	/* enable DMA IBs */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl);
	ring->sched.ready = true;

	if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need the two lines below */
		sdma_v7_1_inst_ctx_switch_enable(adev, true, i);
		sdma_v7_1_inst_enable(adev, true, i);
	}

	r = amdgpu_ring_test_helper(ring);
	if (r)
		ring->sched.ready = false;

	return r;
}

/**
 * sdma_v7_1_inst_gfx_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Set up the gfx DMA ring buffers and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_1_inst_gfx_resume(struct amdgpu_device *adev,
				     uint32_t inst_mask)
{
	int i, r;

	for_each_inst(i, inst_mask) {
		r = sdma_v7_1_gfx_resume_instance(adev, i, false);
		if (r)
			return r;
	}

	return 0;

}

/**
 * sdma_v7_1_inst_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Set up the compute DMA queues and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_1_inst_rlc_resume(struct amdgpu_device *adev,
				     uint32_t inst_mask)
{
	return 0;
}

static void sdma_v7_1_inst_free_ucode_buffer(struct amdgpu_device *adev,
					     uint32_t inst_mask)
{
	int i;

	for_each_inst(i, inst_mask) {
		amdgpu_bo_free_kernel(&adev->sdma.instance[i].sdma_fw_obj,
				      &adev->sdma.instance[i].sdma_fw_gpu_addr,
				      (void **)&adev->sdma.instance[i].sdma_fw_ptr);
	}
}

/**
 * sdma_v7_1_inst_load_microcode - load the sDMA ME ucode
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int sdma_v7_1_inst_load_microcode(struct amdgpu_device *adev,
					 uint32_t inst_mask)
{
	const struct sdma_firmware_header_v3_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	uint32_t tmp, sdma_status, ic_op_cntl;
	int i, r, j;

	/* halt the MEs */
	sdma_v7_1_inst_enable(adev, false, inst_mask);

	if (!adev->sdma.instance[0].fw)
		return -EINVAL;

	hdr = (const struct sdma_firmware_header_v3_0 *)
		adev->sdma.instance[0].fw->data;
	amdgpu_ucode_print_sdma_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
			le32_to_cpu(hdr->ucode_offset_bytes));
	fw_size = le32_to_cpu(hdr->ucode_size_bytes);

	for_each_inst(i, inst_mask) {
		r = amdgpu_bo_create_reserved(adev, fw_size,
					      PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->sdma.instance[i].sdma_fw_obj,
					      &adev->sdma.instance[i].sdma_fw_gpu_addr,
					      (void **)&adev->sdma.instance[i].sdma_fw_ptr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to create sdma ucode bo\n", r);
			return r;
		}

		memcpy(adev->sdma.instance[i].sdma_fw_ptr, fw_data, fw_size);

		amdgpu_bo_kunmap(adev->sdma.instance[i].sdma_fw_obj);
		amdgpu_bo_unreserve(adev->sdma.instance[i].sdma_fw_obj);

		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL));
		tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_CNTL, GPA, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL), tmp);

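		/*
		 * Point the instruction cache at the ucode buffer object that
		 * was just copied above and ask the engine to prime it; the
		 * poll below waits for ICACHE_PRIMED and UCODE_INIT_DONE.
		 */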
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_LO),
				lower_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_HI),
				upper_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));

		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
		tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_OP_CNTL, PRIME_ICACHE, 1);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL), tmp);

		/* Wait for sdma ucode init complete */
		for (j = 0; j < adev->usec_timeout; j++) {
			ic_op_cntl = RREG32_SOC15_IP(GC,
					sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
			sdma_status = RREG32_SOC15_IP(GC,
					sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG));
			if ((REG_GET_FIELD(ic_op_cntl, SDMA0_SDMA_IC_OP_CNTL, ICACHE_PRIMED) == 1) &&
			    (REG_GET_FIELD(sdma_status, SDMA0_SDMA_STATUS_REG, UCODE_INIT_DONE) == 1))
				break;
			udelay(1);
		}

		if (j >= adev->usec_timeout) {
			dev_err(adev->dev, "failed to init sdma ucode\n");
			return -EINVAL;
		}
	}

	return 0;
}

static int sdma_v7_1_soft_reset(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t inst_mask;
	u32 tmp;
	int i;

	inst_mask = GENMASK(NUM_XCC(adev->sdma.sdma_mask) - 1, 0);
	sdma_v7_1_inst_gfx_stop(adev, inst_mask);

	for_each_inst(i, inst_mask) {
		//tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE));
		//tmp |= SDMA0_SDMA_FREEZE__FREEZE_MASK;
		//WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE), tmp);
		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
		tmp |= SDMA0_SDMA_MCU_CNTL__HALT_MASK;
		tmp |= SDMA0_SDMA_MCU_CNTL__RESET_MASK;
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), tmp);

		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_PREEMPT), 0);

		udelay(100);

		tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);

		udelay(100);

		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);

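		/* give the engine time to settle after releasing soft reset */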
		udelay(100);
	}

	return sdma_v7_1_inst_start(adev, inst_mask);
}

static bool sdma_v7_1_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	struct amdgpu_ring *ring;
	int i, r;
	long tmo = msecs_to_jiffies(1000);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		r = amdgpu_ring_test_ib(ring, tmo);
		if (r)
			return true;
	}

	return false;
}

static int sdma_v7_1_reset_queue(struct amdgpu_ring *ring,
				 unsigned int vmid,
				 struct amdgpu_fence *timedout_fence)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	if (ring->me >= adev->sdma.num_instances) {
		dev_err(adev->dev, "sdma instance not found\n");
		return -EINVAL;
	}

	amdgpu_ring_reset_helper_begin(ring, timedout_fence);

	r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true, 0);
	if (r)
		return r;

	r = sdma_v7_1_gfx_resume_instance(adev, ring->me, true);
	if (r)
		return r;

	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
}

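/*
 * Per-queue reset flow used above: the generic reset helpers bracket the
 * operation (begin/end), the MES is asked to reset the legacy queue, and
 * sdma_v7_1_gfx_resume_instance() is re-run with restore == true so the
 * ring's saved wptr is programmed back instead of being zeroed.
 */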
/**
 * sdma_v7_1_inst_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Set up the DMA engines and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
				uint32_t inst_mask)
{
	int r = 0;

	if (amdgpu_sriov_vf(adev)) {
		sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
		sdma_v7_1_inst_enable(adev, false, inst_mask);

		/* set RB registers */
		r = sdma_v7_1_inst_gfx_resume(adev, inst_mask);
		return r;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		r = sdma_v7_1_inst_load_microcode(adev, inst_mask);
		if (r) {
			sdma_v7_1_inst_free_ucode_buffer(adev, inst_mask);
			return r;
		}

		if (amdgpu_emu_mode == 1)
			msleep(1000);
	}

	/* unhalt the MEs */
	sdma_v7_1_inst_enable(adev, true, inst_mask);
	/* enable sdma ring preemption */
	sdma_v7_1_inst_ctx_switch_enable(adev, true, inst_mask);

	/* start the gfx rings and rlc compute queues */
	r = sdma_v7_1_inst_gfx_resume(adev, inst_mask);
	if (r)
		return r;
	r = sdma_v7_1_inst_rlc_resume(adev, inst_mask);

	return r;
}

static int sdma_v7_1_mqd_init(struct amdgpu_device *adev, void *mqd,
			      struct amdgpu_mqd_prop *prop)
{
	struct v12_sdma_mqd *m = mqd;
	uint64_t wb_gpu_addr;

	m->sdmax_rlcx_rb_cntl =
		order_base_2(prop->queue_size / 4) << SDMA0_SDMA_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
		1 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		4 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
		1 << SDMA0_SDMA_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT;

	m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);

	wb_gpu_addr = prop->wptr_gpu_addr;
	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);

	wb_gpu_addr = prop->rptr_gpu_addr;
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);

	m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, 0,
							regSDMA0_SDMA_QUEUE0_IB_CNTL));

	m->sdmax_rlcx_doorbell_offset =
		prop->doorbell_index << SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);

	m->sdmax_rlcx_doorbell_log = 0;
	m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_SDMA_QUEUE0_RB_AQL_CNTL_DEFAULT;
	m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_SDMA_QUEUE0_DUMMY_REG_DEFAULT;

	m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
	m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);

	return 0;
}

static void sdma_v7_1_set_mqd_funcs(struct amdgpu_device *adev)
{
	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v12_sdma_mqd);
	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v7_1_mqd_init;
}

/**
 * sdma_v7_1_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_1_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	tmp = 0xCAFEDEAD;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_device_wb_free(adev, index);
		return r;
	}

	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	amdgpu_device_wb_free(adev, index);

	return r;
}

/**
 * sdma_v7_1_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Test a simple IB in the DMA ring.
 * Returns 0 on success, error on failure.
 */
static int sdma_v7_1_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	long r;
	u32 tmp = 0;
	u64 gpu_addr;

	tmp = 0xCAFEDEAD;
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err0;
	}

	ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out\n");
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err1;
	}

	tmp = le32_to_cpu(adev->wb.wb[index]);

	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err1:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err0:
10514ed5116aSLikun Gao amdgpu_device_wb_free(adev, index); 10524ed5116aSLikun Gao return r; 10534ed5116aSLikun Gao } 10544ed5116aSLikun Gao 10554ed5116aSLikun Gao 10564ed5116aSLikun Gao /** 10574ed5116aSLikun Gao * sdma_v7_1_vm_copy_pte - update PTEs by copying them from the GART 10584ed5116aSLikun Gao * 10594ed5116aSLikun Gao * @ib: indirect buffer to fill with commands 10604ed5116aSLikun Gao * @pe: addr of the page entry 10614ed5116aSLikun Gao * @src: src addr to copy from 10624ed5116aSLikun Gao * @count: number of page entries to update 10634ed5116aSLikun Gao * 10644ed5116aSLikun Gao * Update PTEs by copying them from the GART using sDMA. 10654ed5116aSLikun Gao */ 10664ed5116aSLikun Gao static void sdma_v7_1_vm_copy_pte(struct amdgpu_ib *ib, 10674ed5116aSLikun Gao uint64_t pe, uint64_t src, 10684ed5116aSLikun Gao unsigned count) 10694ed5116aSLikun Gao { 10704ed5116aSLikun Gao unsigned bytes = count * 8; 10714ed5116aSLikun Gao 10724ed5116aSLikun Gao ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | 10734ed5116aSLikun Gao SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); 10744ed5116aSLikun Gao 10754ed5116aSLikun Gao ib->ptr[ib->length_dw++] = bytes - 1; 10764ed5116aSLikun Gao ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 10774ed5116aSLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(src); 10784ed5116aSLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(src); 10794ed5116aSLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(pe); 10804ed5116aSLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(pe); 10814ed5116aSLikun Gao 10824ed5116aSLikun Gao } 10834ed5116aSLikun Gao 10844ed5116aSLikun Gao /** 10854ed5116aSLikun Gao * sdma_v7_1_vm_write_pte - update PTEs by writing them manually 10864ed5116aSLikun Gao * 10874ed5116aSLikun Gao * @ib: indirect buffer to fill with commands 10884ed5116aSLikun Gao * @pe: addr of the page entry 10894ed5116aSLikun Gao * @value: dst addr to write into pe 10904ed5116aSLikun Gao * @count: number of page entries to update 10914ed5116aSLikun Gao * @incr: increase next addr by incr bytes 10924ed5116aSLikun Gao * 10934ed5116aSLikun Gao * Update PTEs by writing them manually using sDMA. 
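 * Each entry is emitted as two dwords (lower/upper half of @value), so the
 * WRITE_LINEAR packet's count field is programmed with 2 * @count - 1, and
 * @value is advanced by @incr after every entry.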
10944ed5116aSLikun Gao */ 10954ed5116aSLikun Gao static void sdma_v7_1_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe, 10964ed5116aSLikun Gao uint64_t value, unsigned count, 10974ed5116aSLikun Gao uint32_t incr) 10984ed5116aSLikun Gao { 10994ed5116aSLikun Gao unsigned ndw = count * 2; 11004ed5116aSLikun Gao 11014ed5116aSLikun Gao ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) | 11024ed5116aSLikun Gao SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); 11034ed5116aSLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(pe); 11044ed5116aSLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(pe); 11054ed5116aSLikun Gao ib->ptr[ib->length_dw++] = ndw - 1; 11064ed5116aSLikun Gao for (; ndw > 0; ndw -= 2) { 11074ed5116aSLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(value); 11084ed5116aSLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(value); 11094ed5116aSLikun Gao value += incr; 11104ed5116aSLikun Gao } 11114ed5116aSLikun Gao } 11124ed5116aSLikun Gao 11134ed5116aSLikun Gao /** 11144ed5116aSLikun Gao * sdma_v7_1_vm_set_pte_pde - update the page tables using sDMA 11154ed5116aSLikun Gao * 11164ed5116aSLikun Gao * @ib: indirect buffer to fill with commands 11174ed5116aSLikun Gao * @pe: addr of the page entry 11184ed5116aSLikun Gao * @addr: dst addr to write into pe 11194ed5116aSLikun Gao * @count: number of page entries to update 11204ed5116aSLikun Gao * @incr: increase next addr by incr bytes 11214ed5116aSLikun Gao * @flags: access flags 11224ed5116aSLikun Gao * 11234ed5116aSLikun Gao * Update the page tables using sDMA. 11244ed5116aSLikun Gao */ 11254ed5116aSLikun Gao static void sdma_v7_1_vm_set_pte_pde(struct amdgpu_ib *ib, 11264ed5116aSLikun Gao uint64_t pe, 11274ed5116aSLikun Gao uint64_t addr, unsigned count, 11284ed5116aSLikun Gao uint32_t incr, uint64_t flags) 11294ed5116aSLikun Gao { 11304ed5116aSLikun Gao /* for physically contiguous pages (vram) */ 11311c85f126SMukul Joshi u32 header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE); 11321c85f126SMukul Joshi 11331c85f126SMukul Joshi if (amdgpu_mtype_local) 11341c85f126SMukul Joshi header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x3); 11351c85f126SMukul Joshi else 11361c85f126SMukul Joshi header |= (SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x2) | 11371c85f126SMukul Joshi SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1) | 11381c85f126SMukul Joshi SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(0x3)); 11391c85f126SMukul Joshi 11401c85f126SMukul Joshi ib->ptr[ib->length_dw++] = header; 11414ed5116aSLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */ 11424ed5116aSLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(pe); 11434ed5116aSLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */ 11444ed5116aSLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(flags); 11454ed5116aSLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */ 11464ed5116aSLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(addr); 11474ed5116aSLikun Gao ib->ptr[ib->length_dw++] = incr; /* increment size */ 11484ed5116aSLikun Gao ib->ptr[ib->length_dw++] = 0; 11494ed5116aSLikun Gao ib->ptr[ib->length_dw++] = count - 1; /* number of entries */ 11504ed5116aSLikun Gao } 11514ed5116aSLikun Gao 11524ed5116aSLikun Gao /** 11534ed5116aSLikun Gao * sdma_v7_1_ring_pad_ib - pad the IB 11544ed5116aSLikun Gao * 11554ed5116aSLikun Gao * @ring: amdgpu ring pointer 11564ed5116aSLikun Gao * @ib: indirect buffer to fill with padding 11574ed5116aSLikun Gao * 11584ed5116aSLikun Gao * Pad the IB with NOPs to a boundary multiple of 8. 
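 * pad_count is computed as (-ib->length_dw) & 0x7; when the SDMA instance
 * supports burst NOPs, the first padding NOP carries (pad_count - 1) in its
 * COUNT field and the remaining slots are filled with plain NOPs.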
11594ed5116aSLikun Gao */ 11604ed5116aSLikun Gao static void sdma_v7_1_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) 11614ed5116aSLikun Gao { 11624ed5116aSLikun Gao struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); 11634ed5116aSLikun Gao u32 pad_count; 11644ed5116aSLikun Gao int i; 11654ed5116aSLikun Gao 11664ed5116aSLikun Gao pad_count = (-ib->length_dw) & 0x7; 11674ed5116aSLikun Gao for (i = 0; i < pad_count; i++) 11684ed5116aSLikun Gao if (sdma && sdma->burst_nop && (i == 0)) 11694ed5116aSLikun Gao ib->ptr[ib->length_dw++] = 11704ed5116aSLikun Gao SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) | 11714ed5116aSLikun Gao SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); 11724ed5116aSLikun Gao else 11734ed5116aSLikun Gao ib->ptr[ib->length_dw++] = 11744ed5116aSLikun Gao SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP); 11754ed5116aSLikun Gao } 11764ed5116aSLikun Gao 11774ed5116aSLikun Gao /** 11784ed5116aSLikun Gao * sdma_v7_1_ring_emit_pipeline_sync - sync the pipeline 11794ed5116aSLikun Gao * 11804ed5116aSLikun Gao * @ring: amdgpu_ring pointer 11814ed5116aSLikun Gao * 11824ed5116aSLikun Gao * Make sure all previous operations are completed (CIK). 11834ed5116aSLikun Gao */ 11844ed5116aSLikun Gao static void sdma_v7_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 11854ed5116aSLikun Gao { 11864ed5116aSLikun Gao uint32_t seq = ring->fence_drv.sync_seq; 11874ed5116aSLikun Gao uint64_t addr = ring->fence_drv.gpu_addr; 11884ed5116aSLikun Gao 11894ed5116aSLikun Gao /* wait for idle */ 11904ed5116aSLikun Gao amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) | 11914ed5116aSLikun Gao SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ 11924ed5116aSLikun Gao SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1)); 11934ed5116aSLikun Gao amdgpu_ring_write(ring, addr & 0xfffffffc); 11944ed5116aSLikun Gao amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 11954ed5116aSLikun Gao amdgpu_ring_write(ring, seq); /* reference */ 11964ed5116aSLikun Gao amdgpu_ring_write(ring, 0xffffffff); /* mask */ 11974ed5116aSLikun Gao amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 11984ed5116aSLikun Gao SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ 11994ed5116aSLikun Gao } 12004ed5116aSLikun Gao 12014ed5116aSLikun Gao /** 12024ed5116aSLikun Gao * sdma_v7_1_ring_emit_vm_flush - vm flush using sDMA 12034ed5116aSLikun Gao * 12044ed5116aSLikun Gao * @ring: amdgpu_ring pointer 12054ed5116aSLikun Gao * @vmid: vmid number to use 12064ed5116aSLikun Gao * @pd_addr: address 12074ed5116aSLikun Gao * 12084ed5116aSLikun Gao * Update the page table base and flush the VM TLB 12094ed5116aSLikun Gao * using sDMA. 12104ed5116aSLikun Gao */ 12114ed5116aSLikun Gao static void sdma_v7_1_ring_emit_vm_flush(struct amdgpu_ring *ring, 12124ed5116aSLikun Gao unsigned vmid, uint64_t pd_addr) 12134ed5116aSLikun Gao { 12144ed5116aSLikun Gao amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 12154ed5116aSLikun Gao } 12164ed5116aSLikun Gao 12174ed5116aSLikun Gao static void sdma_v7_1_ring_emit_wreg(struct amdgpu_ring *ring, 12184ed5116aSLikun Gao uint32_t reg, uint32_t val) 12194ed5116aSLikun Gao { 12204ed5116aSLikun Gao /* SRBM WRITE command will not support on sdma v7. 
12214ed5116aSLikun Gao * Use Register WRITE command instead, which OPCODE is same as SRBM WRITE 12224ed5116aSLikun Gao */ 12234ed5116aSLikun Gao amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE)); 1224fcc4fc75SLikun Gao amdgpu_ring_write(ring, soc_v1_0_normalize_xcc_reg_offset(reg) << 2); 12254ed5116aSLikun Gao amdgpu_ring_write(ring, val); 12264ed5116aSLikun Gao } 12274ed5116aSLikun Gao 12284ed5116aSLikun Gao static void sdma_v7_1_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 12294ed5116aSLikun Gao uint32_t val, uint32_t mask) 12304ed5116aSLikun Gao { 12314ed5116aSLikun Gao amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) | 12324ed5116aSLikun Gao SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ 1233fcc4fc75SLikun Gao amdgpu_ring_write(ring, soc_v1_0_normalize_xcc_reg_offset(reg) << 2); 12344ed5116aSLikun Gao amdgpu_ring_write(ring, 0); 12354ed5116aSLikun Gao amdgpu_ring_write(ring, val); /* reference */ 12364ed5116aSLikun Gao amdgpu_ring_write(ring, mask); /* mask */ 12374ed5116aSLikun Gao amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | 12384ed5116aSLikun Gao SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); 12394ed5116aSLikun Gao } 12404ed5116aSLikun Gao 12414ed5116aSLikun Gao static void sdma_v7_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 12424ed5116aSLikun Gao uint32_t reg0, uint32_t reg1, 12434ed5116aSLikun Gao uint32_t ref, uint32_t mask) 12444ed5116aSLikun Gao { 12454ed5116aSLikun Gao amdgpu_ring_emit_wreg(ring, reg0, ref); 12464ed5116aSLikun Gao /* wait for a cycle to reset vm_inv_eng*_ack */ 12474ed5116aSLikun Gao amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); 12484ed5116aSLikun Gao amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); 12494ed5116aSLikun Gao } 12504ed5116aSLikun Gao 12514ed5116aSLikun Gao static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block) 12524ed5116aSLikun Gao { 12534ed5116aSLikun Gao struct amdgpu_device *adev = ip_block->adev; 12544ed5116aSLikun Gao int r; 12554ed5116aSLikun Gao 12564ed5116aSLikun Gao r = amdgpu_sdma_init_microcode(adev, 0, true); 12574ed5116aSLikun Gao if (r) { 12584ed5116aSLikun Gao DRM_ERROR("Failed to init sdma firmware!\n"); 12594ed5116aSLikun Gao return r; 12604ed5116aSLikun Gao } 12614ed5116aSLikun Gao 12624ed5116aSLikun Gao sdma_v7_1_set_ring_funcs(adev); 12634ed5116aSLikun Gao sdma_v7_1_set_buffer_funcs(adev); 12644ed5116aSLikun Gao sdma_v7_1_set_vm_pte_funcs(adev); 12654ed5116aSLikun Gao sdma_v7_1_set_irq_funcs(adev); 12664ed5116aSLikun Gao sdma_v7_1_set_mqd_funcs(adev); 12674ed5116aSLikun Gao 12684ed5116aSLikun Gao return 0; 12694ed5116aSLikun Gao } 12704ed5116aSLikun Gao 12714ed5116aSLikun Gao static int sdma_v7_1_sw_init(struct amdgpu_ip_block *ip_block) 12724ed5116aSLikun Gao { 12734ed5116aSLikun Gao struct amdgpu_ring *ring; 12744ed5116aSLikun Gao int r, i; 12754ed5116aSLikun Gao struct amdgpu_device *adev = ip_block->adev; 12764ed5116aSLikun Gao uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1); 12774ed5116aSLikun Gao uint32_t *ptr; 12784ed5116aSLikun Gao u32 xcc_id; 12794ed5116aSLikun Gao 12804ed5116aSLikun Gao /* SDMA trap event */ 1281db9ca58eSHawking Zhang r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GFX, 1282e50a6eceSHawking Zhang GFX_12_1_0__SRCID__SDMA_TRAP, 12834ed5116aSLikun Gao &adev->sdma.trap_irq); 12844ed5116aSLikun Gao if (r) 12854ed5116aSLikun Gao return r; 12864ed5116aSLikun Gao 12874ed5116aSLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) { 12884ed5116aSLikun Gao ring = &adev->sdma.instance[i].ring; 
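		/*
		 * Map this logical SDMA instance back to the logical XCC that
		 * owns it: instance[i].xcc_id holds the physical XCC (set in
		 * sdma_v7_1_set_ring_funcs()), and the loop below finds the
		 * matching logical index used for the ring name and vm_hub.
		 */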
12894ed5116aSLikun Gao ring->ring_obj = NULL; 12904ed5116aSLikun Gao ring->use_doorbell = true; 12914ed5116aSLikun Gao ring->me = i; 129249f47cbfSLikun Gao 129349f47cbfSLikun Gao for (xcc_id = 0; xcc_id < fls(adev->gfx.xcc_mask); xcc_id++) { 129449f47cbfSLikun Gao if (adev->sdma.instance[i].xcc_id == GET_INST(GC, xcc_id)) 129549f47cbfSLikun Gao break; 129649f47cbfSLikun Gao } 12974ed5116aSLikun Gao 12984ed5116aSLikun Gao DRM_DEBUG("SDMA%d.%d use_doorbell being set to: [%s]\n", 129949f47cbfSLikun Gao xcc_id, GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc, 13004ed5116aSLikun Gao ring->use_doorbell?"true":"false"); 13014ed5116aSLikun Gao 13024ed5116aSLikun Gao ring->doorbell_index = 13034ed5116aSLikun Gao (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset 13044ed5116aSLikun Gao 13054ed5116aSLikun Gao ring->vm_hub = AMDGPU_GFXHUB(xcc_id); 13064ed5116aSLikun Gao sprintf(ring->name, "sdma%d.%d", xcc_id, 130749f47cbfSLikun Gao GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc); 13084ed5116aSLikun Gao r = amdgpu_ring_init(adev, ring, 1024, 13094ed5116aSLikun Gao &adev->sdma.trap_irq, 13104ed5116aSLikun Gao AMDGPU_SDMA_IRQ_INSTANCE0 + i, 13114ed5116aSLikun Gao AMDGPU_RING_PRIO_DEFAULT, NULL); 13124ed5116aSLikun Gao if (r) 13134ed5116aSLikun Gao return r; 13144ed5116aSLikun Gao } 13154ed5116aSLikun Gao 13164ed5116aSLikun Gao adev->sdma.supported_reset = 13174ed5116aSLikun Gao amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring); 13184ed5116aSLikun Gao if (!amdgpu_sriov_vf(adev) && 13194ed5116aSLikun Gao !adev->debug_disable_gpu_ring_reset) 13204ed5116aSLikun Gao adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; 13214ed5116aSLikun Gao 13224ed5116aSLikun Gao r = amdgpu_sdma_sysfs_reset_mask_init(adev); 13234ed5116aSLikun Gao if (r) 13244ed5116aSLikun Gao return r; 13254ed5116aSLikun Gao /* Allocate memory for SDMA IP Dump buffer */ 13264ed5116aSLikun Gao ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); 13274ed5116aSLikun Gao if (ptr) 13284ed5116aSLikun Gao adev->sdma.ip_dump = ptr; 13294ed5116aSLikun Gao else 13304ed5116aSLikun Gao DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n"); 13314ed5116aSLikun Gao 13324ed5116aSLikun Gao #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ 13334ed5116aSLikun Gao adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs; 13344ed5116aSLikun Gao #endif 13354ed5116aSLikun Gao 13364ed5116aSLikun Gao return r; 13374ed5116aSLikun Gao } 13384ed5116aSLikun Gao 13394ed5116aSLikun Gao static int sdma_v7_1_sw_fini(struct amdgpu_ip_block *ip_block) 13404ed5116aSLikun Gao { 13414ed5116aSLikun Gao struct amdgpu_device *adev = ip_block->adev; 13424ed5116aSLikun Gao int i; 13434ed5116aSLikun Gao 13444ed5116aSLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) 13454ed5116aSLikun Gao amdgpu_ring_fini(&adev->sdma.instance[i].ring); 13464ed5116aSLikun Gao 13474ed5116aSLikun Gao amdgpu_sdma_sysfs_reset_mask_fini(adev); 13484ed5116aSLikun Gao amdgpu_sdma_destroy_inst_ctx(adev, true); 13494ed5116aSLikun Gao 13504ed5116aSLikun Gao if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) 135149f47cbfSLikun Gao sdma_v7_1_inst_free_ucode_buffer(adev, adev->sdma.sdma_mask); 13524ed5116aSLikun Gao 13534ed5116aSLikun Gao kfree(adev->sdma.ip_dump); 13544ed5116aSLikun Gao 13554ed5116aSLikun Gao return 0; 13564ed5116aSLikun Gao } 13574ed5116aSLikun Gao 13584ed5116aSLikun Gao static int sdma_v7_1_hw_init(struct amdgpu_ip_block *ip_block) 13594ed5116aSLikun Gao { 13604ed5116aSLikun Gao struct amdgpu_device *adev = ip_block->adev; 
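	/*
	 * Full hw_init brings up every SDMA engine: inst_mask is built as
	 * GENMASK(num_instances - 1, 0), i.e. one bit per instance, in
	 * contrast to the per-partition masks passed to the XCP
	 * suspend/resume callbacks at the end of this file.
	 */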
136105282873SLikun Gao uint32_t inst_mask; 13624ed5116aSLikun Gao 136305282873SLikun Gao inst_mask = GENMASK(adev->sdma.num_instances - 1, 0); 136405282873SLikun Gao 136505282873SLikun Gao return sdma_v7_1_inst_start(adev, inst_mask); 13664ed5116aSLikun Gao } 13674ed5116aSLikun Gao 13684ed5116aSLikun Gao static int sdma_v7_1_hw_fini(struct amdgpu_ip_block *ip_block) 13694ed5116aSLikun Gao { 13704ed5116aSLikun Gao struct amdgpu_device *adev = ip_block->adev; 13714ed5116aSLikun Gao 13724ed5116aSLikun Gao if (amdgpu_sriov_vf(adev)) 13734ed5116aSLikun Gao return 0; 13744ed5116aSLikun Gao 137549f47cbfSLikun Gao sdma_v7_1_inst_ctx_switch_enable(adev, false, adev->sdma.sdma_mask); 137649f47cbfSLikun Gao sdma_v7_1_inst_enable(adev, false, adev->sdma.sdma_mask); 13774ed5116aSLikun Gao 13784ed5116aSLikun Gao return 0; 13794ed5116aSLikun Gao } 13804ed5116aSLikun Gao 13814ed5116aSLikun Gao static int sdma_v7_1_suspend(struct amdgpu_ip_block *ip_block) 13824ed5116aSLikun Gao { 13834ed5116aSLikun Gao return sdma_v7_1_hw_fini(ip_block); 13844ed5116aSLikun Gao } 13854ed5116aSLikun Gao 13864ed5116aSLikun Gao static int sdma_v7_1_resume(struct amdgpu_ip_block *ip_block) 13874ed5116aSLikun Gao { 13884ed5116aSLikun Gao return sdma_v7_1_hw_init(ip_block); 13894ed5116aSLikun Gao } 13904ed5116aSLikun Gao 13914ed5116aSLikun Gao static bool sdma_v7_1_is_idle(struct amdgpu_ip_block *ip_block) 13924ed5116aSLikun Gao { 13934ed5116aSLikun Gao struct amdgpu_device *adev = ip_block->adev; 13944ed5116aSLikun Gao u32 i; 13954ed5116aSLikun Gao 13964ed5116aSLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) { 13974ed5116aSLikun Gao u32 tmp = RREG32(sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG)); 13984ed5116aSLikun Gao 13994ed5116aSLikun Gao if (!(tmp & SDMA0_SDMA_STATUS_REG__IDLE_MASK)) 14004ed5116aSLikun Gao return false; 14014ed5116aSLikun Gao } 14024ed5116aSLikun Gao 14034ed5116aSLikun Gao return true; 14044ed5116aSLikun Gao } 14054ed5116aSLikun Gao 14064ed5116aSLikun Gao static int sdma_v7_1_wait_for_idle(struct amdgpu_ip_block *ip_block) 14074ed5116aSLikun Gao { 14084ed5116aSLikun Gao unsigned i, j; 14094ed5116aSLikun Gao u32 sdma[AMDGPU_MAX_SDMA_INSTANCES]; 14104ed5116aSLikun Gao struct amdgpu_device *adev = ip_block->adev; 14114ed5116aSLikun Gao 14124ed5116aSLikun Gao for (i = 0; i < adev->usec_timeout; i++) { 14134ed5116aSLikun Gao for (j = 0; j < adev->sdma.num_instances; j++) { 14144ed5116aSLikun Gao sdma[j] = RREG32(sdma_v7_1_get_reg_offset(adev, 14154ed5116aSLikun Gao j, regSDMA0_SDMA_STATUS_REG)); 14164ed5116aSLikun Gao if (!(sdma[j] & SDMA0_SDMA_STATUS_REG__IDLE_MASK)) 14174ed5116aSLikun Gao break; 14184ed5116aSLikun Gao } 14194ed5116aSLikun Gao if (j == adev->sdma.num_instances) 14204ed5116aSLikun Gao return 0; 14214ed5116aSLikun Gao udelay(1); 14224ed5116aSLikun Gao } 14234ed5116aSLikun Gao return -ETIMEDOUT; 14244ed5116aSLikun Gao } 14254ed5116aSLikun Gao 14264ed5116aSLikun Gao static int sdma_v7_1_ring_preempt_ib(struct amdgpu_ring *ring) 14274ed5116aSLikun Gao { 14284ed5116aSLikun Gao int i, r = 0; 14294ed5116aSLikun Gao struct amdgpu_device *adev = ring->adev; 14304ed5116aSLikun Gao u32 index = 0; 14314ed5116aSLikun Gao u64 sdma_gfx_preempt; 14324ed5116aSLikun Gao 14334ed5116aSLikun Gao amdgpu_sdma_get_index_from_ring(ring, &index); 14344ed5116aSLikun Gao sdma_gfx_preempt = 14354ed5116aSLikun Gao sdma_v7_1_get_reg_offset(adev, index, regSDMA0_SDMA_QUEUE0_PREEMPT); 14364ed5116aSLikun Gao 14374ed5116aSLikun Gao /* assert preemption condition */ 14384ed5116aSLikun Gao 
amdgpu_ring_set_preempt_cond_exec(ring, false); 14394ed5116aSLikun Gao 14404ed5116aSLikun Gao /* emit the trailing fence */ 14414ed5116aSLikun Gao ring->trail_seq += 1; 14424ed5116aSLikun Gao r = amdgpu_ring_alloc(ring, 10); 14434ed5116aSLikun Gao if (r) { 14444ed5116aSLikun Gao DRM_ERROR("ring %d failed to be allocated \n", ring->idx); 14454ed5116aSLikun Gao return r; 14464ed5116aSLikun Gao } 14474ed5116aSLikun Gao sdma_v7_1_ring_emit_fence(ring, ring->trail_fence_gpu_addr, 14484ed5116aSLikun Gao ring->trail_seq, 0); 14494ed5116aSLikun Gao amdgpu_ring_commit(ring); 14504ed5116aSLikun Gao 14514ed5116aSLikun Gao /* assert IB preemption */ 14524ed5116aSLikun Gao WREG32(sdma_gfx_preempt, 1); 14534ed5116aSLikun Gao 14544ed5116aSLikun Gao /* poll the trailing fence */ 14554ed5116aSLikun Gao for (i = 0; i < adev->usec_timeout; i++) { 14564ed5116aSLikun Gao if (ring->trail_seq == 14574ed5116aSLikun Gao le32_to_cpu(*(ring->trail_fence_cpu_addr))) 14584ed5116aSLikun Gao break; 14594ed5116aSLikun Gao udelay(1); 14604ed5116aSLikun Gao } 14614ed5116aSLikun Gao 14624ed5116aSLikun Gao if (i >= adev->usec_timeout) { 14634ed5116aSLikun Gao r = -EINVAL; 14644ed5116aSLikun Gao DRM_ERROR("ring %d failed to be preempted\n", ring->idx); 14654ed5116aSLikun Gao } 14664ed5116aSLikun Gao 14674ed5116aSLikun Gao /* deassert IB preemption */ 14684ed5116aSLikun Gao WREG32(sdma_gfx_preempt, 0); 14694ed5116aSLikun Gao 14704ed5116aSLikun Gao /* deassert the preemption condition */ 14714ed5116aSLikun Gao amdgpu_ring_set_preempt_cond_exec(ring, true); 14724ed5116aSLikun Gao return r; 14734ed5116aSLikun Gao } 14744ed5116aSLikun Gao 14754ed5116aSLikun Gao static int sdma_v7_1_set_trap_irq_state(struct amdgpu_device *adev, 14764ed5116aSLikun Gao struct amdgpu_irq_src *source, 14774ed5116aSLikun Gao unsigned type, 14784ed5116aSLikun Gao enum amdgpu_interrupt_state state) 14794ed5116aSLikun Gao { 14804ed5116aSLikun Gao u32 sdma_cntl; 14814ed5116aSLikun Gao 14824ed5116aSLikun Gao u32 reg_offset = sdma_v7_1_get_reg_offset(adev, type, regSDMA0_SDMA_CNTL); 14834ed5116aSLikun Gao 14844ed5116aSLikun Gao sdma_cntl = RREG32(reg_offset); 14854ed5116aSLikun Gao sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_SDMA_CNTL, TRAP_ENABLE, 14864ed5116aSLikun Gao state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 14874ed5116aSLikun Gao WREG32(reg_offset, sdma_cntl); 14884ed5116aSLikun Gao 14894ed5116aSLikun Gao return 0; 14904ed5116aSLikun Gao } 14914ed5116aSLikun Gao 14924ed5116aSLikun Gao static int sdma_v7_1_process_trap_irq(struct amdgpu_device *adev, 14934ed5116aSLikun Gao struct amdgpu_irq_src *source, 14944ed5116aSLikun Gao struct amdgpu_iv_entry *entry) 14954ed5116aSLikun Gao { 149649f47cbfSLikun Gao int inst, instances, queue, xcc_id = 0; 14974ed5116aSLikun Gao 14984ed5116aSLikun Gao DRM_DEBUG("IH: SDMA trap\n"); 14994ed5116aSLikun Gao 1500*f7e06786STvrtko Ursulin if (drm_WARN_ON_ONCE(&adev->ddev, 1501*f7e06786STvrtko Ursulin adev->enable_mes && 1502*f7e06786STvrtko Ursulin (entry->src_data[0] & AMDGPU_FENCE_MES_QUEUE_FLAG))) 15034ed5116aSLikun Gao return 0; 15044ed5116aSLikun Gao 15054ed5116aSLikun Gao queue = entry->ring_id & 0xf; 15064ed5116aSLikun Gao if (adev->gfx.funcs && adev->gfx.funcs->ih_node_to_logical_xcc) 15074ed5116aSLikun Gao xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, entry->node_id); 15084ed5116aSLikun Gao else 15094ed5116aSLikun Gao dev_warn(adev->dev, "IH: SDMA may get wrong xcc id as gfx function not available\n"); 151049f47cbfSLikun Gao inst = ((entry->ring_id & 0xf0) >> 4) + 151149f47cbfSLikun Gao GET_INST(GC, xcc_id) * adev->sdma.num_inst_per_xcc; 151249f47cbfSLikun Gao for (instances = 0; instances < adev->sdma.num_instances; instances++) { 151349f47cbfSLikun Gao if (inst == GET_INST(SDMA0, instances)) 151449f47cbfSLikun Gao break; 151549f47cbfSLikun Gao } 15164ed5116aSLikun Gao if (instances > adev->sdma.num_instances - 1) { 15174ed5116aSLikun Gao DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n"); 15184ed5116aSLikun Gao return -EINVAL; 15194ed5116aSLikun Gao } 15204ed5116aSLikun Gao 15214ed5116aSLikun Gao switch (entry->client_id) { 1522db9ca58eSHawking Zhang case SOC_V1_0_IH_CLIENTID_GFX: 15234ed5116aSLikun Gao switch (queue) { 15244ed5116aSLikun Gao case 0: 15254ed5116aSLikun Gao amdgpu_fence_process(&adev->sdma.instance[instances].ring); 15264ed5116aSLikun Gao break; 15274ed5116aSLikun Gao default: 15284ed5116aSLikun Gao break; 15294ed5116aSLikun Gao } 15304ed5116aSLikun Gao break; 15314ed5116aSLikun Gao } 15324ed5116aSLikun Gao return 0; 15334ed5116aSLikun Gao } 15344ed5116aSLikun Gao 15354ed5116aSLikun Gao static int sdma_v7_1_process_illegal_inst_irq(struct amdgpu_device *adev, 15364ed5116aSLikun Gao struct amdgpu_irq_src *source, 15374ed5116aSLikun Gao struct amdgpu_iv_entry *entry) 15384ed5116aSLikun Gao { 15394ed5116aSLikun Gao return 0; 15404ed5116aSLikun Gao } 15414ed5116aSLikun Gao 15424ed5116aSLikun Gao static int sdma_v7_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, 15434ed5116aSLikun Gao enum amd_clockgating_state state) 15444ed5116aSLikun Gao { 15454ed5116aSLikun Gao return 0; 15464ed5116aSLikun Gao } 15474ed5116aSLikun Gao 15484ed5116aSLikun Gao static int sdma_v7_1_set_powergating_state(struct amdgpu_ip_block *ip_block, 15494ed5116aSLikun Gao enum amd_powergating_state state) 15504ed5116aSLikun Gao { 15514ed5116aSLikun Gao return 0; 15524ed5116aSLikun Gao } 15534ed5116aSLikun Gao 15544ed5116aSLikun Gao static void sdma_v7_1_get_clockgating_state(struct amdgpu_ip_block *ip_block, 15554ed5116aSLikun Gao u64 *flags) 15564ed5116aSLikun Gao { 15574ed5116aSLikun Gao } 15584ed5116aSLikun Gao 15594ed5116aSLikun Gao static void sdma_v7_1_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) 15604ed5116aSLikun Gao { 15614ed5116aSLikun Gao struct amdgpu_device *adev = ip_block->adev; 
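	/*
	 * Pretty-print the snapshot captured by sdma_v7_1_dump_ip_state():
	 * adev->sdma.ip_dump holds one reg_count-sized block per instance,
	 * laid out back to back.
	 */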
15624ed5116aSLikun Gao int i, j; 15634ed5116aSLikun Gao uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1); 15644ed5116aSLikun Gao uint32_t instance_offset; 15654ed5116aSLikun Gao 15664ed5116aSLikun Gao if (!adev->sdma.ip_dump) 15674ed5116aSLikun Gao return; 15684ed5116aSLikun Gao 15694ed5116aSLikun Gao drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances); 15704ed5116aSLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) { 15714ed5116aSLikun Gao instance_offset = i * reg_count; 15724ed5116aSLikun Gao drm_printf(p, "\nInstance:%d\n", i); 15734ed5116aSLikun Gao 15744ed5116aSLikun Gao for (j = 0; j < reg_count; j++) 15754ed5116aSLikun Gao drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_7_1[j].reg_name, 15764ed5116aSLikun Gao adev->sdma.ip_dump[instance_offset + j]); 15774ed5116aSLikun Gao } 15784ed5116aSLikun Gao } 15794ed5116aSLikun Gao 15804ed5116aSLikun Gao static void sdma_v7_1_dump_ip_state(struct amdgpu_ip_block *ip_block) 15814ed5116aSLikun Gao { 15824ed5116aSLikun Gao struct amdgpu_device *adev = ip_block->adev; 15834ed5116aSLikun Gao int i, j; 15844ed5116aSLikun Gao uint32_t instance_offset; 15854ed5116aSLikun Gao uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1); 15864ed5116aSLikun Gao 15874ed5116aSLikun Gao if (!adev->sdma.ip_dump) 15884ed5116aSLikun Gao return; 15894ed5116aSLikun Gao 15904ed5116aSLikun Gao amdgpu_gfx_off_ctrl(adev, false); 15914ed5116aSLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) { 15924ed5116aSLikun Gao instance_offset = i * reg_count; 15934ed5116aSLikun Gao for (j = 0; j < reg_count; j++) 15944ed5116aSLikun Gao adev->sdma.ip_dump[instance_offset + j] = 15954ed5116aSLikun Gao RREG32(sdma_v7_1_get_reg_offset(adev, i, 15964ed5116aSLikun Gao sdma_reg_list_7_1[j].reg_offset)); 15974ed5116aSLikun Gao } 15984ed5116aSLikun Gao amdgpu_gfx_off_ctrl(adev, true); 15994ed5116aSLikun Gao } 16004ed5116aSLikun Gao 16014ed5116aSLikun Gao const struct amd_ip_funcs sdma_v7_1_ip_funcs = { 16024ed5116aSLikun Gao .name = "sdma_v7_1", 16034ed5116aSLikun Gao .early_init = sdma_v7_1_early_init, 16044ed5116aSLikun Gao .late_init = NULL, 16054ed5116aSLikun Gao .sw_init = sdma_v7_1_sw_init, 16064ed5116aSLikun Gao .sw_fini = sdma_v7_1_sw_fini, 16074ed5116aSLikun Gao .hw_init = sdma_v7_1_hw_init, 16084ed5116aSLikun Gao .hw_fini = sdma_v7_1_hw_fini, 16094ed5116aSLikun Gao .suspend = sdma_v7_1_suspend, 16104ed5116aSLikun Gao .resume = sdma_v7_1_resume, 16114ed5116aSLikun Gao .is_idle = sdma_v7_1_is_idle, 16124ed5116aSLikun Gao .wait_for_idle = sdma_v7_1_wait_for_idle, 16134ed5116aSLikun Gao .soft_reset = sdma_v7_1_soft_reset, 16144ed5116aSLikun Gao .check_soft_reset = sdma_v7_1_check_soft_reset, 16154ed5116aSLikun Gao .set_clockgating_state = sdma_v7_1_set_clockgating_state, 16164ed5116aSLikun Gao .set_powergating_state = sdma_v7_1_set_powergating_state, 16174ed5116aSLikun Gao .get_clockgating_state = sdma_v7_1_get_clockgating_state, 16184ed5116aSLikun Gao .dump_ip_state = sdma_v7_1_dump_ip_state, 16194ed5116aSLikun Gao .print_ip_state = sdma_v7_1_print_ip_state, 16204ed5116aSLikun Gao }; 16214ed5116aSLikun Gao 16224ed5116aSLikun Gao static const struct amdgpu_ring_funcs sdma_v7_1_ring_funcs = { 16234ed5116aSLikun Gao .type = AMDGPU_RING_TYPE_SDMA, 16244ed5116aSLikun Gao .align_mask = 0xf, 16254ed5116aSLikun Gao .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 16264ed5116aSLikun Gao .support_64bit_ptrs = true, 16274ed5116aSLikun Gao .secure_submission_supported = true, 16284ed5116aSLikun Gao .get_rptr = sdma_v7_1_ring_get_rptr, 16294ed5116aSLikun Gao .get_wptr = 
sdma_v7_1_ring_get_wptr, 16304ed5116aSLikun Gao .set_wptr = sdma_v7_1_ring_set_wptr, 16314ed5116aSLikun Gao .emit_frame_size = 16324ed5116aSLikun Gao 5 + /* sdma_v7_1_ring_init_cond_exec */ 16334ed5116aSLikun Gao 6 + /* sdma_v7_1_ring_emit_pipeline_sync */ 16344ed5116aSLikun Gao /* sdma_v7_1_ring_emit_vm_flush */ 16354ed5116aSLikun Gao SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 16364ed5116aSLikun Gao SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + 16374ed5116aSLikun Gao 10 + 10 + 10, /* sdma_v7_1_ring_emit_fence x3 for user fence, vm fence */ 16384ed5116aSLikun Gao .emit_ib_size = 5 + 7 + 6, /* sdma_v7_1_ring_emit_ib */ 16394ed5116aSLikun Gao .emit_ib = sdma_v7_1_ring_emit_ib, 16404ed5116aSLikun Gao .emit_mem_sync = sdma_v7_1_ring_emit_mem_sync, 16414ed5116aSLikun Gao .emit_fence = sdma_v7_1_ring_emit_fence, 16424ed5116aSLikun Gao .emit_pipeline_sync = sdma_v7_1_ring_emit_pipeline_sync, 16434ed5116aSLikun Gao .emit_vm_flush = sdma_v7_1_ring_emit_vm_flush, 16444ed5116aSLikun Gao .test_ring = sdma_v7_1_ring_test_ring, 16454ed5116aSLikun Gao .test_ib = sdma_v7_1_ring_test_ib, 16464ed5116aSLikun Gao .insert_nop = sdma_v7_1_ring_insert_nop, 16474ed5116aSLikun Gao .pad_ib = sdma_v7_1_ring_pad_ib, 16484ed5116aSLikun Gao .emit_wreg = sdma_v7_1_ring_emit_wreg, 16494ed5116aSLikun Gao .emit_reg_wait = sdma_v7_1_ring_emit_reg_wait, 16504ed5116aSLikun Gao .emit_reg_write_reg_wait = sdma_v7_1_ring_emit_reg_write_reg_wait, 16514ed5116aSLikun Gao .init_cond_exec = sdma_v7_1_ring_init_cond_exec, 16524ed5116aSLikun Gao .preempt_ib = sdma_v7_1_ring_preempt_ib, 16534ed5116aSLikun Gao .reset = sdma_v7_1_reset_queue, 16544ed5116aSLikun Gao }; 16554ed5116aSLikun Gao 16564ed5116aSLikun Gao static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev) 16574ed5116aSLikun Gao { 16584ed5116aSLikun Gao int i, dev_inst; 16594ed5116aSLikun Gao 16604ed5116aSLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) { 16614ed5116aSLikun Gao adev->sdma.instance[i].ring.funcs = &sdma_v7_1_ring_funcs; 16624ed5116aSLikun Gao adev->sdma.instance[i].ring.me = i; 16634ed5116aSLikun Gao 16644ed5116aSLikun Gao dev_inst = GET_INST(SDMA0, i); 16654ed5116aSLikun Gao /* XCC to which SDMA belongs depends on physical instance */ 16664ed5116aSLikun Gao adev->sdma.instance[i].xcc_id = 16674ed5116aSLikun Gao dev_inst / adev->sdma.num_inst_per_xcc; 16684ed5116aSLikun Gao } 16694ed5116aSLikun Gao } 16704ed5116aSLikun Gao 16714ed5116aSLikun Gao static const struct amdgpu_irq_src_funcs sdma_v7_1_trap_irq_funcs = { 16724ed5116aSLikun Gao .set = sdma_v7_1_set_trap_irq_state, 16734ed5116aSLikun Gao .process = sdma_v7_1_process_trap_irq, 16744ed5116aSLikun Gao }; 16754ed5116aSLikun Gao 16764ed5116aSLikun Gao static const struct amdgpu_irq_src_funcs sdma_v7_1_illegal_inst_irq_funcs = { 16774ed5116aSLikun Gao .process = sdma_v7_1_process_illegal_inst_irq, 16784ed5116aSLikun Gao }; 16794ed5116aSLikun Gao 16804ed5116aSLikun Gao static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev) 16814ed5116aSLikun Gao { 16824ed5116aSLikun Gao adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 + 16834ed5116aSLikun Gao adev->sdma.num_instances; 16844ed5116aSLikun Gao adev->sdma.trap_irq.funcs = &sdma_v7_1_trap_irq_funcs; 16854ed5116aSLikun Gao adev->sdma.illegal_inst_irq.funcs = &sdma_v7_1_illegal_inst_irq_funcs; 16864ed5116aSLikun Gao } 16874ed5116aSLikun Gao 16884ed5116aSLikun Gao /** 16894ed5116aSLikun Gao * sdma_v7_1_emit_copy_buffer - copy buffer using the sDMA engine 16904ed5116aSLikun Gao * 16914ed5116aSLikun Gao * @ib: indirect buffer to fill with commands 
16924ed5116aSLikun Gao * @src_offset: src GPU address 16934ed5116aSLikun Gao * @dst_offset: dst GPU address 16944ed5116aSLikun Gao * @byte_count: number of bytes to xfer 16954ed5116aSLikun Gao * @copy_flags: copy flags for the buffers 16964ed5116aSLikun Gao * 16974ed5116aSLikun Gao * Copy GPU buffers using the DMA engine. 16984ed5116aSLikun Gao * Used by the amdgpu ttm implementation to move pages if 16994ed5116aSLikun Gao * registered as the asic copy callback. 17004ed5116aSLikun Gao */ 17014ed5116aSLikun Gao static void sdma_v7_1_emit_copy_buffer(struct amdgpu_ib *ib, 17024ed5116aSLikun Gao uint64_t src_offset, 17034ed5116aSLikun Gao uint64_t dst_offset, 17044ed5116aSLikun Gao uint32_t byte_count, 17054ed5116aSLikun Gao uint32_t copy_flags) 17064ed5116aSLikun Gao { 17074ed5116aSLikun Gao ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | 17084ed5116aSLikun Gao SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | 17094ed5116aSLikun Gao SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); 17104ed5116aSLikun Gao 17114ed5116aSLikun Gao ib->ptr[ib->length_dw++] = byte_count - 1; 17124ed5116aSLikun Gao ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ 17134ed5116aSLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); 17144ed5116aSLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); 17154ed5116aSLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 17164ed5116aSLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); 17174ed5116aSLikun Gao } 17184ed5116aSLikun Gao 17194ed5116aSLikun Gao /** 17204ed5116aSLikun Gao * sdma_v7_1_emit_fill_buffer - fill buffer using the sDMA engine 17214ed5116aSLikun Gao * 17224ed5116aSLikun Gao * @ib: indirect buffer to fill 17234ed5116aSLikun Gao * @src_data: value to write to buffer 17244ed5116aSLikun Gao * @dst_offset: dst GPU address 17254ed5116aSLikun Gao * @byte_count: number of bytes to xfer 17264ed5116aSLikun Gao * 17274ed5116aSLikun Gao * Fill GPU buffers using the DMA engine. 
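 * The resulting CONST_FILL packet is five dwords (header, dst address lo/hi,
 * 32-bit fill pattern, byte_count - 1), which matches .fill_num_dw = 5 in
 * sdma_v7_1_buffer_funcs below.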
17284ed5116aSLikun Gao */ 17294ed5116aSLikun Gao static void sdma_v7_1_emit_fill_buffer(struct amdgpu_ib *ib, 17304ed5116aSLikun Gao uint32_t src_data, 17314ed5116aSLikun Gao uint64_t dst_offset, 17324ed5116aSLikun Gao uint32_t byte_count) 17334ed5116aSLikun Gao { 17344ed5116aSLikun Gao ib->ptr[ib->length_dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL); 17354ed5116aSLikun Gao ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); 17364ed5116aSLikun Gao ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); 17374ed5116aSLikun Gao ib->ptr[ib->length_dw++] = src_data; 17384ed5116aSLikun Gao ib->ptr[ib->length_dw++] = byte_count - 1; 17394ed5116aSLikun Gao } 17404ed5116aSLikun Gao 17414ed5116aSLikun Gao static const struct amdgpu_buffer_funcs sdma_v7_1_buffer_funcs = { 17424ed5116aSLikun Gao .copy_max_bytes = 0x400000, 17434ed5116aSLikun Gao .copy_num_dw = 8, 17444ed5116aSLikun Gao .emit_copy_buffer = sdma_v7_1_emit_copy_buffer, 17454ed5116aSLikun Gao .fill_max_bytes = 0x400000, 17464ed5116aSLikun Gao .fill_num_dw = 5, 17474ed5116aSLikun Gao .emit_fill_buffer = sdma_v7_1_emit_fill_buffer, 17484ed5116aSLikun Gao }; 17494ed5116aSLikun Gao 17504ed5116aSLikun Gao static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev) 17514ed5116aSLikun Gao { 17524ed5116aSLikun Gao adev->mman.buffer_funcs = &sdma_v7_1_buffer_funcs; 17534ed5116aSLikun Gao adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; 17544ed5116aSLikun Gao } 17554ed5116aSLikun Gao 17564ed5116aSLikun Gao static const struct amdgpu_vm_pte_funcs sdma_v7_1_vm_pte_funcs = { 17574ed5116aSLikun Gao .copy_pte_num_dw = 8, 17584ed5116aSLikun Gao .copy_pte = sdma_v7_1_vm_copy_pte, 17594ed5116aSLikun Gao .write_pte = sdma_v7_1_vm_write_pte, 17604ed5116aSLikun Gao .set_pte_pde = sdma_v7_1_vm_set_pte_pde, 17614ed5116aSLikun Gao }; 17624ed5116aSLikun Gao 17634ed5116aSLikun Gao static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev) 17644ed5116aSLikun Gao { 17654ed5116aSLikun Gao unsigned i; 17664ed5116aSLikun Gao 17674ed5116aSLikun Gao adev->vm_manager.vm_pte_funcs = &sdma_v7_1_vm_pte_funcs; 17684ed5116aSLikun Gao for (i = 0; i < adev->sdma.num_instances; i++) { 17694ed5116aSLikun Gao adev->vm_manager.vm_pte_scheds[i] = 17704ed5116aSLikun Gao &adev->sdma.instance[i].ring.sched; 17714ed5116aSLikun Gao } 17724ed5116aSLikun Gao adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; 17734ed5116aSLikun Gao } 17744ed5116aSLikun Gao 17754ed5116aSLikun Gao const struct amdgpu_ip_block_version sdma_v7_1_ip_block = { 17764ed5116aSLikun Gao .type = AMD_IP_BLOCK_TYPE_SDMA, 17774ed5116aSLikun Gao .major = 7, 17784ed5116aSLikun Gao .minor = 1, 17794ed5116aSLikun Gao .rev = 0, 17804ed5116aSLikun Gao .funcs = &sdma_v7_1_ip_funcs, 17814ed5116aSLikun Gao }; 17824ed5116aSLikun Gao 17834ed5116aSLikun Gao static int sdma_v7_1_xcp_resume(void *handle, uint32_t inst_mask) 17844ed5116aSLikun Gao { 17854ed5116aSLikun Gao struct amdgpu_device *adev = (struct amdgpu_device *)handle; 17864ed5116aSLikun Gao int r; 17874ed5116aSLikun Gao 17884ed5116aSLikun Gao r = sdma_v7_1_inst_start(adev, inst_mask); 17894ed5116aSLikun Gao 17904ed5116aSLikun Gao return r; 17914ed5116aSLikun Gao } 17924ed5116aSLikun Gao 17934ed5116aSLikun Gao static int sdma_v7_1_xcp_suspend(void *handle, uint32_t inst_mask) 17944ed5116aSLikun Gao { 17954ed5116aSLikun Gao struct amdgpu_device *adev = (struct amdgpu_device *)handle; 17964ed5116aSLikun Gao 17974ed5116aSLikun Gao sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask); 17984ed5116aSLikun Gao 
sdma_v7_1_inst_enable(adev, false, inst_mask); 17994ed5116aSLikun Gao 18004ed5116aSLikun Gao return 0; 18014ed5116aSLikun Gao } 18024ed5116aSLikun Gao 18034ed5116aSLikun Gao struct amdgpu_xcp_ip_funcs sdma_v7_1_xcp_funcs = { 18044ed5116aSLikun Gao .suspend = &sdma_v7_1_xcp_suspend, 18054ed5116aSLikun Gao .resume = &sdma_v7_1_xcp_resume 18064ed5116aSLikun Gao }; 1807
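/*
 * Note: sdma_v7_1_xcp_suspend()/sdma_v7_1_xcp_resume() mirror the global
 * hw_fini/hw_init paths above but act only on the SDMA instances selected by
 * @inst_mask, so one compute partition (XCP) can be quiesced or restarted
 * without touching engines owned by other partitions.  Illustrative call
 * sequence only (the real caller lives in the partition management code, and
 * xcp_sdma_mask is a placeholder for that partition's SDMA instance mask):
 *
 *	sdma_v7_1_xcp_funcs.suspend(adev, xcp_sdma_mask);
 *	... repartition / reconfigure ...
 *	sdma_v7_1_xcp_funcs.resume(adev, xcp_sdma_mask);
 */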