/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "amdgpu.h"
#include "gmc_v12_1.h"
#include "soc15_common.h"
#include "soc_v1_0_enum.h"
#include "oss/osssys_7_1_0_offset.h"
#include "oss/osssys_7_1_0_sh_mask.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"

/* Enable or disable the VM fault interrupt bits in VM_CONTEXT0..15_CNTL of
 * every populated VM hub.  MMHUB contexts are accessed through the SOC15
 * MMHUB IP path; GFXHUB contexts go through the per-XCC accessors with the
 * hub index as the XCC instance.
 */
static int gmc_v12_1_vm_fault_interrupt_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *src,
					      unsigned int type,
					      enum amdgpu_interrupt_state state)
{
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, i, j;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
			hub = &adev->vmhub[j];
			/* 16 VM contexts per hub */
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;

				/* This works because this interrupt is only
				 * enabled at init/resume and disabled in
				 * fini/suspend, so the overall state doesn't
				 * change over the course of suspend/resume.
				 */
				if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
					continue;

				if (j >= AMDGPU_MMHUB0(0))
					tmp = RREG32_SOC15_IP(MMHUB, reg);
				else
					tmp = RREG32_XCC(reg, j);

				/* clear the per-hub fault-enable mask */
				tmp &= ~hub->vm_cntx_cntl_vm_fault;

				if (j >= AMDGPU_MMHUB0(0))
					WREG32_SOC15_IP(MMHUB, reg, tmp);
				else
					WREG32_XCC(reg, tmp, j);
			}
		}
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
			hub = &adev->vmhub[j];
			for (i = 0; i < 16; i++) {
				reg = hub->vm_context0_cntl + i;

				/* This works because this interrupt is only
				 * enabled at init/resume and disabled in
				 * fini/suspend, so the overall state doesn't
				 * change over the course of suspend/resume.
				 */
				if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0)))
					continue;

				if (j >= AMDGPU_MMHUB0(0))
					tmp = RREG32_SOC15_IP(MMHUB, reg);
				else
					tmp = RREG32_XCC(reg, j);

				/* set the per-hub fault-enable mask */
				tmp |= hub->vm_cntx_cntl_vm_fault;

				if (j >= AMDGPU_MMHUB0(0))
					WREG32_SOC15_IP(MMHUB, reg, tmp);
				else
					WREG32_XCC(reg, tmp, j);
			}
		}
		break;
	default:
		break;
	}

	return 0;
}

/* VM fault (page fault) interrupt handler.  Decodes the faulting address
 * and hub from the IV entry, tries to recover retry faults by filling page
 * tables, and otherwise logs the fault details.  Returns 1 when the entry
 * has been fully consumed (handled or delegated), 0 to let processing
 * continue.
 */
static int gmc_v12_1_process_interrupt(struct amdgpu_device *adev,
				       struct amdgpu_irq_src *source,
				       struct amdgpu_iv_entry *entry)
{
	struct amdgpu_task_info *task_info;
	bool retry_fault = false, write_fault = false;
	unsigned int vmhub, node_id;
	struct amdgpu_vmhub *hub;
	uint32_t cam_index = 0;
	const char *hub_name;
	int ret, xcc_id = 0;
	uint32_t status = 0;
	u64 addr;

	node_id = entry->node_id;

	/* Faulting page address: low bits in src_data[0] (page-aligned,
	 * hence << 12), upper 13 bits in src_data[1].
	 */
	addr = (u64)entry->src_data[0] << 12;
	addr |= ((u64)entry->src_data[1] & 0x1fff) << 44;

	if (entry->src_id == UTCL2_1_0__SRCID__RETRY) {
		retry_fault = true;
		/* bit 21 of src_data[1] — presumably the RW indicator of the
		 * fault; confirm against the UTCL2 IV payload layout.
		 */
		write_fault = !!(entry->src_data[1] & 0x200000);
	}

	/* VMC client id means the fault came from an MMHUB; anything else is
	 * treated as a GFXHUB fault on the XCC derived from the IH node id.
	 */
	if (entry->client_id == SOC_V1_0_IH_CLIENTID_VMC) {
		hub_name = "mmhub0";
		vmhub = AMDGPU_MMHUB0(node_id / 4);
	} else {
		hub_name = "gfxhub0";
		if (adev->gfx.funcs->ih_node_to_logical_xcc) {
			xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev,
				node_id);
			if (xcc_id < 0)
				xcc_id = 0;
		}
		vmhub = xcc_id;
	}

	hub = &adev->vmhub[vmhub];

	if (retry_fault) {
		if (adev->irq.retry_cam_enabled) {
			/* Delegate it to a different ring if the hardware hasn't
			 * already done it.
			 */
			if (entry->ih == &adev->irq.ih) {
				amdgpu_irq_delegate(adev, entry, 8);
				return 1;
			}

			cam_index = entry->src_data[3] & 0x3ff;

			ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
						     addr, entry->timestamp, write_fault);
			/* ring the retry CAM doorbell to release the entry */
			WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
			if (ret)
				return 1;
		} else {
			/* Process it only if it's the first fault for this address */
			if (entry->ih != &adev->irq.ih_soft &&
			    amdgpu_gmc_filter_faults(adev, entry->ih, addr, entry->pasid,
						     entry->timestamp))
				return 1;

			/* Delegate it to a different ring if the hardware hasn't
			 * already done it.
			 */
			if (entry->ih == &adev->irq.ih) {
				amdgpu_irq_delegate(adev, entry, 8);
				return 1;
			}

			/* Try to handle the recoverable page faults by filling page
			 * tables
			 */
			if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id,
						   addr, entry->timestamp, write_fault))
				return 1;
		}
	}

	/* Give KFD a chance to consume the fault first */
	if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault))
		return 1;

	if (!printk_ratelimit())
		return 0;

	dev_err(adev->dev,
		"[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", hub_name,
		retry_fault ? "retry" : "no-retry",
		entry->src_id, entry->ring_id, entry->vmid, entry->pasid);

	task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid);
	if (task_info) {
		amdgpu_vm_print_task_info(adev, task_info);
		amdgpu_vm_put_task_info(task_info);
	}

	dev_err(adev->dev, " in page starting at address 0x%016llx from IH client %d (%s)\n",
		addr, entry->client_id, soc_v1_0_ih_clientid_name[entry->client_id]);

	/* fault status registers are not accessible from the VF */
	if (amdgpu_sriov_vf(adev))
		return 0;

	/*
	 * Issue a dummy read to wait for the status register to
	 * be updated to avoid reading an incorrect value due to
	 * the new fast GRBM interface.
	 */
	if (entry->vmid_src == AMDGPU_GFXHUB(0))
		RREG32(hub->vm_l2_pro_fault_status);

	status = RREG32(hub->vm_l2_pro_fault_status);

	/* Only print L2 fault status if the status register could be read and
	 * contains useful information
	 */
	if (!status)
		return 0;

	/* write 1 to bit 0 of the fault cntl register — presumably clears the
	 * latched fault status; confirm against the register spec.
	 */
	WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);

	amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);

	hub->vmhub_funcs->print_l2_protection_fault_status(adev, status);

	return 0;
}

/* Look up the PASID currently mapped to @vmid on instance @inst via the IH
 * VMID LUT.  Instances 0-3 select LUT index 0x2+inst, instances 4-7 select
 * 0xA+(inst%4).  Returns true when a non-zero PASID is mapped.
 */
static bool gmc_v12_1_get_vmid_pasid_mapping_info(struct amdgpu_device *adev,
		uint8_t vmid, uint8_t inst,
		uint16_t *p_pasid)
{
	uint16_t index;

	if (inst/4)
		index = 0xA + inst%4;
	else
		index = 0x2 + inst%4;

	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_LUT_INDEX), index);

	*p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff;

	return !!(*p_pasid);
}

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */

/* Invalidate the TLB of @vmid on one VM hub using direct MMIO (invalidation
 * engine 17, reserved for GART).  Spins up to adev->usec_timeout microseconds
 * for the per-vmid ACK bit, then optionally issues the extra MMHUB private
 * invalidation.  All register traffic happens under gmc.invalidate_lock.
 */
static void gmc_v12_1_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
				   unsigned int vmhub, uint32_t flush_type)
{
	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
	u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
	u32 tmp;
	/* Use register 17 for GART */
	const unsigned eng = 17;
	unsigned int i;
	unsigned char hub_ip = 0;

	hub_ip = (AMDGPU_IS_GFXHUB(vmhub)) ?
		GC_HWIP : MMHUB_HWIP;

	spin_lock(&adev->gmc.invalidate_lock);

	/* kick off the invalidation request */
	WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);

	/* Wait for ACK with a delay.*/
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
					hub->eng_distance * eng, hub_ip);
		tmp &= 1 << vmid;
		if (tmp)
			break;

		udelay(1);
	}

	/* Issue additional private vm invalidation to MMHUB */
	if (!AMDGPU_IS_GFXHUB(vmhub) &&
	    (hub->vm_l2_bank_select_reserved_cid2) &&
	    !amdgpu_sriov_vf(adev)) {
		inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
		/* bit 25: RESERVED_CACHE_PRIVATE_INVALIDATION */
		inv_req |= (1 << 25);
		/* Issue private invalidation */
		WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req);
		/* Read back to ensure invalidation is done*/
		RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
	}

	spin_unlock(&adev->gmc.invalidate_lock);

	if (i < adev->usec_timeout)
		return;

	dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n");
}

/**
 * gmc_v12_1_flush_gpu_tlb - gart tlb flush callback
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @vmhub: which hub to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table.
3165056b75fSHawking Zhang */ 3175056b75fSHawking Zhang static void gmc_v12_1_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, 3185056b75fSHawking Zhang uint32_t vmhub, uint32_t flush_type) 3195056b75fSHawking Zhang { 320b7c4040dSMukul Joshi u32 inst; 321b7c4040dSMukul Joshi 322acf07acfSLikun Gao if (AMDGPU_IS_GFXHUB(vmhub) && 323acf07acfSLikun Gao !adev->gfx.is_poweron) 324acf07acfSLikun Gao return; 325acf07acfSLikun Gao 326b7c4040dSMukul Joshi if (vmhub >= AMDGPU_MMHUB0(0)) 327b7c4040dSMukul Joshi inst = 0; 328b7c4040dSMukul Joshi else 329b7c4040dSMukul Joshi inst = vmhub; 330b7c4040dSMukul Joshi 3315056b75fSHawking Zhang /* This is necessary for SRIOV as well as for GFXOFF to function 3325056b75fSHawking Zhang * properly under bare metal 3335056b75fSHawking Zhang */ 334b7c4040dSMukul Joshi if (((adev->gfx.kiq[inst].ring.sched.ready || 335b7c4040dSMukul Joshi adev->mes.ring[MES_PIPE_INST(inst, 0)].sched.ready) && 3365056b75fSHawking Zhang (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)))) { 3375056b75fSHawking Zhang struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; 3385056b75fSHawking Zhang const unsigned eng = 17; 3395056b75fSHawking Zhang u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); 3405056b75fSHawking Zhang u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng; 3415056b75fSHawking Zhang u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; 3425056b75fSHawking Zhang 3435056b75fSHawking Zhang amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, 344b7c4040dSMukul Joshi 1 << vmid, inst); 3455056b75fSHawking Zhang return; 3465056b75fSHawking Zhang } 3475056b75fSHawking Zhang 3485056b75fSHawking Zhang mutex_lock(&adev->mman.gtt_window_lock); 3495056b75fSHawking Zhang gmc_v12_1_flush_vm_hub(adev, vmid, vmhub, 0); 3505056b75fSHawking Zhang mutex_unlock(&adev->mman.gtt_window_lock); 3515056b75fSHawking Zhang return; 3525056b75fSHawking Zhang } 3535056b75fSHawking Zhang 3545056b75fSHawking Zhang /** 3555056b75fSHawking 
Zhang * gmc_v12_1_flush_gpu_tlb_pasid - tlb flush via pasid 3565056b75fSHawking Zhang * 3575056b75fSHawking Zhang * @adev: amdgpu_device pointer 3585056b75fSHawking Zhang * @pasid: pasid to be flush 3595056b75fSHawking Zhang * @flush_type: the flush type 3605056b75fSHawking Zhang * @all_hub: flush all hubs 3615056b75fSHawking Zhang * @inst: is used to select which instance of KIQ to use for the invalidation 3625056b75fSHawking Zhang * 3635056b75fSHawking Zhang * Flush the TLB for the requested pasid. 3645056b75fSHawking Zhang */ 3655056b75fSHawking Zhang static void gmc_v12_1_flush_gpu_tlb_pasid(struct amdgpu_device *adev, 3665056b75fSHawking Zhang uint16_t pasid, uint32_t flush_type, 3675056b75fSHawking Zhang bool all_hub, uint32_t inst) 3685056b75fSHawking Zhang { 3695056b75fSHawking Zhang uint16_t queried; 3705056b75fSHawking Zhang int vmid, i; 3715056b75fSHawking Zhang 372*d0c989a0SShaoyun Liu if (adev->enable_uni_mes && adev->mes.ring[0].sched.ready && 373*d0c989a0SShaoyun Liu (adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x6f) { 374*d0c989a0SShaoyun Liu struct mes_inv_tlbs_pasid_input input = {0}; 375*d0c989a0SShaoyun Liu input.xcc_id = inst; 376*d0c989a0SShaoyun Liu input.pasid = pasid; 377*d0c989a0SShaoyun Liu input.flush_type = flush_type; 378*d0c989a0SShaoyun Liu 379*d0c989a0SShaoyun Liu /* MES will invalidate hubs for the device(including slave xcc) from master, ignore request from slave */ 380*d0c989a0SShaoyun Liu if (!amdgpu_gfx_is_master_xcc(adev, inst)) 381*d0c989a0SShaoyun Liu return; 382*d0c989a0SShaoyun Liu 383*d0c989a0SShaoyun Liu input.hub_id = AMDGPU_GFXHUB(0); 384*d0c989a0SShaoyun Liu adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input); 385*d0c989a0SShaoyun Liu 386*d0c989a0SShaoyun Liu if (all_hub) { 387*d0c989a0SShaoyun Liu /* invalidate mm_hub */ 388*d0c989a0SShaoyun Liu if (test_bit(AMDGPU_MMHUB1(0), adev->vmhubs_mask)) { 389*d0c989a0SShaoyun Liu input.hub_id = AMDGPU_MMHUB0(0); 390*d0c989a0SShaoyun Liu 
adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input); 391*d0c989a0SShaoyun Liu } 392*d0c989a0SShaoyun Liu if (test_bit(AMDGPU_MMHUB1(0), adev->vmhubs_mask)) { 393*d0c989a0SShaoyun Liu input.hub_id = AMDGPU_MMHUB1(0); 394*d0c989a0SShaoyun Liu adev->mes.funcs->invalidate_tlbs_pasid(&adev->mes, &input); 395*d0c989a0SShaoyun Liu } 396*d0c989a0SShaoyun Liu } 397*d0c989a0SShaoyun Liu return; 398*d0c989a0SShaoyun Liu } 399*d0c989a0SShaoyun Liu 4005056b75fSHawking Zhang for (vmid = 1; vmid < 16; vmid++) { 4015056b75fSHawking Zhang bool valid; 4025056b75fSHawking Zhang 40344fc86f2SMukul Joshi valid = gmc_v12_1_get_vmid_pasid_mapping_info(adev, vmid, inst, 4045056b75fSHawking Zhang &queried); 4055056b75fSHawking Zhang if (!valid || queried != pasid) 4065056b75fSHawking Zhang continue; 4075056b75fSHawking Zhang 4085056b75fSHawking Zhang if (all_hub) { 4095056b75fSHawking Zhang for_each_set_bit(i, adev->vmhubs_mask, 4105056b75fSHawking Zhang AMDGPU_MAX_VMHUBS) 4115056b75fSHawking Zhang gmc_v12_1_flush_gpu_tlb(adev, vmid, i, 4125056b75fSHawking Zhang flush_type); 4135056b75fSHawking Zhang } else { 4140c9ad472SMukul Joshi gmc_v12_1_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(inst), 4155056b75fSHawking Zhang flush_type); 4165056b75fSHawking Zhang } 4175056b75fSHawking Zhang } 4185056b75fSHawking Zhang } 4195056b75fSHawking Zhang 4205056b75fSHawking Zhang static uint64_t gmc_v12_1_emit_flush_gpu_tlb(struct amdgpu_ring *ring, 4215056b75fSHawking Zhang unsigned vmid, uint64_t pd_addr) 4225056b75fSHawking Zhang { 4235056b75fSHawking Zhang struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; 4245056b75fSHawking Zhang uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); 4255056b75fSHawking Zhang unsigned eng = ring->vm_inv_eng; 4265056b75fSHawking Zhang 4275056b75fSHawking Zhang amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + 4285056b75fSHawking Zhang (hub->ctx_addr_distance * vmid), 4295056b75fSHawking Zhang lower_32_bits(pd_addr)); 4305056b75fSHawking Zhang 
4315056b75fSHawking Zhang amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + 4325056b75fSHawking Zhang (hub->ctx_addr_distance * vmid), 4335056b75fSHawking Zhang upper_32_bits(pd_addr)); 4345056b75fSHawking Zhang 4355056b75fSHawking Zhang amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + 4365056b75fSHawking Zhang hub->eng_distance * eng, 4375056b75fSHawking Zhang hub->vm_inv_eng0_ack + 4385056b75fSHawking Zhang hub->eng_distance * eng, 4395056b75fSHawking Zhang req, 1 << vmid); 4405056b75fSHawking Zhang 4415056b75fSHawking Zhang return pd_addr; 4425056b75fSHawking Zhang } 4435056b75fSHawking Zhang 4445056b75fSHawking Zhang static void gmc_v12_1_emit_pasid_mapping(struct amdgpu_ring *ring, 4455056b75fSHawking Zhang unsigned vmid, unsigned pasid) 4465056b75fSHawking Zhang { 4475056b75fSHawking Zhang struct amdgpu_device *adev = ring->adev; 4485056b75fSHawking Zhang uint32_t reg; 4495056b75fSHawking Zhang 4505056b75fSHawking Zhang if (ring->vm_hub == AMDGPU_GFXHUB(0)) 4515056b75fSHawking Zhang reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; 4525056b75fSHawking Zhang else 4535056b75fSHawking Zhang reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid; 4545056b75fSHawking Zhang 4555056b75fSHawking Zhang amdgpu_ring_emit_wreg(ring, reg, pasid); 4565056b75fSHawking Zhang } 4575056b75fSHawking Zhang 4585056b75fSHawking Zhang /* 4595056b75fSHawking Zhang * PTE format: 4605056b75fSHawking Zhang * 63 P 4615056b75fSHawking Zhang * 62:59 reserved 4625056b75fSHawking Zhang * 58 D 4635056b75fSHawking Zhang * 57 G 4645056b75fSHawking Zhang * 56 T 4655056b75fSHawking Zhang * 55:54 M 4665056b75fSHawking Zhang * 53:52 SW 4675056b75fSHawking Zhang * 51:48 reserved for future 4685056b75fSHawking Zhang * 47:12 4k physical page base address 4695056b75fSHawking Zhang * 11:7 fragment 4705056b75fSHawking Zhang * 6 write 4715056b75fSHawking Zhang * 5 read 4725056b75fSHawking Zhang * 4 exe 4735056b75fSHawking Zhang * 3 Z 4745056b75fSHawking Zhang * 2 
 * 1 system
 * 0 valid
 *
 * PDE format:
 * 63 P
 * 62:58 block fragment size
 * 57 reserved
 * 56 A
 * 55:54 M
 * 53:52 reserved
 * 51:48 reserved for future
 * 47:6 physical base address of PD or PTE
 * 5:3 reserved
 * 2 C
 * 1 system
 * 0 valid
 */

/* Translate a PDE address/flags pair for @level.  Local (non-system,
 * non-PTE) addresses are rebased from the VRAM aperture to the physical
 * base; SNOOPED is always set on PDEs.  With translate_further, PDB1
 * entries get the 2MB block fragment size and PDB0 entries drop the
 * PDE-as-PTE bit.
 */
static void gmc_v12_1_get_vm_pde(struct amdgpu_device *adev, int level,
				 uint64_t *addr, uint64_t *flags)
{
	if (!(*flags & AMDGPU_PDE_PTE_GFX12) && !(*flags & AMDGPU_PTE_SYSTEM))
		*addr = adev->vm_manager.vram_base_offset + *addr -
			adev->gmc.vram_start;
	/* address must be 64-byte aligned and within the 48-bit PA range */
	BUG_ON(*addr & 0xFFFF00000000003FULL);

	*flags |= AMDGPU_PTE_SNOOPED;

	if (!adev->gmc.translate_further)
		return;

	if (level == AMDGPU_VM_PDB1) {
		/* Set the block fragment size */
		if (!(*flags & AMDGPU_PDE_PTE_GFX12))
			*flags |= AMDGPU_PDE_BFS_GFX12(0x9);

	} else if (level == AMDGPU_VM_PDB0) {
		if (*flags & AMDGPU_PDE_PTE_GFX12)
			*flags &= ~AMDGPU_PDE_PTE_GFX12;
	}
}

/* Derive MTYPE/SNOOPED/BUS_ATOMICS PTE flags from the BO's placement and
 * coherence creation flags.  "Local" means VRAM owned by this device;
 * amdgpu_mtype_local (module parameter) can downgrade the local MTYPE.
 */
static void gmc_v12_1_get_coherence_flags(struct amdgpu_device *adev,
					  struct amdgpu_bo *bo,
					  uint64_t *flags)
{
	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
	bool is_vram = bo->tbo.resource &&
		bo->tbo.resource->mem_type == TTM_PL_VRAM;
	bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
				     AMDGPU_GEM_CREATE_EXT_COHERENT);
	bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
	uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
	bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
	unsigned int mtype, mtype_local;
	bool snoop = false;
	bool is_local = false;

	switch (gc_ip_version) {
	case IP_VERSION(12, 1, 0):
		/* default local MTYPE is RW; amdgpu_mtype_local=1 selects NC,
		 * =2 (CC) is unsupported and falls back to RW
		 */
		mtype_local = MTYPE_RW;
		if (amdgpu_mtype_local == 1) {
			DRM_INFO_ONCE("Using MTYPE_NC for local memory\n");
			mtype_local = MTYPE_NC;
		} else if (amdgpu_mtype_local == 2) {
			DRM_INFO_ONCE("MTYPE_CC not supported, using MTYPE_RW instead for local memory\n");
		} else {
			DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
		}

		is_local = (is_vram && adev == bo_adev);
		snoop = true;
		if (uncached) {
			mtype = MTYPE_UC;
		} else if (ext_coherent) {
			/* extended coherence: remote mappings must be UC */
			mtype = is_local ? mtype_local : MTYPE_UC;
		} else {
			if (is_local)
				mtype = mtype_local;
			else
				mtype = MTYPE_NC;
		}
		break;
	default:
		if (uncached || coherent)
			mtype = MTYPE_UC;
		else
			mtype = MTYPE_NC;
	}

	if (mtype != MTYPE_NC)
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, mtype);

	if (is_local || adev->have_atomics_support)
		*flags |= AMDGPU_PTE_BUS_ATOMICS;

	*flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
}

/* Build the final PTE flags for a mapping from the VM mapping flags
 * (executable bit, requested MTYPE) plus the BO's coherence properties.
 */
static void gmc_v12_1_get_vm_pte(struct amdgpu_device *adev,
				 struct amdgpu_vm *vm,
				 struct amdgpu_bo *bo,
				 uint32_t vm_flags,
				 uint64_t *flags)
{
	if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
		*flags |= AMDGPU_PTE_EXECUTABLE;
	else
		*flags &= ~AMDGPU_PTE_EXECUTABLE;

	switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
	case AMDGPU_VM_MTYPE_DEFAULT:
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
		break;
	case AMDGPU_VM_MTYPE_NC:
	default:
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
		break;
	case AMDGPU_VM_MTYPE_RW:
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_RW);
		break;
	case AMDGPU_VM_MTYPE_UC:
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
		break;
	}

	/* coherence flags only matter for valid mappings backed by a BO */
	if ((*flags & AMDGPU_PTE_VALID) && bo)
		gmc_v12_1_get_coherence_flags(adev, bo, flags);
}

static const struct amdgpu_gmc_funcs gmc_v12_1_gmc_funcs = {
	.flush_gpu_tlb = gmc_v12_1_flush_gpu_tlb,
	.flush_gpu_tlb_pasid = gmc_v12_1_flush_gpu_tlb_pasid,
	.emit_flush_gpu_tlb = gmc_v12_1_emit_flush_gpu_tlb,
	.emit_pasid_mapping = gmc_v12_1_emit_pasid_mapping,
	.get_vm_pde = gmc_v12_1_get_vm_pde,
	.get_vm_pte = gmc_v12_1_get_vm_pte,
	.query_mem_partition_mode = &amdgpu_gmc_query_memory_partition,
	.request_mem_partition_mode = &amdgpu_gmc_request_memory_partition,
};

/* Install the GMC v12.1 callback table on the device. */
void gmc_v12_1_set_gmc_funcs(struct amdgpu_device *adev)
{
	adev->gmc.gmc_funcs = &gmc_v12_1_gmc_funcs;
}

static const struct amdgpu_irq_src_funcs gmc_v12_1_irq_funcs = {
	.set = gmc_v12_1_vm_fault_interrupt_state,
	.process = gmc_v12_1_process_interrupt,
};

/* Register the VM fault interrupt source (single type). */
void gmc_v12_1_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gmc.vm_fault.num_types = 1;
	adev->gmc.vm_fault.funcs = &gmc_v12_1_irq_funcs;
}
631e2a6a4e6SHawking Zhang 632e2a6a4e6SHawking Zhang void gmc_v12_1_init_vram_info(struct amdgpu_device *adev) 633e2a6a4e6SHawking Zhang { 634e2a6a4e6SHawking Zhang /* TODO: query vram_info from ip discovery binary */ 635e2a6a4e6SHawking Zhang adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM4; 636e2a6a4e6SHawking Zhang adev->gmc.vram_width = 384 * 64; 637e2a6a4e6SHawking Zhang } 638