/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "amdgpu.h"
#include "gmc_v12_1.h"
#include "soc15_common.h"
#include "soc_v1_0_enum.h"
#include "oss/osssys_7_1_0_offset.h"
#include "oss/osssys_7_1_0_sh_mask.h"

/*
 * Look up the pasid currently mapped to @vmid via the IH VMID LUT
 * (one 32-bit register per VMID, indexed off regIH_VMID_0_LUT).
 * The low 16 bits of the LUT entry hold the pasid; 0 means unmapped.
 *
 * Returns true and stores the pasid in *p_pasid when a mapping exists,
 * false (with *p_pasid == 0) otherwise.
 */
static bool gmc_v12_1_get_vmid_pasid_mapping_info(struct amdgpu_device *adev,
					uint8_t vmid, uint16_t *p_pasid)
{
	*p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff;

	return !!(*p_pasid);
}

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */

/*
 * Flush one VM hub's TLB for @vmid through direct MMIO register access.
 *
 * Used on the slow path (no KIQ/MES firmware available); the caller
 * serializes against concurrent users via adev->mman.gtt_window_lock,
 * while adev->gmc.invalidate_lock protects the invalidation engine
 * registers themselves.
 */
static void gmc_v12_1_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
				   unsigned int vmhub, uint32_t flush_type)
{
	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
	u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
	u32 tmp;
	/* Use register 17 for GART */
	const unsigned eng = 17;
	unsigned int i;
	unsigned char hub_ip = 0;

	/* Select the RLC target block matching the hub being flushed. */
	hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
		 GC_HWIP : MMHUB_HWIP;

	spin_lock(&adev->gmc.invalidate_lock);

	/* Kick off the invalidation request on engine 17. */
	WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);

	/* Wait for ACK with a delay.*/
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
					hub->eng_distance * eng, hub_ip);
		/* ACK register carries one completion bit per VMID. */
		tmp &= 1 << vmid;
		if (tmp)
			break;

		udelay(1);
	}

	/* Issue additional private vm invalidation to MMHUB */
	if ((vmhub != AMDGPU_GFXHUB(0)) &&
	    (hub->vm_l2_bank_select_reserved_cid2) &&
	    !amdgpu_sriov_vf(adev)) {
		inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
		/* bit 25: RSERVED_CACHE_PRIVATE_INVALIDATION */
		inv_req |= (1 << 25);
		/* Issue private invalidation */
		WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req);
		/* Read back to ensure invalidation is done*/
		RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
	}

	spin_unlock(&adev->gmc.invalidate_lock);

	if (i < adev->usec_timeout)
		return;

	/* The poll loop above timed out without seeing the ACK bit. */
	dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n");
}

/**
 * gmc_v12_1_flush_gpu_tlb - gart tlb flush callback
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @vmhub: which hub to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table.
 */
static void gmc_v12_1_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
				    uint32_t vmhub, uint32_t flush_type)
{
	/* GFX hub registers are inaccessible while the GFX block is off. */
	if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
		return;

	/* This is necessary for SRIOV as well as for GFXOFF to function
	 * properly under bare metal
	 */
	if (((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) &&
	     (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)))) {
		struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
		const unsigned eng = 17;
		u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
		u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
		u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;

		/* Let firmware (KIQ/MES) do the write-then-wait atomically. */
		amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
						 1 << vmid, GET_INST(GC, 0));
		return;
	}

	/* MMIO fallback; NOTE(review): flush type 0 is used here rather
	 * than the caller's @flush_type — presumably the heavyweight
	 * default is always sufficient on this path; confirm against the
	 * vmhub get_invalidate_req() semantics.
	 */
	mutex_lock(&adev->mman.gtt_window_lock);
	gmc_v12_1_flush_vm_hub(adev, vmid, vmhub, 0);
	mutex_unlock(&adev->mman.gtt_window_lock);
	return;
}

/**
 * gmc_v12_1_flush_gpu_tlb_pasid - tlb flush via pasid
 *
 * @adev: amdgpu_device pointer
 * @pasid: pasid to be flush
 * @flush_type: the flush type
 * @all_hub: flush all hubs
 * @inst: is used to select which instance of KIQ to use for the invalidation
 *
 * Flush the TLB for the requested pasid.
 */
static void gmc_v12_1_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
					  uint16_t pasid, uint32_t flush_type,
					  bool all_hub, uint32_t inst)
{
	uint16_t queried;
	int vmid, i;

	/* Scan user VMIDs (1-15; VMID 0 is the kernel/GART mapping) for
	 * any that are currently mapped to @pasid and flush each one.
	 */
	for (vmid = 1; vmid < 16; vmid++) {
		bool valid;

		valid = gmc_v12_1_get_vmid_pasid_mapping_info(adev, vmid,
							      &queried);
		if (!valid || queried != pasid)
			continue;

		if (all_hub) {
			for_each_set_bit(i, adev->vmhubs_mask,
					 AMDGPU_MAX_VMHUBS)
				gmc_v12_1_flush_gpu_tlb(adev, vmid, i,
							flush_type);
		} else {
			gmc_v12_1_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
						flush_type);
		}
	}
}

/*
 * Emit a TLB flush for @vmid on @ring's hub as ring packets:
 * program the VMID's page-directory base (lo/hi), then issue the
 * invalidation request and wait on the per-VMID ACK bit.
 * Returns @pd_addr unchanged for the caller's bookkeeping.
 */
static uint64_t gmc_v12_1_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					     unsigned vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
	uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
	unsigned eng = ring->vm_inv_eng;

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
			      (hub->ctx_addr_distance * vmid),
			      lower_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
			      (hub->ctx_addr_distance * vmid),
			      upper_32_bits(pd_addr));

	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
					    hub->eng_distance * eng,
					    hub->vm_inv_eng0_ack +
					    hub->eng_distance * eng,
					    req, 1 << vmid);

	return pd_addr;
}

/*
 * Emit a write updating the IH VMID->pasid LUT for @vmid.
 * The GFX hub and MM hub each have their own LUT bank.
 */
static void gmc_v12_1_emit_pasid_mapping(struct amdgpu_ring *ring,
					 unsigned vmid, unsigned pasid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reg;

	if (ring->vm_hub == AMDGPU_GFXHUB(0))
		reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
	else
		reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid;

	amdgpu_ring_emit_wreg(ring, reg, pasid);
}

/*
 * PTE format:
 * 63 P
 * 62:59 reserved
 * 58 D
 * 57 G
 * 56 T
 * 55:54 M
 * 53:52 SW
 * 51:48 reserved for future
 * 47:12 4k physical
page base address 220 * 11:7 fragment 221 * 6 write 222 * 5 read 223 * 4 exe 224 * 3 Z 225 * 2 snooped 226 * 1 system 227 * 0 valid 228 * 229 * PDE format: 230 * 63 P 231 * 62:58 block fragment size 232 * 57 reserved 233 * 56 A 234 * 55:54 M 235 * 53:52 reserved 236 * 51:48 reserved for future 237 * 47:6 physical base address of PD or PTE 238 * 5:3 reserved 239 * 2 C 240 * 1 system 241 * 0 valid 242 */ 243 244 static void gmc_v12_1_get_vm_pde(struct amdgpu_device *adev, int level, 245 uint64_t *addr, uint64_t *flags) 246 { 247 if (!(*flags & AMDGPU_PDE_PTE_GFX12) && !(*flags & AMDGPU_PTE_SYSTEM)) 248 *addr = adev->vm_manager.vram_base_offset + *addr - 249 adev->gmc.vram_start; 250 BUG_ON(*addr & 0xFFFF00000000003FULL); 251 252 *flags |= AMDGPU_PTE_SNOOPED; 253 254 if (!adev->gmc.translate_further) 255 return; 256 257 if (level == AMDGPU_VM_PDB1) { 258 /* Set the block fragment size */ 259 if (!(*flags & AMDGPU_PDE_PTE_GFX12)) 260 *flags |= AMDGPU_PDE_BFS_GFX12(0x9); 261 262 } else if (level == AMDGPU_VM_PDB0) { 263 if (*flags & AMDGPU_PDE_PTE_GFX12) 264 *flags &= ~AMDGPU_PDE_PTE_GFX12; 265 } 266 } 267 268 static void gmc_v12_1_get_vm_pte(struct amdgpu_device *adev, 269 struct amdgpu_vm *vm, 270 struct amdgpu_bo *bo, 271 uint32_t vm_flags, 272 uint64_t *flags) 273 { 274 if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE) 275 *flags |= AMDGPU_PTE_EXECUTABLE; 276 else 277 *flags &= ~AMDGPU_PTE_EXECUTABLE; 278 279 switch (vm_flags & AMDGPU_VM_MTYPE_MASK) { 280 case AMDGPU_VM_MTYPE_DEFAULT: 281 *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); 282 break; 283 case AMDGPU_VM_MTYPE_NC: 284 default: 285 *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); 286 break; 287 case AMDGPU_VM_MTYPE_UC: 288 *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); 289 break; 290 } 291 292 if (vm_flags & AMDGPU_VM_PAGE_NOALLOC) 293 *flags |= AMDGPU_PTE_NOALLOC; 294 else 295 *flags &= ~AMDGPU_PTE_NOALLOC; 296 297 if (vm_flags & AMDGPU_VM_PAGE_PRT) { 298 *flags |= AMDGPU_PTE_SNOOPED; 299 *flags |= 
AMDGPU_PTE_SYSTEM; 300 *flags |= AMDGPU_PTE_IS_PTE; 301 *flags &= ~AMDGPU_PTE_VALID; 302 } 303 304 if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | 305 AMDGPU_GEM_CREATE_EXT_COHERENT | 306 AMDGPU_GEM_CREATE_UNCACHED)) 307 *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC); 308 309 if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED) 310 *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); 311 312 if (adev->have_atomics_support) 313 *flags |= AMDGPU_PTE_BUS_ATOMICS; 314 } 315 316 static const struct amdgpu_gmc_funcs gmc_v12_1_gmc_funcs = { 317 .flush_gpu_tlb = gmc_v12_1_flush_gpu_tlb, 318 .flush_gpu_tlb_pasid = gmc_v12_1_flush_gpu_tlb_pasid, 319 .emit_flush_gpu_tlb = gmc_v12_1_emit_flush_gpu_tlb, 320 .emit_pasid_mapping = gmc_v12_1_emit_pasid_mapping, 321 .get_vm_pde = gmc_v12_1_get_vm_pde, 322 .get_vm_pte = gmc_v12_1_get_vm_pte, 323 }; 324 325 void gmc_v12_1_set_gmc_funcs(struct amdgpu_device *adev) 326 { 327 adev->gmc.gmc_funcs = &gmc_v12_1_gmc_funcs; 328 } 329