/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "amdgpu.h"
#include "gmc_v12_1.h"
#include "soc15_common.h"
#include "soc_v1_0_enum.h"
#include "oss/osssys_7_1_0_offset.h"
#include "oss/osssys_7_1_0_sh_mask.h"

static bool gmc_v12_1_get_vmid_pasid_mapping_info(struct amdgpu_device *adev,
						  uint8_t vmid, uint16_t *p_pasid)
{
	*p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff;

	return !!(*p_pasid);
}
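
/*
 * Example (illustrative only): a zero IH_VMID_0_LUT entry means no PASID
 * is currently bound to that VMID, so the boolean return value of the
 * helper above doubles as a validity check:
 *
 *	uint16_t pasid;
 *
 *	if (gmc_v12_1_get_vmid_pasid_mapping_info(adev, vmid, &pasid))
 *		dev_dbg(adev->dev, "vmid %u serves pasid %u\n", vmid, pasid);
 */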

/*
 * GART
 * VMID 0 is the physical GPU address space as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */

static void gmc_v12_1_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
				   unsigned int vmhub, uint32_t flush_type)
{
	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
	u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
	u32 tmp;
	/* Use register 17 for GART */
	const unsigned eng = 17;
	unsigned int i;
	unsigned char hub_ip = 0;

	hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
		GC_HWIP : MMHUB_HWIP;

	spin_lock(&adev->gmc.invalidate_lock);

	WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);

	/* Wait for the ACK, polling once per microsecond. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
					hub->eng_distance * eng, hub_ip);
		tmp &= 1 << vmid;
		if (tmp)
			break;

		udelay(1);
	}

	/* Issue an additional private vm invalidation to the MMHUB */
	if ((vmhub != AMDGPU_GFXHUB(0)) &&
	    (hub->vm_l2_bank_select_reserved_cid2) &&
	    !amdgpu_sriov_vf(adev)) {
		inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
		/* bit 25: RESERVED_CACHE_PRIVATE_INVALIDATION */
		inv_req |= (1 << 25);
		/* Issue the private invalidation */
		WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req);
		/* Read back to ensure the invalidation is done */
		RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
	}

	spin_unlock(&adev->gmc.invalidate_lock);

	if (i < adev->usec_timeout)
		return;

	dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n");
}

/**
 * gmc_v12_1_flush_gpu_tlb - gart tlb flush callback
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @vmhub: which hub to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table.
 */
static void gmc_v12_1_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
				    uint32_t vmhub, uint32_t flush_type)
{
	if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
		return;

	/* This is necessary for SRIOV as well as for GFXOFF to function
	 * properly under bare metal
	 */
	if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) &&
	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
		struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
		const unsigned eng = 17;
		u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
		u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
		u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;

		amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
						 1 << vmid, GET_INST(GC, 0));
		return;
	}

	mutex_lock(&adev->mman.gtt_window_lock);
	gmc_v12_1_flush_vm_hub(adev, vmid, vmhub, 0);
	mutex_unlock(&adev->mman.gtt_window_lock);
}
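
/*
 * Example (illustrative only): callers do not normally invoke
 * gmc_v12_1_flush_gpu_tlb() directly; they go through the generic
 * helper, which dispatches via the gmc_funcs table installed at the
 * bottom of this file:
 *
 *	amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
 */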

/**
 * gmc_v12_1_flush_gpu_tlb_pasid - tlb flush via pasid
 *
 * @adev: amdgpu_device pointer
 * @pasid: pasid to be flushed
 * @flush_type: the flush type
 * @all_hub: flush all hubs
 * @inst: which instance of KIQ to use for the invalidation
 *
 * Flush the TLB for the requested pasid.
 */
static void gmc_v12_1_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
					  uint16_t pasid, uint32_t flush_type,
					  bool all_hub, uint32_t inst)
{
	uint16_t queried;
	int vmid, i;

	for (vmid = 1; vmid < 16; vmid++) {
		bool valid;

		valid = gmc_v12_1_get_vmid_pasid_mapping_info(adev, vmid,
							      &queried);
		if (!valid || queried != pasid)
			continue;

		if (all_hub) {
			for_each_set_bit(i, adev->vmhubs_mask,
					 AMDGPU_MAX_VMHUBS)
				gmc_v12_1_flush_gpu_tlb(adev, vmid, i,
							flush_type);
		} else {
			gmc_v12_1_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
						flush_type);
		}
	}
}

static uint64_t gmc_v12_1_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					     unsigned vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
	uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
	unsigned eng = ring->vm_inv_eng;

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
			      (hub->ctx_addr_distance * vmid),
			      lower_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
			      (hub->ctx_addr_distance * vmid),
			      upper_32_bits(pd_addr));

	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
					    hub->eng_distance * eng,
					    hub->vm_inv_eng0_ack +
					    hub->eng_distance * eng,
					    req, 1 << vmid);

	return pd_addr;
}

static void gmc_v12_1_emit_pasid_mapping(struct amdgpu_ring *ring,
					 unsigned vmid, unsigned pasid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reg;

	if (ring->vm_hub == AMDGPU_GFXHUB(0))
		reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
	else
		reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid;

	amdgpu_ring_emit_wreg(ring, reg, pasid);
}

/*
 * PTE format:
 * 63 P
 * 62:59 reserved
 * 58 D
 * 57 G
 * 56 T
 * 55:54 M
 * 53:52 SW
 * 51:48 reserved for future
 * 47:12 4k physical page base address
 * 11:7 fragment
 * 6 write
 * 5 read
 * 4 exe
 * 3 Z
 * 2 snooped
 * 1 system
 * 0 valid
 *
 * PDE format:
 * 63 P
 * 62:58 block fragment size
 * 57 reserved
 * 56 A
 * 55:54 M
 * 53:52 reserved
 * 51:48 reserved for future
 * 47:6 physical base address of PD or PTE
 * 5:3 reserved
 * 2 C
 * 1 system
 * 0 valid
 */

static void gmc_v12_1_get_vm_pde(struct amdgpu_device *adev, int level,
				 uint64_t *addr, uint64_t *flags)
{
	if (!(*flags & AMDGPU_PDE_PTE_GFX12) && !(*flags & AMDGPU_PTE_SYSTEM))
		*addr = adev->vm_manager.vram_base_offset + *addr -
			adev->gmc.vram_start;
	BUG_ON(*addr & 0xFFFF00000000003FULL);

	*flags |= AMDGPU_PTE_SNOOPED;

	if (!adev->gmc.translate_further)
		return;

	if (level == AMDGPU_VM_PDB1) {
		/* Set the block fragment size */
		if (!(*flags & AMDGPU_PDE_PTE_GFX12))
			*flags |= AMDGPU_PDE_BFS_GFX12(0x9);

	} else if (level == AMDGPU_VM_PDB0) {
		if (*flags & AMDGPU_PDE_PTE_GFX12)
			*flags &= ~AMDGPU_PDE_PTE_GFX12;
	}
}
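
#if 0
/*
 * Illustrative sketch only (not built): feeding a page-directory entry
 * through gmc_v12_1_get_vm_pde().  The 0x200000 offset is made up for
 * the example; the BUG_ON above demands a 64-byte aligned, sub-48-bit
 * result.
 */
static void gmc_v12_1_example_pde(struct amdgpu_device *adev)
{
	/* Hypothetical MC address inside the VRAM aperture */
	uint64_t addr = adev->gmc.vram_start + 0x200000;
	uint64_t flags = AMDGPU_PTE_VALID;

	/* Rebases addr against vram_base_offset and ORs in SNOOPED;
	 * with translate_further set, a PDB1 entry also gets a block
	 * fragment size of 0x9, i.e. 4KB << 9 = 2MB per PDB0 entry.
	 */
	gmc_v12_1_get_vm_pde(adev, AMDGPU_VM_PDB1, &addr, &flags);
}
#endif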

#if 0
static void gmc_v12_1_get_coherence_flags(struct amdgpu_device *adev,
					  struct amdgpu_bo *bo,
					  uint64_t *flags)
{
	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
	bool is_vram = bo->tbo.resource &&
		bo->tbo.resource->mem_type == TTM_PL_VRAM;
	bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
				     AMDGPU_GEM_CREATE_EXT_COHERENT);
	bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
	uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
	bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
	unsigned int mtype, mtype_local;
	bool snoop = false;
	bool is_local;

	switch (gc_ip_version) {
	case IP_VERSION(12, 1, 0):
		mtype_local = MTYPE_RW;
		if (amdgpu_mtype_local == 1) {
			DRM_INFO_ONCE("Using MTYPE_NC for local memory\n");
			mtype_local = MTYPE_NC;
		} else if (amdgpu_mtype_local == 2) {
			DRM_INFO_ONCE("MTYPE_CC not supported, using MTYPE_RW instead for local memory\n");
		} else {
			DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
		}

		is_local = (is_vram && adev == bo_adev);
		snoop = true;
		if (uncached) {
			mtype = MTYPE_UC;
		} else if (ext_coherent) {
			mtype = is_local ? mtype_local : MTYPE_UC;
		} else {
			if (is_local)
				mtype = mtype_local;
			else
				mtype = MTYPE_NC;
		}
		break;
	default:
		if (uncached || coherent)
			mtype = MTYPE_UC;
		else
			mtype = MTYPE_NC;
	}

	if (mtype != MTYPE_NC)
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, mtype);

	*flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
}
#endif

static void gmc_v12_1_get_vm_pte(struct amdgpu_device *adev,
				 struct amdgpu_vm *vm,
				 struct amdgpu_bo *bo,
				 uint32_t vm_flags,
				 uint64_t *flags)
{
	if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
		*flags |= AMDGPU_PTE_EXECUTABLE;
	else
		*flags &= ~AMDGPU_PTE_EXECUTABLE;

	switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
	case AMDGPU_VM_MTYPE_UC:
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
		break;
	case AMDGPU_VM_MTYPE_DEFAULT:
	case AMDGPU_VM_MTYPE_NC:
	default:
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
		break;
	}

	if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
		*flags |= AMDGPU_PTE_NOALLOC;
	else
		*flags &= ~AMDGPU_PTE_NOALLOC;

	if (vm_flags & AMDGPU_VM_PAGE_PRT) {
		*flags |= AMDGPU_PTE_SNOOPED;
		*flags |= AMDGPU_PTE_SYSTEM;
		*flags |= AMDGPU_PTE_IS_PTE;
		*flags &= ~AMDGPU_PTE_VALID;
	}

	/* Use the GFX12 MTYPE macro here (not the NV10 one) and fold the
	 * UNCACHED case in, so a separate re-check is not needed below.
	 */
	if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
			       AMDGPU_GEM_CREATE_EXT_COHERENT |
			       AMDGPU_GEM_CREATE_UNCACHED))
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);

	if (adev->have_atomics_support)
		*flags |= AMDGPU_PTE_BUS_ATOMICS;
}

static const struct amdgpu_gmc_funcs gmc_v12_1_gmc_funcs = {
	.flush_gpu_tlb = gmc_v12_1_flush_gpu_tlb,
	.flush_gpu_tlb_pasid = gmc_v12_1_flush_gpu_tlb_pasid,
	.emit_flush_gpu_tlb = gmc_v12_1_emit_flush_gpu_tlb,
	.emit_pasid_mapping = gmc_v12_1_emit_pasid_mapping,
	.get_vm_pde = gmc_v12_1_get_vm_pde,
	.get_vm_pte = gmc_v12_1_get_vm_pte,
};

void gmc_v12_1_set_gmc_funcs(struct amdgpu_device *adev)
{
	adev->gmc.gmc_funcs = &gmc_v12_1_gmc_funcs;
}
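
/*
 * Example (illustrative only): the SoC early-init code is expected to
 * install these callbacks before the GMC IP block starts, e.g.
 *
 *	gmc_v12_1_set_gmc_funcs(adev);
 *
 * after which the generic layer reaches this file indirectly, e.g.
 * amdgpu_gmc_flush_gpu_tlb() -> adev->gmc.gmc_funcs->flush_gpu_tlb().
 */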