/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include "amdgpu.h"
#include "gmc_v12_1.h"
#include "soc15_common.h"
#include "soc_v1_0_enum.h"
#include "oss/osssys_7_1_0_offset.h"
#include "oss/osssys_7_1_0_sh_mask.h"

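/**
 * gmc_v12_1_get_vmid_pasid_mapping_info - query the VMID<->PASID mapping
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to look up
 * @p_pasid: filled with the PASID currently mapped to @vmid
 *
 * Read the PASID assigned to @vmid from the IH VMID LUT. Returns true
 * if a non-zero PASID mapping exists.
 */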
static bool gmc_v12_1_get_vmid_pasid_mapping_info(struct amdgpu_device *adev,
						  uint8_t vmid, uint16_t *p_pasid)
{
	*p_pasid = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid) & 0xffff;

	return !!(*p_pasid);
}

/*
 * GART
 * VMID 0 holds the physical GPU address mappings used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the amdgpu vm/hsa code.
 */

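/**
 * gmc_v12_1_flush_vm_hub - flush the TLB of one VM hub via MMIO
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @vmhub: which hub to flush
 * @flush_type: the flush type
 *
 * Write the invalidation request directly to the hub's invalidation
 * engine registers and poll for the ACK bit. This is the fallback path
 * used when the invalidation cannot go through the KIQ/MES firmware.
 */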
static void gmc_v12_1_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
				   unsigned int vmhub, uint32_t flush_type)
{
	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
	u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
	u32 tmp;
	/* Use register 17 for GART */
	const unsigned eng = 17;
	unsigned int i;
	unsigned char hub_ip = 0;

	hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
		   GC_HWIP : MMHUB_HWIP;

	spin_lock(&adev->gmc.invalidate_lock);

	WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip);

	/* Wait for the ACK, polling once per microsecond. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
				    hub->eng_distance * eng, hub_ip);
		tmp &= 1 << vmid;
		if (tmp)
			break;

		udelay(1);
	}

	/* Issue an additional private vm invalidation to the MMHUB */
	if ((vmhub != AMDGPU_GFXHUB(0)) &&
	    (hub->vm_l2_bank_select_reserved_cid2) &&
	    !amdgpu_sriov_vf(adev)) {
		inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
		/* bit 25: RESERVED_CACHE_PRIVATE_INVALIDATION */
		inv_req |= (1 << 25);
		/* Issue the private invalidation */
		WREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2, inv_req);
		/* Read back to ensure the invalidation has completed */
		RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2);
	}

	spin_unlock(&adev->gmc.invalidate_lock);

	if (i < adev->usec_timeout)
		return;

	dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n");
}

/**
 * gmc_v12_1_flush_gpu_tlb - gart tlb flush callback
 *
 * @adev: amdgpu_device pointer
 * @vmid: vm instance to flush
 * @vmhub: which hub to flush
 * @flush_type: the flush type
 *
 * Flush the TLB for the requested page table.
 */
static void gmc_v12_1_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
				    uint32_t vmhub, uint32_t flush_type)
{
	if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron)
		return;

	/* This is necessary for SRIOV as well as for GFXOFF to function
	 * properly under bare metal
	 */
	if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) &&
	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
		struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
		const unsigned eng = 17;
		u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
		u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
		u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;

		amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
				1 << vmid, GET_INST(GC, 0));
		return;
	}

	mutex_lock(&adev->mman.gtt_window_lock);
	gmc_v12_1_flush_vm_hub(adev, vmid, vmhub, 0);
	mutex_unlock(&adev->mman.gtt_window_lock);
}

/**
 * gmc_v12_1_flush_gpu_tlb_pasid - tlb flush via pasid
 *
 * @adev: amdgpu_device pointer
 * @pasid: pasid to be flushed
 * @flush_type: the flush type
 * @all_hub: flush all hubs
 * @inst: selects which KIQ instance to use for the invalidation
 *
 * Flush the TLB for the requested pasid.
 */
static void gmc_v12_1_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
					  uint16_t pasid, uint32_t flush_type,
					  bool all_hub, uint32_t inst)
{
	uint16_t queried;
	int vmid, i;

	for (vmid = 1; vmid < 16; vmid++) {
		bool valid;

		valid = gmc_v12_1_get_vmid_pasid_mapping_info(adev, vmid,
							      &queried);
		if (!valid || queried != pasid)
			continue;

		if (all_hub) {
			for_each_set_bit(i, adev->vmhubs_mask,
					 AMDGPU_MAX_VMHUBS)
				gmc_v12_1_flush_gpu_tlb(adev, vmid, i,
							flush_type);
		} else {
			gmc_v12_1_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0),
						flush_type);
		}
	}
}

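/**
 * gmc_v12_1_emit_flush_gpu_tlb - flush the TLB from a ring
 *
 * @ring: ring to emit the flush on
 * @vmid: vm instance to flush
 * @pd_addr: new page directory address for @vmid
 *
 * Emit the page table base update and the TLB invalidation request as
 * commands on @ring instead of writing the registers directly.
 * Returns pd_addr.
 */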
static uint64_t gmc_v12_1_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
					     unsigned vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub];
	uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
	unsigned eng = ring->vm_inv_eng;

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
			      (hub->ctx_addr_distance * vmid),
			      lower_32_bits(pd_addr));

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
			      (hub->ctx_addr_distance * vmid),
			      upper_32_bits(pd_addr));

	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
					    hub->eng_distance * eng,
					    hub->vm_inv_eng0_ack +
					    hub->eng_distance * eng,
					    req, 1 << vmid);

	return pd_addr;
}

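/**
 * gmc_v12_1_emit_pasid_mapping - update the VMID<->PASID LUT from a ring
 *
 * @ring: ring to emit the write on
 * @vmid: vm instance to map
 * @pasid: pasid to map to @vmid
 *
 * Emit a register write that updates the IH VMID LUT of the hub the
 * ring belongs to.
 */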
static void gmc_v12_1_emit_pasid_mapping(struct amdgpu_ring *ring,
					 unsigned vmid, unsigned pasid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t reg;

	if (ring->vm_hub == AMDGPU_GFXHUB(0))
		reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid;
	else
		reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid;

	amdgpu_ring_emit_wreg(ring, reg, pasid);
}

/*
 * PTE format:
 * 63 P
 * 62:59 reserved
 * 58 D
 * 57 G
 * 56 T
 * 55:54 M
 * 53:52 SW
 * 51:48 reserved for future
 * 47:12 4k physical page base address
 * 11:7 fragment
 * 6 write
 * 5 read
 * 4 exe
 * 3 Z
 * 2 snooped
 * 1 system
 * 0 valid
 *
 * PDE format:
 * 63 P
 * 62:58 block fragment size
 * 57 reserved
 * 56 A
 * 55:54 M
 * 53:52 reserved
 * 51:48 reserved for future
 * 47:6 physical base address of PD or PTE
 * 5:3 reserved
 * 2 C
 * 1 system
 * 0 valid
 */
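
#if 0
/*
 * Not built: a minimal sketch of how a PTE is assembled from the bit
 * layout documented above. The field positions come straight from that
 * comment; the helper name and the use of raw shifts (rather than the
 * AMDGPU_PTE_* helpers) are purely illustrative.
 */
static uint64_t gmc_v12_1_example_pte(uint64_t page_base)
{
	uint64_t pte = 0;

	pte |= page_base & 0x0000FFFFFFFFF000ULL;	/* 47:12 4k page base */
	pte |= 1ULL << 0;				/* valid */
	pte |= 1ULL << 1;				/* system: page lives in system memory */
	pte |= 1ULL << 2;				/* snooped */
	pte |= 1ULL << 5;				/* read */
	pte |= 1ULL << 6;				/* write */

	return pte;
}
#endif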
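
/**
 * gmc_v12_1_get_vm_pde - adjust a PDE for the hardware
 *
 * @adev: amdgpu_device pointer
 * @level: page table level the PDE belongs to
 * @addr: address the PDE points to, converted to the GPU view in place
 * @flags: PDE flags, adjusted in place
 *
 * Translate the CPU address of the pointed-to PD/PT to the GPU view
 * and apply the level specific flag fixups (block fragment size on
 * PDB1, PDE-as-PTE handling on PDB0).
 */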
static void gmc_v12_1_get_vm_pde(struct amdgpu_device *adev, int level,
				 uint64_t *addr, uint64_t *flags)
{
	if (!(*flags & AMDGPU_PDE_PTE_GFX12) && !(*flags & AMDGPU_PTE_SYSTEM))
		*addr = adev->vm_manager.vram_base_offset + *addr -
			adev->gmc.vram_start;
	BUG_ON(*addr & 0xFFFF00000000003FULL);

	*flags |= AMDGPU_PTE_SNOOPED;

	if (!adev->gmc.translate_further)
		return;

	if (level == AMDGPU_VM_PDB1) {
		/* Set the block fragment size */
		if (!(*flags & AMDGPU_PDE_PTE_GFX12))
			*flags |= AMDGPU_PDE_BFS_GFX12(0x9);

	} else if (level == AMDGPU_VM_PDB0) {
		if (*flags & AMDGPU_PDE_PTE_GFX12)
			*flags &= ~AMDGPU_PDE_PTE_GFX12;
	}
}

#if 0
static void gmc_v12_1_get_coherence_flags(struct amdgpu_device *adev,
					  struct amdgpu_bo *bo,
					  uint64_t *flags)
{
	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
	bool is_vram = bo->tbo.resource &&
		       bo->tbo.resource->mem_type == TTM_PL_VRAM;
	bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
				     AMDGPU_GEM_CREATE_EXT_COHERENT);
	bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT;
	uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0);
	bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
	unsigned int mtype, mtype_local;
	bool snoop = false;
	bool is_local;

	switch (gc_ip_version) {
	case IP_VERSION(12, 1, 0):
		mtype_local = MTYPE_RW;
		if (amdgpu_mtype_local == 1) {
			DRM_INFO_ONCE("Using MTYPE_NC for local memory\n");
			mtype_local = MTYPE_NC;
		} else if (amdgpu_mtype_local == 2) {
			DRM_INFO_ONCE("MTYPE_CC not supported, using MTYPE_RW instead for local memory\n");
		} else {
			DRM_INFO_ONCE("Using MTYPE_RW for local memory\n");
		}

		is_local = (is_vram && adev == bo_adev);
		snoop = true;
		if (uncached) {
			mtype = MTYPE_UC;
		} else if (ext_coherent) {
			mtype = is_local ? mtype_local : MTYPE_UC;
		} else {
			if (is_local)
				mtype = mtype_local;
			else
				mtype = MTYPE_NC;
		}
		break;
	default:
		if (uncached || coherent)
			mtype = MTYPE_UC;
		else
			mtype = MTYPE_NC;
	}

	if (mtype != MTYPE_NC)
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, mtype);

	*flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
}
#endif

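/**
 * gmc_v12_1_get_vm_pte - derive the hardware PTE flags for a mapping
 *
 * @adev: amdgpu_device pointer
 * @vm: VM the mapping belongs to
 * @bo: buffer object being mapped, may be NULL
 * @vm_flags: AMDGPU_VM_* flags requested for the mapping
 * @flags: hardware PTE flags, adjusted in place
 *
 * Convert the generic AMDGPU_VM_* mapping flags into GFX12 PTE bits:
 * executable bit, memory type, noalloc and PRT handling.
 */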
static void gmc_v12_1_get_vm_pte(struct amdgpu_device *adev,
				 struct amdgpu_vm *vm,
				 struct amdgpu_bo *bo,
				 uint32_t vm_flags,
				 uint64_t *flags)
{
	if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
		*flags |= AMDGPU_PTE_EXECUTABLE;
	else
		*flags &= ~AMDGPU_PTE_EXECUTABLE;

	switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
	case AMDGPU_VM_MTYPE_DEFAULT:
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
		break;
	case AMDGPU_VM_MTYPE_NC:
	default:
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
		break;
	case AMDGPU_VM_MTYPE_UC:
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
		break;
	}

	if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
		*flags |= AMDGPU_PTE_NOALLOC;
	else
		*flags &= ~AMDGPU_PTE_NOALLOC;

	if (vm_flags & AMDGPU_VM_PAGE_PRT) {
		*flags |= AMDGPU_PTE_SNOOPED;
		*flags |= AMDGPU_PTE_SYSTEM;
		*flags |= AMDGPU_PTE_IS_PTE;
		*flags &= ~AMDGPU_PTE_VALID;
	}

	if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
			       AMDGPU_GEM_CREATE_EXT_COHERENT |
			       AMDGPU_GEM_CREATE_UNCACHED))
		*flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);

	if (adev->have_atomics_support)
		*flags |= AMDGPU_PTE_BUS_ATOMICS;
}

static const struct amdgpu_gmc_funcs gmc_v12_1_gmc_funcs = {
	.flush_gpu_tlb = gmc_v12_1_flush_gpu_tlb,
	.flush_gpu_tlb_pasid = gmc_v12_1_flush_gpu_tlb_pasid,
	.emit_flush_gpu_tlb = gmc_v12_1_emit_flush_gpu_tlb,
	.emit_pasid_mapping = gmc_v12_1_emit_pasid_mapping,
	.get_vm_pde = gmc_v12_1_get_vm_pde,
	.get_vm_pte = gmc_v12_1_get_vm_pte,
};

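/**
 * gmc_v12_1_set_gmc_funcs - install the GMC callbacks
 *
 * @adev: amdgpu_device pointer
 *
 * Hook the gmc_v12_1 implementation into the device's gmc_funcs table
 * so the core amdgpu_gmc helpers dispatch to it.
 */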
void gmc_v12_1_set_gmc_funcs(struct amdgpu_device *adev)
{
	adev->gmc.gmc_funcs = &gmc_v12_1_gmc_funcs;
}
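
#if 0
/*
 * Not built: a sketch of how these callbacks are reached. Callers never
 * invoke the gmc_v12_1_* functions directly; once gmc_v12_1_set_gmc_funcs()
 * has installed the table, flushes go through the amdgpu_gmc_flush_gpu_tlb()
 * wrapper around adev->gmc.gmc_funcs. The function below is hypothetical.
 */
static void gmc_v12_1_example_flush_gart(struct amdgpu_device *adev)
{
	gmc_v12_1_set_gmc_funcs(adev);

	/* flush the GART (VMID 0) mappings on the GFX hub */
	amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0);
}
#endif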