// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "regs/xe_gtt_defs.h"

#include "xe_assert.h"
#include "xe_ggtt.h"
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
#include "xe_tile_sriov_vf.h"
#include "xe_wopcm.h"

static int vf_init_ggtt_balloons(struct xe_tile *tile)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	tile->sriov.vf.ggtt_balloon[0] = xe_ggtt_node_init(ggtt);
	if (IS_ERR(tile->sriov.vf.ggtt_balloon[0]))
		return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]);

	tile->sriov.vf.ggtt_balloon[1] = xe_ggtt_node_init(ggtt);
	if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) {
		xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]);
		return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]);
	}

	return 0;
}

/**
 * xe_tile_sriov_vf_balloon_ggtt_locked - Insert balloon nodes to limit used GGTT address range.
 * @tile: the &xe_tile struct instance
 *
 * Return: 0 on success or a negative error code on failure.
 */
static int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile)
{
	u64 ggtt_base = tile->sriov.vf.self_config.ggtt_base;
	u64 ggtt_size = tile->sriov.vf.self_config.ggtt_size;
	struct xe_device *xe = tile_to_xe(tile);
	u64 wopcm = xe_wopcm_size(xe);
	u64 start, end;
	int err;

	xe_tile_assert(tile, IS_SRIOV_VF(xe));
	xe_tile_assert(tile, ggtt_size);
	lockdep_assert_held(&tile->mem.ggtt->lock);

	/*
	 * VF can only use part of the GGTT as allocated by the PF:
	 *
	 *      WOPCM                                  GUC_GGTT_TOP
	 *      |<------------ Total GGTT size ------------------>|
	 *
	 *           VF GGTT base -->|<- size ->|
	 *
	 *      +--------------------+----------+-----------------+
	 *      |////////////////////|   block  |\\\\\\\\\\\\\\\\\|
	 *      +--------------------+----------+-----------------+
	 *
	 *      |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->|
	 */

	if (ggtt_base < wopcm || ggtt_base > GUC_GGTT_TOP ||
	    ggtt_size > GUC_GGTT_TOP - ggtt_base) {
		xe_sriov_err(xe, "tile%u: Invalid GGTT configuration: %#llx-%#llx\n",
			     tile->id, ggtt_base, ggtt_base + ggtt_size - 1);
		return -ERANGE;
	}

	start = wopcm;
	end = ggtt_base;
	if (end != start) {
		err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[0],
							 start, end);
		if (err)
			return err;
	}

	start = ggtt_base + ggtt_size;
	end = GUC_GGTT_TOP;
	if (end != start) {
		err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[1],
							 start, end);
		if (err) {
			xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]);
			return err;
		}
	}

	return 0;
}

static int vf_balloon_ggtt(struct xe_tile *tile)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;
	int err;

	mutex_lock(&ggtt->lock);
	err = xe_tile_sriov_vf_balloon_ggtt_locked(tile);
	mutex_unlock(&ggtt->lock);

	return err;
}

/**
 * xe_tile_sriov_vf_deballoon_ggtt_locked - Remove balloon nodes.
 * @tile: the &xe_tile struct instance
 */
void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[1]);
	xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]);
}

static void vf_deballoon_ggtt(struct xe_tile *tile)
{
	mutex_lock(&tile->mem.ggtt->lock);
	xe_tile_sriov_vf_deballoon_ggtt_locked(tile);
	mutex_unlock(&tile->mem.ggtt->lock);
}

static void vf_fini_ggtt_balloons(struct xe_tile *tile)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[1]);
	xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]);
}

static void cleanup_ggtt(struct drm_device *drm, void *arg)
{
	struct xe_tile *tile = arg;

	vf_deballoon_ggtt(tile);
	vf_fini_ggtt_balloons(tile);
}

/**
 * xe_tile_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile)
{
	struct xe_device *xe = tile_to_xe(tile);
	int err;

	err = vf_init_ggtt_balloons(tile);
	if (err)
		return err;

	err = vf_balloon_ggtt(tile);
	if (err) {
		vf_fini_ggtt_balloons(tile);
		return err;
	}

	return drmm_add_action_or_reset(&xe->drm, cleanup_ggtt, tile);
}
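
/*
 * Illustrative sketch only, not part of the driver: a hypothetical VF probe
 * step could prepare GGTT ballooning for every tile as shown below. The
 * helper name example_vf_prepare_all_ggtt() is an assumption made for
 * illustration; the for_each_tile() iterator is assumed to be available via
 * the xe device headers. Cleanup is already handled by the drmm action
 * registered in xe_tile_sriov_vf_prepare_ggtt(), so no explicit unwind of
 * earlier tiles is attempted here.
 */
static int __maybe_unused example_vf_prepare_all_ggtt(struct xe_device *xe)
{
	struct xe_tile *tile;
	unsigned int id;
	int err;

	for_each_tile(tile, xe, id) {
		err = xe_tile_sriov_vf_prepare_ggtt(tile);
		if (err)
			return err;
	}

	return 0;
}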

/**
 * DOC: GGTT nodes shifting during VF post-migration recovery
 *
 * The first fixup applied to the VF KMD structures as part of post-migration
 * recovery is shifting nodes within the &xe_ggtt instance. The nodes are moved
 * from the range previously assigned to this VF into the newly provisioned
 * area. The changes include balloons, which are resized accordingly.
 *
 * The balloon nodes are there to eliminate unavailable ranges from use: one
 * reserves the GGTT area below the range assigned to the current VF, and the
 * other reserves the area above.
 *
 * Below is an example GGTT layout of a VF, with a certain address range
 * assigned to that VF, and inaccessible areas above and below:
 *
 *  0                                                                        4GiB
 *  |<--------------------------- Total GGTT size ----------------------------->|
 *      WOPCM                                                         GUC_TOP
 *      |<-------------- Area mappable by xe_ggtt instance ---------------->|
 *
 *  +---+---------------------------------+----------+----------------------+---+
 *  |\\\|/////////////////////////////////|  VF mem  |//////////////////////|\\\|
 *  +---+---------------------------------+----------+----------------------+---+
 *
 * Hardware enforced access rules before migration:
 *
 *  |<------- inaccessible for VF ------->|<VF owned>|<-- inaccessible for VF ->|
 *
 * GGTT nodes used for tracking allocations:
 *
 *      |<---------- balloon ------------>|<- nodes->|<----- balloon ------>|
 *
 * After the migration, the GGTT area assigned to the VF might have shifted,
 * either to a lower or to a higher address. But we expect the total size and
 * the extra areas to be identical, as migration can only happen between
 * matching platforms. Below is an example GGTT layout of the VF after
 * migration. The content of the VF's GGTT has been moved to a new area, and
 * we receive its address from GuC:
 *
 *  +---+----------------------+----------+---------------------------------+---+
 *  |\\\|//////////////////////|  VF mem  |/////////////////////////////////|\\\|
 *  +---+----------------------+----------+---------------------------------+---+
 *
 * Hardware enforced access rules after migration:
 *
 *  |<- inaccessible for VF -->|<VF owned>|<------- inaccessible for VF ------->|
 *
 * So the VF has a new slice of GGTT assigned, and during the migration process
 * the memory content was copied to that new area. But the &xe_ggtt nodes are
 * still tracking allocations using the old addresses. The nodes within the VF
 * owned area have to be shifted, and the balloon nodes need to be resized to
 * properly mask out areas not owned by the VF.
 *
 * Fixed &xe_ggtt nodes used for tracking allocations:
 *
 *     |<------ balloon ------>|<- nodes->|<----------- balloon ----------->|
 *
 * Due to the use of GPU profiles, we do not expect the old and new GGTT areas
 * to overlap, but the node shifting will fix the addresses properly
 * regardless.
 */

/**
 * xe_tile_sriov_vf_fixup_ggtt_nodes_locked - Shift GGTT allocations to match assigned range.
 * @tile: the &xe_tile struct instance
 * @shift: the shift value
 *
 * Since the Global GTT is not virtualized, each VF has an assigned range
 * within the global space. This range might have changed during migration,
 * which requires all memory addresses pointing to the GGTT to be shifted.
 */
void xe_tile_sriov_vf_fixup_ggtt_nodes_locked(struct xe_tile *tile, s64 shift)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;

	lockdep_assert_held(&ggtt->lock);

	xe_tile_sriov_vf_deballoon_ggtt_locked(tile);
	xe_ggtt_shift_nodes_locked(ggtt, shift);
	xe_tile_sriov_vf_balloon_ggtt_locked(tile);
}
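
/*
 * Illustrative sketch only, not part of the driver: a hypothetical
 * post-migration recovery step could compute the shift from the previously
 * stored GGTT base versus the newly provisioned base reported by GuC, then
 * apply the node fixup while holding the GGTT lock. The helper name
 * example_vf_recover_ggtt() and the new_ggtt_base parameter are assumptions
 * made for illustration.
 */
static void __maybe_unused example_vf_recover_ggtt(struct xe_tile *tile,
						   u64 new_ggtt_base)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;
	s64 shift = new_ggtt_base - xe_tile_sriov_vf_ggtt_base(tile);

	mutex_lock(&ggtt->lock);
	/* Record the new base so that re-ballooning uses the updated range. */
	xe_tile_sriov_vf_ggtt_base_store(tile, new_ggtt_base);
	xe_tile_sriov_vf_fixup_ggtt_nodes_locked(tile, shift);
	mutex_unlock(&ggtt->lock);
}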

/**
 * xe_tile_sriov_vf_lmem - VF LMEM configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: size of the LMEM assigned to the VF.
 */
u64 xe_tile_sriov_vf_lmem(struct xe_tile *tile)
{
	struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	return config->lmem_size;
}

/**
 * xe_tile_sriov_vf_lmem_store - Store VF LMEM configuration.
 * @tile: the &xe_tile
 * @lmem_size: VF LMEM size to store
 *
 * This function is for VF use only.
 */
void xe_tile_sriov_vf_lmem_store(struct xe_tile *tile, u64 lmem_size)
{
	struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	config->lmem_size = lmem_size;
}

/**
 * xe_tile_sriov_vf_ggtt - VF GGTT configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: size of the GGTT assigned to the VF.
 */
u64 xe_tile_sriov_vf_ggtt(struct xe_tile *tile)
{
	struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	return config->ggtt_size;
}

/**
 * xe_tile_sriov_vf_ggtt_store - Store VF GGTT configuration.
 * @tile: the &xe_tile
 * @ggtt_size: VF GGTT size to store
 *
 * This function is for VF use only.
 */
void xe_tile_sriov_vf_ggtt_store(struct xe_tile *tile, u64 ggtt_size)
{
	struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	config->ggtt_size = ggtt_size;
}

/**
 * xe_tile_sriov_vf_ggtt_base - VF GGTT base configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: base of the GGTT assigned to the VF.
 */
u64 xe_tile_sriov_vf_ggtt_base(struct xe_tile *tile)
{
	struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	return config->ggtt_base;
}

/**
 * xe_tile_sriov_vf_ggtt_base_store - Store VF GGTT base configuration.
 * @tile: the &xe_tile
 * @ggtt_base: VF GGTT base to store
 *
 * This function is for VF use only.
 */
void xe_tile_sriov_vf_ggtt_base_store(struct xe_tile *tile, u64 ggtt_base)
{
	struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	config->ggtt_base = ggtt_base;
}
351