// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "regs/xe_gtt_defs.h"

#include "xe_assert.h"
#include "xe_ggtt.h"
#include "xe_gt_sriov_vf.h"
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
#include "xe_tile_sriov_vf.h"
#include "xe_wopcm.h"

static int vf_init_ggtt_balloons(struct xe_tile *tile)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;

	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

	tile->sriov.vf.ggtt_balloon[0] = xe_ggtt_node_init(ggtt);
	if (IS_ERR(tile->sriov.vf.ggtt_balloon[0]))
		return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]);

	tile->sriov.vf.ggtt_balloon[1] = xe_ggtt_node_init(ggtt);
	if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) {
		xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]);
		return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]);
	}

	return 0;
}

/**
 * xe_tile_sriov_vf_balloon_ggtt_locked - Insert balloon nodes to limit used GGTT address range.
 * @tile: the &xe_tile struct instance
 *
 * Context: Expects the &xe_ggtt.lock of @tile to be held.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile)
{
	u64 ggtt_base = xe_gt_sriov_vf_ggtt_base(tile->primary_gt);
	u64 ggtt_size = xe_gt_sriov_vf_ggtt(tile->primary_gt);
	struct xe_device *xe = tile_to_xe(tile);
	u64 wopcm = xe_wopcm_size(xe);
	u64 start, end;
	int err;

	xe_tile_assert(tile, IS_SRIOV_VF(xe));
	xe_tile_assert(tile, ggtt_size);
	lockdep_assert_held(&tile->mem.ggtt->lock);

	/*
	 * VF can only use part of the GGTT as allocated by the PF:
	 *
	 *      WOPCM                                  GUC_GGTT_TOP
	 *      |<------------ Total GGTT size ------------------>|
	 *
	 *           VF GGTT base -->|<- size ->|
	 *
	 *      +--------------------+----------+-----------------+
	 *      |////////////////////|   block  |\\\\\\\\\\\\\\\\\|
	 *      +--------------------+----------+-----------------+
	 *
	 *      |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->|
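	 *
	 * For example (illustrative numbers only): with a 2 MiB WOPCM, a VF
	 * GGTT base of 0x10000000 and a size of 0x20000000, balloon[0] would
	 * span [2 MiB, 0x10000000) and balloon[1] would span
	 * [0x30000000, GUC_GGTT_TOP).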
	 */

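	/*
	 * The last test is written as "size > GUC_GGTT_TOP - base" instead of
	 * "base + size > GUC_GGTT_TOP" so that a huge ggtt_size cannot make
	 * the sum wrap around; base is already known to be <= GUC_GGTT_TOP
	 * when that operand is evaluated.
	 */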
	if (ggtt_base < wopcm || ggtt_base > GUC_GGTT_TOP ||
	    ggtt_size > GUC_GGTT_TOP - ggtt_base) {
		xe_sriov_err(xe, "tile%u: Invalid GGTT configuration: %#llx-%#llx\n",
			     tile->id, ggtt_base, ggtt_base + ggtt_size - 1);
		return -ERANGE;
	}

	start = wopcm;
	end = ggtt_base;
	if (end != start) {
		err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[0],
							 start, end);
		if (err)
			return err;
	}

	start = ggtt_base + ggtt_size;
	end = GUC_GGTT_TOP;
	if (end != start) {
		err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[1],
							 start, end);
		if (err) {
			xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]);
			return err;
		}
	}

	return 0;
}

static int vf_balloon_ggtt(struct xe_tile *tile)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;
	int err;

	mutex_lock(&ggtt->lock);
	err = xe_tile_sriov_vf_balloon_ggtt_locked(tile);
	mutex_unlock(&ggtt->lock);

	return err;
}

/**
 * xe_tile_sriov_vf_deballoon_ggtt_locked - Remove balloon nodes.
 * @tile: the &xe_tile struct instance
 *
 * Context: Expects the &xe_ggtt.lock of @tile to be held.
 */
void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

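	/* Remove the balloons in the reverse order of their insertion */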
	xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[1]);
	xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]);
}

static void vf_deballoon_ggtt(struct xe_tile *tile)
{
	mutex_lock(&tile->mem.ggtt->lock);
	xe_tile_sriov_vf_deballoon_ggtt_locked(tile);
	mutex_unlock(&tile->mem.ggtt->lock);
}

static void vf_fini_ggtt_balloons(struct xe_tile *tile)
{
	xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));

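	/* Release the balloon nodes in the reverse order of their creation */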
	xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[1]);
	xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]);
}

static void cleanup_ggtt(struct drm_device *drm, void *arg)
{
	struct xe_tile *tile = arg;

	vf_deballoon_ggtt(tile);
	vf_fini_ggtt_balloons(tile);
}

/**
 * xe_tile_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration.
 * @tile: the &xe_tile
 *
 * This function is for VF use only.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile)
{
	struct xe_device *xe = tile_to_xe(tile);
	int err;

	err = vf_init_ggtt_balloons(tile);
	if (err)
		return err;

	err = vf_balloon_ggtt(tile);
	if (err) {
		vf_fini_ggtt_balloons(tile);
		return err;
	}

	return drmm_add_action_or_reset(&xe->drm, cleanup_ggtt, tile);
}

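/*
 * A minimal usage sketch (hypothetical call site; the actual probe path may
 * differ):
 *
 *	if (IS_SRIOV_VF(tile_to_xe(tile))) {
 *		err = xe_tile_sriov_vf_prepare_ggtt(tile);
 *		if (err)
 *			return err;
 *	}
 *
 * Cleanup is registered above as a DRM managed action, so no explicit
 * teardown call is needed on driver unload.
 */
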
/**
 * DOC: GGTT nodes shifting during VF post-migration recovery
 *
 * The first fixup applied to the VF KMD structures as part of post-migration
 * recovery is shifting nodes within the &xe_ggtt instance. The nodes are
 * moved from the range previously assigned to this VF into the newly
 * provisioned area. The changes include balloons, which are resized
 * accordingly.
 *
 * The balloon nodes are there to eliminate unavailable ranges from use: one
 * reserves the GGTT area below the range assigned to the current VF, and the
 * other reserves the area above it.
 *
 * Below is the GGTT layout of an example VF, with a certain address range
 * assigned to said VF, and inaccessible areas above and below:
 *
 *  0                                                                        4GiB
 *  |<--------------------------- Total GGTT size ----------------------------->|
 *      WOPCM                                                         GUC_TOP
 *      |<-------------- Area mappable by xe_ggtt instance ---------------->|
 *
 *  +---+---------------------------------+----------+----------------------+---+
 *  |\\\|/////////////////////////////////|  VF mem  |//////////////////////|\\\|
 *  +---+---------------------------------+----------+----------------------+---+
 *
 * Hardware-enforced access rules before migration:
 *
 *  |<------- inaccessible for VF ------->|<VF owned>|<-- inaccessible for VF ->|
 *
 * GGTT nodes used for tracking allocations:
 *
 *      |<---------- balloon ------------>|<- nodes->|<----- balloon ------>|
 *
 * After the migration, the GGTT area assigned to the VF might have shifted,
 * either to a lower or to a higher address. But we expect the total size and
 * the surrounding inaccessible areas to be identical, as migration can only
 * happen between matching platforms. Below is an example GGTT layout of the
 * VF after migration. The content of the VF's GGTT has been moved to a new
 * area, and we receive its address from GuC:
 *
 *  +---+----------------------+----------+---------------------------------+---+
 *  |\\\|//////////////////////|  VF mem  |/////////////////////////////////|\\\|
 *  +---+----------------------+----------+---------------------------------+---+
 *
 * Hardware-enforced access rules after migration:
 *
 *  |<- inaccessible for VF -->|<VF owned>|<------- inaccessible for VF ------->|
 *
 * So the VF has a new slice of GGTT assigned, and during the migration
 * process the memory content was copied to that new area. But the &xe_ggtt
 * nodes are still tracking allocations using the old addresses. The nodes
 * within the VF owned area have to be shifted, and the balloon nodes need to
 * be resized to properly mask out areas not owned by the VF.
 *
 * Fixed &xe_ggtt nodes used for tracking allocations:
 *
 *     |<------ balloon ------>|<- nodes->|<----------- balloon ----------->|
 *
 * Due to the use of GPU profiles, we do not expect the old and new GGTT
 * areas to overlap; but our node shifting will fix addresses properly
 * regardless.
 */

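/*
 * As a worked example (illustrative numbers only, not from any real
 * provisioning): if the VF range previously started at GGTT address
 * 0x60000000 and the GuC reports a new base of 0x20000000, the required
 * shift is 0x20000000 - 0x60000000 = -0x40000000. Every node in the VF
 * owned range is moved down by 1 GiB, and the balloons are re-inserted
 * around the new range.
 */
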
/**
 * xe_tile_sriov_vf_fixup_ggtt_nodes - Shift GGTT allocations to match assigned range.
 * @tile: the &xe_tile struct instance
 * @shift: the signed shift, in bytes, to apply to GGTT addresses
 *
 * Since the Global GTT is not virtualized, each VF has an assigned range
 * within the global space. This range might have changed during migration,
 * which requires all memory addresses pointing to GGTT to be shifted.
 */
void xe_tile_sriov_vf_fixup_ggtt_nodes(struct xe_tile *tile, s64 shift)
{
	struct xe_ggtt *ggtt = tile->mem.ggtt;

	mutex_lock(&ggtt->lock);

	xe_tile_sriov_vf_deballoon_ggtt_locked(tile);
	xe_ggtt_shift_nodes_locked(ggtt, shift);
	xe_tile_sriov_vf_balloon_ggtt_locked(tile);

	mutex_unlock(&ggtt->lock);
}
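
/*
 * The shift is expected to be derived by the caller from the GGTT base
 * reported before and after migration, e.g. (illustrative sketch only;
 * old_ggtt_base and new_ggtt_base are hypothetical names):
 *
 *	s64 shift = (s64)new_ggtt_base - (s64)old_ggtt_base;
 *
 *	xe_tile_sriov_vf_fixup_ggtt_nodes(tile, shift);
 */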