xref: /linux/drivers/gpu/drm/xe/xe_hw_engine_group.c (revision 1b5d39e6672fdee158c3306f5cb2df8975c77e5a)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2024 Intel Corporation
4  */
5 
6 #include <drm/drm_managed.h>
7 
8 #include "xe_assert.h"
9 #include "xe_device.h"
10 #include "xe_exec_queue.h"
11 #include "xe_gt.h"
12 #include "xe_gt_stats.h"
13 #include "xe_hw_engine_group.h"
14 #include "xe_vm.h"
15 
16 static void
17 hw_engine_group_resume_lr_jobs_func(struct work_struct *w)
18 {
19 	struct xe_exec_queue *q;
20 	struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work);
21 	int err;
22 	enum xe_hw_engine_group_execution_mode previous_mode;
23 
24 	err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode);
25 	if (err)
26 		return;
27 
28 	if (previous_mode == EXEC_MODE_LR)
29 		goto put;
30 
31 	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
32 		if (!xe_vm_in_fault_mode(q->vm))
33 			continue;
34 
35 		q->ops->resume(q);
36 	}
37 
38 put:
39 	xe_hw_engine_group_put(group);
40 }
41 
42 static struct xe_hw_engine_group *
43 hw_engine_group_alloc(struct xe_device *xe)
44 {
45 	struct xe_hw_engine_group *group;
46 	int err;
47 
48 	group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL);
49 	if (!group)
50 		return ERR_PTR(-ENOMEM);
51 
52 	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
53 	if (!group->resume_wq)
54 		return ERR_PTR(-ENOMEM);
55 
56 	err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq);
57 	if (err)
58 		return ERR_PTR(err);
59 
60 	init_rwsem(&group->mode_sem);
61 	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
62 	INIT_LIST_HEAD(&group->exec_queue_list);
63 
64 	return group;
65 }
66 
67 /**
68  * xe_hw_engine_setup_groups() - Setup the hw engine groups for the gt
69  * @gt: The gt for which groups are setup
70  *
71  * Return: 0 on success, negative error code on error.
72  */
73 int xe_hw_engine_setup_groups(struct xe_gt *gt)
74 {
75 	struct xe_hw_engine *hwe;
76 	enum xe_hw_engine_id id;
77 	struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
78 	struct xe_device *xe = gt_to_xe(gt);
79 
80 	group_rcs_ccs = hw_engine_group_alloc(xe);
81 	if (IS_ERR(group_rcs_ccs))
82 		return PTR_ERR(group_rcs_ccs);
83 
84 	group_bcs = hw_engine_group_alloc(xe);
85 	if (IS_ERR(group_bcs))
86 		return PTR_ERR(group_bcs);
87 
88 	group_vcs_vecs = hw_engine_group_alloc(xe);
89 	if (IS_ERR(group_vcs_vecs))
90 		return PTR_ERR(group_vcs_vecs);
91 
92 	for_each_hw_engine(hwe, gt, id) {
93 		switch (hwe->class) {
94 		case XE_ENGINE_CLASS_COPY:
95 			hwe->hw_engine_group = group_bcs;
96 			break;
97 		case XE_ENGINE_CLASS_RENDER:
98 		case XE_ENGINE_CLASS_COMPUTE:
99 			hwe->hw_engine_group = group_rcs_ccs;
100 			break;
101 		case XE_ENGINE_CLASS_VIDEO_DECODE:
102 		case XE_ENGINE_CLASS_VIDEO_ENHANCE:
103 			hwe->hw_engine_group = group_vcs_vecs;
104 			break;
105 		case XE_ENGINE_CLASS_OTHER:
106 			break;
107 		case XE_ENGINE_CLASS_MAX:
108 			xe_gt_assert(gt, false);
109 		}
110 	}
111 
112 	return 0;
113 }
114 
115 /**
116  * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group
117  * @group: The hw engine group
118  * @q: The exec_queue
119  *
120  * Return: 0 on success,
121  *	    -EINTR if the lock could not be acquired
122  */
123 int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
124 {
125 	int err;
126 	struct xe_device *xe = gt_to_xe(q->gt);
127 
128 	xe_assert(xe, group);
129 	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
130 	xe_assert(xe, q->vm);
131 
132 	if (xe_vm_in_preempt_fence_mode(q->vm))
133 		return 0;
134 
135 	err = down_write_killable(&group->mode_sem);
136 	if (err)
137 		return err;
138 
139 	if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) {
140 		q->ops->suspend(q);
141 		err = q->ops->suspend_wait(q);
142 		if (err)
143 			goto err_suspend;
144 
145 		xe_hw_engine_group_resume_faulting_lr_jobs(group);
146 	}
147 
148 	list_add(&q->hw_engine_group_link, &group->exec_queue_list);
149 	up_write(&group->mode_sem);
150 
151 	return 0;
152 
153 err_suspend:
154 	up_write(&group->mode_sem);
155 	return err;
156 }
157 ALLOW_ERROR_INJECTION(xe_hw_engine_group_add_exec_queue, ERRNO);
158 
/**
 * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 *
 * Safe to call for queues that were never added (their link stays empty).
 */
void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, q->vm);

	down_write(&group->mode_sem);

	/*
	 * The queue may never have been added, e.g. queues on preempt-fence
	 * VMs are skipped by xe_hw_engine_group_add_exec_queue().
	 */
	if (!list_empty(&q->hw_engine_group_link))
		list_del(&q->hw_engine_group_link);

	up_write(&group->mode_sem);
}
178 
/**
 * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's
 * faulting LR jobs
 * @group: The hw engine group
 *
 * Queues hw_engine_group_resume_lr_jobs_func() on the group's dedicated
 * workqueue; the actual resume happens from that worker.
 */
void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	queue_work(group->resume_wq, &group->resume_work);
}
188 
189 /**
190  * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group
191  * @group: The hw engine group
192  *
193  * Return: 0 on success, negative error code on error.
194  */
195 static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group)
196 {
197 	int err;
198 	struct xe_exec_queue *q;
199 	bool need_resume = false;
200 
201 	lockdep_assert_held_write(&group->mode_sem);
202 
203 	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
204 		if (!xe_vm_in_fault_mode(q->vm))
205 			continue;
206 
207 		xe_gt_stats_incr(q->gt, XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_COUNT, 1);
208 		need_resume = true;
209 		q->ops->suspend(q);
210 	}
211 
212 	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
213 		if (!xe_vm_in_fault_mode(q->vm))
214 			continue;
215 
216 		err = q->ops->suspend_wait(q);
217 		if (err)
218 			return err;
219 	}
220 
221 	if (need_resume)
222 		xe_hw_engine_group_resume_faulting_lr_jobs(group);
223 
224 	return 0;
225 }
226 
227 /**
228  * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete
229  * @group: The hw engine group
230  *
231  * This function is not meant to be called directly from a user IOCTL as dma_fence_wait()
232  * is not interruptible.
233  *
234  * Return: 0 on success,
235  *	   -ETIME if waiting for one job failed
236  */
237 static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group)
238 {
239 	long timeout;
240 	struct xe_exec_queue *q;
241 	struct dma_fence *fence;
242 
243 	lockdep_assert_held_write(&group->mode_sem);
244 
245 	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
246 		if (xe_vm_in_lr_mode(q->vm))
247 			continue;
248 
249 		xe_gt_stats_incr(q->gt, XE_GT_STATS_ID_HW_ENGINE_GROUP_WAIT_DMA_QUEUE_COUNT, 1);
250 		fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm);
251 		timeout = dma_fence_wait(fence, false);
252 		dma_fence_put(fence);
253 
254 		if (timeout < 0)
255 			return -ETIME;
256 	}
257 
258 	return 0;
259 }
260 
261 static int switch_mode(struct xe_hw_engine_group *group)
262 {
263 	int err = 0;
264 	enum xe_hw_engine_group_execution_mode new_mode;
265 
266 	lockdep_assert_held_write(&group->mode_sem);
267 
268 	switch (group->cur_mode) {
269 	case EXEC_MODE_LR:
270 		new_mode = EXEC_MODE_DMA_FENCE;
271 		err = xe_hw_engine_group_suspend_faulting_lr_jobs(group);
272 		break;
273 	case EXEC_MODE_DMA_FENCE:
274 		new_mode = EXEC_MODE_LR;
275 		err = xe_hw_engine_group_wait_for_dma_fence_jobs(group);
276 		break;
277 	}
278 
279 	if (err)
280 		return err;
281 
282 	group->cur_mode = new_mode;
283 
284 	return 0;
285 }
286 
/**
 * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode
 * @group: The hw engine group
 * @new_mode: The new execution mode
 * @previous_mode: Pointer to the previous mode provided for use by caller
 *
 * On success the caller holds the group's mode_sem for read and must release
 * it with xe_hw_engine_group_put().
 *
 * Return: 0 if successful, -EINTR if locking failed.
 */
int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
				enum xe_hw_engine_group_execution_mode new_mode,
				enum xe_hw_engine_group_execution_mode *previous_mode)
__acquires(&group->mode_sem)
{
	int err = down_read_interruptible(&group->mode_sem);

	if (err)
		return err;

	/* Mode observed under the initial read lock; reported to the caller. */
	*previous_mode = group->cur_mode;

	if (new_mode != group->cur_mode) {
		/* Switching modes needs exclusive access: trade read for write. */
		up_read(&group->mode_sem);
		err = down_write_killable(&group->mode_sem);
		if (err)
			return err;

		/* Re-check: another writer may have switched while we were unlocked. */
		if (new_mode != group->cur_mode) {
			err = switch_mode(group);
			if (err) {
				up_write(&group->mode_sem);
				return err;
			}
		}
		/* Leave holding the read lock, same as the fast path above. */
		downgrade_write(&group->mode_sem);
	}

	return err;
}
325 
/**
 * xe_hw_engine_group_put() - Put the group
 * @group: The hw engine group
 *
 * Releases the read lock taken by xe_hw_engine_group_get_mode().
 */
void xe_hw_engine_group_put(struct xe_hw_engine_group *group)
__releases(&group->mode_sem)
{
	up_read(&group->mode_sem);
}
335 
336 /**
337  * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue
338  * @q: The exec_queue
339  */
340 enum xe_hw_engine_group_execution_mode
341 xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q)
342 {
343 	if (xe_vm_in_fault_mode(q->vm))
344 		return EXEC_MODE_LR;
345 	else
346 		return EXEC_MODE_DMA_FENCE;
347 }
348