xref: /linux/drivers/gpu/drm/xe/xe_hw_engine_group.c (revision e6c2b0f23221ed43c4cc6f636e9ab7862954d562)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_assert.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_engine_group.h"
#include "xe_vm.h"

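/*
 * Worker for the group's resume_wq: take the group in EXEC_MODE_LR and, unless
 * the group was already executing in that mode, resume every exec queue of the
 * group whose VM is in fault mode. Queued from
 * xe_hw_engine_group_resume_faulting_lr_jobs().
 */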
static void
hw_engine_group_resume_lr_jobs_func(struct work_struct *w)
{
	struct xe_exec_queue *q;
	struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work);
	int err;
	enum xe_hw_engine_group_execution_mode previous_mode;

	err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode);
	if (err)
		return;

	if (previous_mode == EXEC_MODE_LR)
		goto put;

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		q->ops->resume(q);
	}

put:
	xe_hw_engine_group_put(group);
}

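/*
 * The group and its resume workqueue are DRM-managed: drmm_kzalloc() ties the
 * allocation to the lifetime of &xe->drm and the registered drmm action
 * releases the workqueue on driver teardown, so callers do not need to free
 * anything on their error paths.
 */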
static struct xe_hw_engine_group *
hw_engine_group_alloc(struct xe_device *xe)
{
	struct xe_hw_engine_group *group;
	int err;

	group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
	if (!group->resume_wq)
		return ERR_PTR(-ENOMEM);

	err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq);
	if (err)
		return ERR_PTR(err);

	init_rwsem(&group->mode_sem);
	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
	INIT_LIST_HEAD(&group->exec_queue_list);

	return group;
}

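/*
 * Engines are grouped by class: render and compute engines share a group,
 * copy engines get their own, and video decode/enhance engines share one.
 * A group gathers engines whose exec queues share a single execution mode
 * (dma-fence or faulting long-running) at any given time.
 */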
/**
 * xe_hw_engine_setup_groups() - Setup the hw engine groups for the gt
 * @gt: The gt for which groups are setup
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_hw_engine_setup_groups(struct xe_gt *gt)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
	struct xe_device *xe = gt_to_xe(gt);

	group_rcs_ccs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_rcs_ccs))
		return PTR_ERR(group_rcs_ccs);

	group_bcs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_bcs))
		return PTR_ERR(group_bcs);

	group_vcs_vecs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_vcs_vecs))
		return PTR_ERR(group_vcs_vecs);

	for_each_hw_engine(hwe, gt, id) {
		switch (hwe->class) {
		case XE_ENGINE_CLASS_COPY:
			hwe->hw_engine_group = group_bcs;
			break;
		case XE_ENGINE_CLASS_RENDER:
		case XE_ENGINE_CLASS_COMPUTE:
			hwe->hw_engine_group = group_rcs_ccs;
			break;
		case XE_ENGINE_CLASS_VIDEO_DECODE:
		case XE_ENGINE_CLASS_VIDEO_ENHANCE:
			hwe->hw_engine_group = group_vcs_vecs;
			break;
		case XE_ENGINE_CLASS_OTHER:
			break;
		default:
			drm_warn(&xe->drm, "NOT POSSIBLE");
		}
	}

	return 0;
}

/**
 * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 *
 * Return: 0 on success,
 *	    -EINTR if the lock could not be acquired,
 *	    or a negative error code if suspending the exec queue failed.
 */
int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	int err;
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
	xe_assert(xe, q->vm);

	if (xe_vm_in_preempt_fence_mode(q->vm))
		return 0;

	err = down_write_killable(&group->mode_sem);
	if (err)
		return err;

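	/*
	 * A new faulting LR queue cannot run while the group executes
	 * dma-fence jobs: suspend it now and let the resume worker bring it
	 * back up once the group has switched to EXEC_MODE_LR.
	 */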
	if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) {
		q->ops->suspend(q);
		err = q->ops->suspend_wait(q);
		if (err)
			goto err_suspend;

		xe_hw_engine_group_resume_faulting_lr_jobs(group);
	}

	list_add(&q->hw_engine_group_link, &group->exec_queue_list);
	up_write(&group->mode_sem);

	return 0;

err_suspend:
	up_write(&group->mode_sem);
	return err;
}
ALLOW_ERROR_INJECTION(xe_hw_engine_group_add_exec_queue, ERRNO);

/**
 * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 */
void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, q->vm);

	down_write(&group->mode_sem);

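	/*
	 * Queues of VMs in preempt fence mode are never added to the group by
	 * xe_hw_engine_group_add_exec_queue(), hence the list_empty() check.
	 */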
	if (!list_empty(&q->hw_engine_group_link))
		list_del(&q->hw_engine_group_link);

	up_write(&group->mode_sem);
}

/**
 * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's
 * faulting LR jobs
 * @group: The hw engine group
 */
void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	queue_work(group->resume_wq, &group->resume_work);
}

/**
 * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group
 * @group: The hw engine group
 *
 * Return: 0 on success, negative error code on error.
 */
static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	int err;
	struct xe_exec_queue *q;
	bool need_resume = false;

	lockdep_assert_held_write(&group->mode_sem);

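	/* First pass: request suspension of all faulting LR queues without waiting */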
	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		need_resume = true;
		q->ops->suspend(q);
	}

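	/* Second pass: wait for each of the suspends to complete */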
	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		err = q->ops->suspend_wait(q);
		if (err)
			return err;
	}

	if (need_resume)
		xe_hw_engine_group_resume_faulting_lr_jobs(group);

	return 0;
}

/**
 * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete
 * @group: The hw engine group
 *
 * This function is not meant to be called directly from a user IOCTL as the wait
 * on the dma fences is not interruptible (dma_fence_wait() is called with intr
 * set to false).
 *
 * Return: 0 on success,
 *	   -ETIME if waiting for one job failed
 */
static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group)
{
	long timeout;
	struct xe_exec_queue *q;
	struct dma_fence *fence;

	lockdep_assert_held_write(&group->mode_sem);

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (xe_vm_in_lr_mode(q->vm))
			continue;

		fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm);
		timeout = dma_fence_wait(fence, false);
		dma_fence_put(fence);

		if (timeout < 0)
			return -ETIME;
	}

	return 0;
}

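/*
 * Switch the group to the other execution mode: entering EXEC_MODE_DMA_FENCE
 * first suspends the faulting LR jobs, while entering EXEC_MODE_LR first waits
 * for the pending dma-fence jobs to complete.
 */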
static int switch_mode(struct xe_hw_engine_group *group)
{
	int err = 0;
	enum xe_hw_engine_group_execution_mode new_mode;

	lockdep_assert_held_write(&group->mode_sem);

	switch (group->cur_mode) {
	case EXEC_MODE_LR:
		new_mode = EXEC_MODE_DMA_FENCE;
		err = xe_hw_engine_group_suspend_faulting_lr_jobs(group);
		break;
	case EXEC_MODE_DMA_FENCE:
		new_mode = EXEC_MODE_LR;
		err = xe_hw_engine_group_wait_for_dma_fence_jobs(group);
		break;
	}

	if (err)
		return err;

	group->cur_mode = new_mode;

	return 0;
}

/**
 * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode
 * @group: The hw engine group
 * @new_mode: The new execution mode
 * @previous_mode: Pointer to the previous mode provided for use by caller
 *
 * Return: 0 if successful, -EINTR if locking failed, or a negative error code
 *	   if switching the group to @new_mode failed.
 */
int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
				enum xe_hw_engine_group_execution_mode new_mode,
				enum xe_hw_engine_group_execution_mode *previous_mode)
__acquires(&group->mode_sem)
{
	int err = down_read_interruptible(&group->mode_sem);

	if (err)
		return err;

	*previous_mode = group->cur_mode;

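	/*
	 * The read lock is enough to run in the current mode. To switch modes,
	 * upgrade to the write lock, re-check the mode (it may have changed
	 * while the semaphore was dropped), perform the switch, then downgrade
	 * back to a read lock which the caller releases with
	 * xe_hw_engine_group_put().
	 */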
	if (new_mode != group->cur_mode) {
		up_read(&group->mode_sem);
		err = down_write_killable(&group->mode_sem);
		if (err)
			return err;

		if (new_mode != group->cur_mode) {
			err = switch_mode(group);
			if (err) {
				up_write(&group->mode_sem);
				return err;
			}
		}
		downgrade_write(&group->mode_sem);
	}

	return err;
}

/**
 * xe_hw_engine_group_put() - Put the group
 * @group: The hw engine group
 *
 * Release the read side of the group's mode semaphore taken by
 * xe_hw_engine_group_get_mode().
 */
void xe_hw_engine_group_put(struct xe_hw_engine_group *group)
__releases(&group->mode_sem)
{
	up_read(&group->mode_sem);
}

/**
 * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue
 * @q: The exec_queue
 *
 * Return: EXEC_MODE_LR if the exec queue's VM is in fault mode,
 *	   EXEC_MODE_DMA_FENCE otherwise.
 */
enum xe_hw_engine_group_execution_mode
xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q)
{
	if (xe_vm_in_fault_mode(q->vm))
		return EXEC_MODE_LR;
	else
		return EXEC_MODE_DMA_FENCE;
}