xref: /linux/drivers/gpu/drm/scheduler/tests/mock_scheduler.c (revision e7e86d7697c6ed1dbbde18d7185c35b6967945ed)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2025 Valve Corporation */
3 
4 #include "sched_tests.h"
5 
6 /*
7  * Here we implement the mock "GPU" (or the scheduler backend) which is used by
8  * the DRM scheduler unit tests in order to exercise the core functionality.
9  *
10  * Test cases are implemented in a separate file.
11  */
12 
13 /**
14  * drm_mock_sched_entity_new - Create a new mock scheduler entity
15  *
16  * @test: KUnit test owning the entity
17  * @priority: Scheduling priority
18  * @sched: Mock scheduler on which the entity can be scheduled
19  *
20  * Returns: New mock scheduler entity with allocation managed by the test
21  */
22 struct drm_mock_sched_entity *
23 drm_mock_sched_entity_new(struct kunit *test,
24 			  enum drm_sched_priority priority,
25 			  struct drm_mock_scheduler *sched)
26 {
27 	struct drm_mock_sched_entity *entity;
28 	struct drm_gpu_scheduler *drm_sched;
29 	int ret;
30 
31 	entity = kunit_kzalloc(test, sizeof(*entity), GFP_KERNEL);
32 	KUNIT_ASSERT_NOT_NULL(test, entity);
33 
34 	drm_sched = &sched->base;
35 	ret = drm_sched_entity_init(&entity->base,
36 				    priority,
37 				    &drm_sched, 1,
38 				    NULL);
39 	KUNIT_ASSERT_EQ(test, ret, 0);
40 
41 	entity->test = test;
42 
43 	return entity;
44 }
45 
46 /**
47  * drm_mock_sched_entity_free - Destroys a mock scheduler entity
48  *
49  * @entity: Entity to destroy
50  *
51  * To be used from the test cases once done with the entity.
52  */
53 void drm_mock_sched_entity_free(struct drm_mock_sched_entity *entity)
54 {
55 	drm_sched_entity_destroy(&entity->base);
56 }
57 
58 static void drm_mock_sched_job_complete(struct drm_mock_sched_job *job)
59 {
60 	struct drm_mock_scheduler *sched =
61 		drm_sched_to_mock_sched(job->base.sched);
62 
63 	lockdep_assert_held(&sched->lock);
64 
65 	job->flags |= DRM_MOCK_SCHED_JOB_DONE;
66 	list_del(&job->link);
67 	dma_fence_signal_locked(&job->hw_fence);
68 	complete(&job->done);
69 }
70 
71 static enum hrtimer_restart
72 drm_mock_sched_job_signal_timer(struct hrtimer *hrtimer)
73 {
74 	struct drm_mock_sched_job *job =
75 		container_of(hrtimer, typeof(*job), timer);
76 	struct drm_mock_scheduler *sched =
77 		drm_sched_to_mock_sched(job->base.sched);
78 	struct drm_mock_sched_job *next;
79 	ktime_t now = ktime_get();
80 	unsigned long flags;
81 	LIST_HEAD(signal);
82 
83 	spin_lock_irqsave(&sched->lock, flags);
84 	list_for_each_entry_safe(job, next, &sched->job_list, link) {
85 		if (!job->duration_us)
86 			break;
87 
88 		if (ktime_before(now, job->finish_at))
89 			break;
90 
91 		sched->hw_timeline.cur_seqno = job->hw_fence.seqno;
92 		drm_mock_sched_job_complete(job);
93 	}
94 	spin_unlock_irqrestore(&sched->lock, flags);
95 
96 	return HRTIMER_NORESTART;
97 }
98 
99 /**
100  * drm_mock_sched_job_new - Create a new mock scheduler job
101  *
102  * @test: KUnit test owning the job
103  * @entity: Scheduler entity of the job
104  *
105  * Returns: New mock scheduler job with allocation managed by the test
106  */
107 struct drm_mock_sched_job *
108 drm_mock_sched_job_new(struct kunit *test,
109 		       struct drm_mock_sched_entity *entity)
110 {
111 	struct drm_mock_sched_job *job;
112 	int ret;
113 
114 	job = kunit_kzalloc(test, sizeof(*job), GFP_KERNEL);
115 	KUNIT_ASSERT_NOT_NULL(test, job);
116 
117 	ret = drm_sched_job_init(&job->base,
118 				 &entity->base,
119 				 1,
120 				 NULL,
121 				 1);
122 	KUNIT_ASSERT_EQ(test, ret, 0);
123 
124 	job->test = test;
125 
126 	init_completion(&job->done);
127 	INIT_LIST_HEAD(&job->link);
128 	hrtimer_setup(&job->timer, drm_mock_sched_job_signal_timer,
129 		      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
130 
131 	return job;
132 }
133 
/* dma_fence_ops.get_driver_name: every mock hardware fence reports one name. */
static const char *drm_mock_sched_hw_fence_driver_name(struct dma_fence *fence)
{
	static const char driver_name[] = "drm_mock_sched";

	return driver_name;
}
138 
139 static const char *
140 drm_mock_sched_hw_fence_timeline_name(struct dma_fence *fence)
141 {
142 	struct drm_mock_sched_job *job =
143 		container_of(fence, typeof(*job), hw_fence);
144 
145 	return (const char *)job->base.sched->name;
146 }
147 
148 static void drm_mock_sched_hw_fence_release(struct dma_fence *fence)
149 {
150 	struct drm_mock_sched_job *job =
151 		container_of(fence, typeof(*job), hw_fence);
152 
153 	hrtimer_cancel(&job->timer);
154 
155 	/* Containing job is freed by the kunit framework */
156 }
157 
158 static const struct dma_fence_ops drm_mock_sched_hw_fence_ops = {
159 	.get_driver_name = drm_mock_sched_hw_fence_driver_name,
160 	.get_timeline_name = drm_mock_sched_hw_fence_timeline_name,
161 	.release = drm_mock_sched_hw_fence_release,
162 };
163 
164 static struct dma_fence *mock_sched_run_job(struct drm_sched_job *sched_job)
165 {
166 	struct drm_mock_scheduler *sched =
167 		drm_sched_to_mock_sched(sched_job->sched);
168 	struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job);
169 
170 	dma_fence_init(&job->hw_fence,
171 		       &drm_mock_sched_hw_fence_ops,
172 		       &sched->lock,
173 		       sched->hw_timeline.context,
174 		       atomic_inc_return(&sched->hw_timeline.next_seqno));
175 
176 	dma_fence_get(&job->hw_fence); /* Reference for the job_list */
177 
178 	spin_lock_irq(&sched->lock);
179 	if (job->duration_us) {
180 		ktime_t prev_finish_at = 0;
181 
182 		if (!list_empty(&sched->job_list)) {
183 			struct drm_mock_sched_job *prev =
184 				list_last_entry(&sched->job_list, typeof(*prev),
185 						link);
186 
187 			prev_finish_at = prev->finish_at;
188 		}
189 
190 		if (!prev_finish_at)
191 			prev_finish_at = ktime_get();
192 
193 		job->finish_at = ktime_add_us(prev_finish_at, job->duration_us);
194 	}
195 	list_add_tail(&job->link, &sched->job_list);
196 	if (job->finish_at)
197 		hrtimer_start(&job->timer, job->finish_at, HRTIMER_MODE_ABS);
198 	spin_unlock_irq(&sched->lock);
199 
200 	return &job->hw_fence;
201 }
202 
203 /*
204  * Normally, drivers would take appropriate measures in this callback, such as
205  * killing the entity the faulty job is associated with, resetting the hardware
206  * and / or resubmitting non-faulty jobs.
207  *
208  * For the mock scheduler, there are no hardware rings to be resetted nor jobs
209  * to be resubmitted. Thus, this function merely ensures that
210  *   a) timedout fences get signaled properly and removed from the pending list
211  *   b) the mock scheduler framework gets informed about the timeout via a flag
212  *   c) The drm_sched_job, not longer needed, gets freed
213  */
214 static enum drm_gpu_sched_stat
215 mock_sched_timedout_job(struct drm_sched_job *sched_job)
216 {
217 	struct drm_mock_scheduler *sched = drm_sched_to_mock_sched(sched_job->sched);
218 	struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job);
219 	unsigned long flags;
220 
221 	if (job->flags & DRM_MOCK_SCHED_JOB_DONT_RESET) {
222 		job->flags &= ~DRM_MOCK_SCHED_JOB_DONT_RESET;
223 		return DRM_GPU_SCHED_STAT_NO_HANG;
224 	}
225 
226 	spin_lock_irqsave(&sched->lock, flags);
227 	if (!dma_fence_is_signaled_locked(&job->hw_fence)) {
228 		list_del(&job->link);
229 		job->flags |= DRM_MOCK_SCHED_JOB_TIMEDOUT;
230 		dma_fence_set_error(&job->hw_fence, -ETIMEDOUT);
231 		dma_fence_signal_locked(&job->hw_fence);
232 	}
233 	spin_unlock_irqrestore(&sched->lock, flags);
234 
235 	dma_fence_put(&job->hw_fence);
236 	drm_sched_job_cleanup(sched_job);
237 	/* Mock job itself is freed by the kunit framework. */
238 
239 	return DRM_GPU_SCHED_STAT_RESET;
240 }
241 
242 static void mock_sched_free_job(struct drm_sched_job *sched_job)
243 {
244 	struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job);
245 
246 	dma_fence_put(&job->hw_fence);
247 	drm_sched_job_cleanup(sched_job);
248 
249 	/* Mock job itself is freed by the kunit framework. */
250 }
251 
252 static void mock_sched_cancel_job(struct drm_sched_job *sched_job)
253 {
254 	struct drm_mock_scheduler *sched = drm_sched_to_mock_sched(sched_job->sched);
255 	struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job);
256 	unsigned long flags;
257 
258 	hrtimer_cancel(&job->timer);
259 
260 	spin_lock_irqsave(&sched->lock, flags);
261 	if (!dma_fence_is_signaled_locked(&job->hw_fence)) {
262 		list_del(&job->link);
263 		dma_fence_set_error(&job->hw_fence, -ECANCELED);
264 		dma_fence_signal_locked(&job->hw_fence);
265 	}
266 	spin_unlock_irqrestore(&sched->lock, flags);
267 
268 	/*
269 	 * The GPU Scheduler will call drm_sched_backend_ops.free_job(), still.
270 	 * Mock job itself is freed by the kunit framework.
271 	 */
272 }
273 
274 static const struct drm_sched_backend_ops drm_mock_scheduler_ops = {
275 	.run_job = mock_sched_run_job,
276 	.timedout_job = mock_sched_timedout_job,
277 	.free_job = mock_sched_free_job,
278 	.cancel_job = mock_sched_cancel_job,
279 };
280 
281 /**
282  * drm_mock_sched_new - Create a new mock scheduler
283  *
284  * @test: KUnit test owning the job
285  * @timeout: Job timeout to set
286  *
287  * Returns: New mock scheduler with allocation managed by the test
288  */
289 struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test, long timeout)
290 {
291 	struct drm_sched_init_args args = {
292 		.ops		= &drm_mock_scheduler_ops,
293 		.num_rqs	= DRM_SCHED_PRIORITY_COUNT,
294 		.credit_limit	= U32_MAX,
295 		.hang_limit	= 1,
296 		.timeout	= timeout,
297 		.name		= "drm-mock-scheduler",
298 	};
299 	struct drm_mock_scheduler *sched;
300 	int ret;
301 
302 	sched = kunit_kzalloc(test, sizeof(*sched), GFP_KERNEL);
303 	KUNIT_ASSERT_NOT_NULL(test, sched);
304 
305 	ret = drm_sched_init(&sched->base, &args);
306 	KUNIT_ASSERT_EQ(test, ret, 0);
307 
308 	sched->test = test;
309 	sched->hw_timeline.context = dma_fence_context_alloc(1);
310 	atomic_set(&sched->hw_timeline.next_seqno, 0);
311 	INIT_LIST_HEAD(&sched->job_list);
312 	spin_lock_init(&sched->lock);
313 
314 	return sched;
315 }
316 
317 /**
318  * drm_mock_sched_fini - Destroys a mock scheduler
319  *
320  * @sched: Scheduler to destroy
321  *
322  * To be used from the test cases once done with the scheduler.
323  */
324 void drm_mock_sched_fini(struct drm_mock_scheduler *sched)
325 {
326 	drm_sched_fini(&sched->base);
327 }
328 
329 /**
330  * drm_mock_sched_advance - Advances the mock scheduler timeline
331  *
332  * @sched: Scheduler timeline to advance
333  * @num: By how many jobs to advance
334  *
335  * Advancing the scheduler timeline by a number of seqnos will trigger
336  * signalling of the hardware fences and unlinking the jobs from the internal
337  * scheduler tracking.
338  *
339  * This can be used from test cases which want complete control of the simulated
340  * job execution timing. For example submitting one job with no set duration
341  * would never complete it before test cases advances the timeline by one.
342  */
343 unsigned int drm_mock_sched_advance(struct drm_mock_scheduler *sched,
344 				    unsigned int num)
345 {
346 	struct drm_mock_sched_job *job, *next;
347 	unsigned int found = 0;
348 	unsigned long flags;
349 	LIST_HEAD(signal);
350 
351 	spin_lock_irqsave(&sched->lock, flags);
352 	if (WARN_ON_ONCE(sched->hw_timeline.cur_seqno + num <
353 			 sched->hw_timeline.cur_seqno))
354 		goto unlock;
355 	sched->hw_timeline.cur_seqno += num;
356 	list_for_each_entry_safe(job, next, &sched->job_list, link) {
357 		if (sched->hw_timeline.cur_seqno < job->hw_fence.seqno)
358 			break;
359 
360 		drm_mock_sched_job_complete(job);
361 		found++;
362 	}
363 unlock:
364 	spin_unlock_irqrestore(&sched->lock, flags);
365 
366 	return found;
367 }
368 
369 MODULE_DESCRIPTION("DRM mock scheduler and tests");
370 MODULE_LICENSE("GPL");
371