// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2025 Valve Corporation */

#include "sched_tests.h"

/*
 * Here we implement the mock "GPU" (or the scheduler backend) which the DRM
 * scheduler unit tests use to exercise the core scheduler functionality.
 *
 * Test cases are implemented in a separate file.
 */

/**
 * drm_mock_sched_entity_new - Create a new mock scheduler entity
 *
 * @test: KUnit test owning the entity
 * @priority: Scheduling priority
 * @sched: Mock scheduler on which the entity can be scheduled
 *
 * Returns: New mock scheduler entity with allocation managed by the test
 */
struct drm_mock_sched_entity *
drm_mock_sched_entity_new(struct kunit *test,
			  enum drm_sched_priority priority,
			  struct drm_mock_scheduler *sched)
{
	struct drm_mock_sched_entity *entity;
	struct drm_gpu_scheduler *drm_sched;
	int ret;

	entity = kunit_kzalloc(test, sizeof(*entity), GFP_KERNEL);
	KUNIT_ASSERT_NOT_NULL(test, entity);

	drm_sched = &sched->base;
	ret = drm_sched_entity_init(&entity->base,
				    priority,
				    &drm_sched, 1,
				    NULL);
	KUNIT_ASSERT_EQ(test, ret, 0);

	entity->test = test;

	return entity;
}

/**
 * drm_mock_sched_entity_free - Destroys a mock scheduler entity
 *
 * @entity: Entity to destroy
 *
 * To be used from the test cases once done with the entity.
 */
void drm_mock_sched_entity_free(struct drm_mock_sched_entity *entity)
{
	drm_sched_entity_destroy(&entity->base);
}

static void drm_mock_sched_job_complete(struct drm_mock_sched_job *job)
{
	struct drm_mock_scheduler *sched =
		drm_sched_to_mock_sched(job->base.sched);

	lockdep_assert_held(&sched->lock);

	job->flags |= DRM_MOCK_SCHED_JOB_DONE;
	list_del(&job->link);
	dma_fence_signal_locked(&job->hw_fence);
	complete(&job->done);
}

static enum hrtimer_restart
drm_mock_sched_job_signal_timer(struct hrtimer *hrtimer)
{
	struct drm_mock_sched_job *job =
		container_of(hrtimer, typeof(*job), timer);
	struct drm_mock_scheduler *sched =
		drm_sched_to_mock_sched(job->base.sched);
	struct drm_mock_sched_job *next;
	ktime_t now = ktime_get();
	unsigned long flags;

	spin_lock_irqsave(&sched->lock, flags);
	list_for_each_entry_safe(job, next, &sched->job_list, link) {
		if (!job->duration_us)
			break;

		if (ktime_before(now, job->finish_at))
			break;

		sched->hw_timeline.cur_seqno = job->hw_fence.seqno;
		drm_mock_sched_job_complete(job);
	}
	spin_unlock_irqrestore(&sched->lock, flags);

	return HRTIMER_NORESTART;
}

/**
 * drm_mock_sched_job_new - Create a new mock scheduler job
 *
 * @test: KUnit test owning the job
 * @entity: Scheduler entity of the job
 *
 * Returns: New mock scheduler job with allocation managed by the test
 */
struct drm_mock_sched_job *
drm_mock_sched_job_new(struct kunit *test,
		       struct drm_mock_sched_entity *entity)
{
	struct drm_mock_sched_job *job;
	int ret;

	job = kunit_kzalloc(test, sizeof(*job), GFP_KERNEL);
	KUNIT_ASSERT_NOT_NULL(test, job);

	ret = drm_sched_job_init(&job->base,
				 &entity->base,
				 1,
				 NULL,
				 1);
	KUNIT_ASSERT_EQ(test, ret, 0);

	job->test = test;

	init_completion(&job->done);
	INIT_LIST_HEAD(&job->link);
	hrtimer_setup(&job->timer, drm_mock_sched_job_signal_timer,
		      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);

	return job;
}
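
/*
 * Example usage (an illustrative sketch, not part of the mock itself): a test
 * would typically give the job a simulated duration and then submit it.
 * drm_mock_sched_job_set_duration_us() and drm_mock_sched_job_submit() are
 * assumed here to be the submission helpers declared in sched_tests.h.
 *
 *	job = drm_mock_sched_job_new(test, entity);
 *	drm_mock_sched_job_set_duration_us(job, 1000);
 *	drm_mock_sched_job_submit(job);
 */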

static const char *drm_mock_sched_hw_fence_driver_name(struct dma_fence *fence)
{
	return "drm_mock_sched";
}

static const char *
drm_mock_sched_hw_fence_timeline_name(struct dma_fence *fence)
{
	struct drm_mock_sched_job *job =
		container_of(fence, typeof(*job), hw_fence);

	return (const char *)job->base.sched->name;
}

static void drm_mock_sched_hw_fence_release(struct dma_fence *fence)
{
	struct drm_mock_sched_job *job =
		container_of(fence, typeof(*job), hw_fence);

	hrtimer_cancel(&job->timer);

	/* Containing job is freed by the kunit framework */
}

static const struct dma_fence_ops drm_mock_sched_hw_fence_ops = {
	.get_driver_name = drm_mock_sched_hw_fence_driver_name,
	.get_timeline_name = drm_mock_sched_hw_fence_timeline_name,
	.release = drm_mock_sched_hw_fence_release,
};
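
/*
 * Note on job completion: a job queued to the mock scheduler completes in one
 * of two ways. If it has a duration_us set, the hrtimer signals its hardware
 * fence once the computed finish_at time has passed. If it has no duration,
 * it stays on the job_list until a test explicitly advances the timeline with
 * drm_mock_sched_advance(). Timed jobs are queued back to back, each one's
 * finish_at building on the previous job's, emulating a serial hardware ring.
 */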

static struct dma_fence *mock_sched_run_job(struct drm_sched_job *sched_job)
{
	struct drm_mock_scheduler *sched =
		drm_sched_to_mock_sched(sched_job->sched);
	struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job);

	dma_fence_init(&job->hw_fence,
		       &drm_mock_sched_hw_fence_ops,
		       &sched->lock,
		       sched->hw_timeline.context,
		       atomic_inc_return(&sched->hw_timeline.next_seqno));

	dma_fence_get(&job->hw_fence); /* Reference for the job_list */

	spin_lock_irq(&sched->lock);
	if (job->duration_us) {
		ktime_t prev_finish_at = 0;

		if (!list_empty(&sched->job_list)) {
			struct drm_mock_sched_job *prev =
				list_last_entry(&sched->job_list, typeof(*prev),
						link);

			prev_finish_at = prev->finish_at;
		}

		if (!prev_finish_at)
			prev_finish_at = ktime_get();

		job->finish_at = ktime_add_us(prev_finish_at, job->duration_us);
	}
	list_add_tail(&job->link, &sched->job_list);
	if (job->finish_at)
		hrtimer_start(&job->timer, job->finish_at, HRTIMER_MODE_ABS);
	spin_unlock_irq(&sched->lock);

	return &job->hw_fence;
}

/*
 * Normally, drivers would take appropriate measures in this callback, such as
 * killing the entity the faulty job is associated with, resetting the hardware
 * and / or resubmitting non-faulty jobs.
 *
 * For the mock scheduler, there are no hardware rings to be reset nor jobs
 * to be resubmitted. Thus, this function merely ensures that
 *   a) timed out fences get signaled properly and removed from the pending
 *      list,
 *   b) the mock scheduler framework gets informed about the timeout via a
 *      flag, and
 *   c) the drm_sched_job, no longer needed, gets freed.
 */
static enum drm_gpu_sched_stat
mock_sched_timedout_job(struct drm_sched_job *sched_job)
{
	struct drm_mock_scheduler *sched = drm_sched_to_mock_sched(sched_job->sched);
	struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job);
	unsigned long flags;

	spin_lock_irqsave(&sched->lock, flags);
	if (!dma_fence_is_signaled_locked(&job->hw_fence)) {
		list_del(&job->link);
		job->flags |= DRM_MOCK_SCHED_JOB_TIMEDOUT;
		dma_fence_set_error(&job->hw_fence, -ETIMEDOUT);
		dma_fence_signal_locked(&job->hw_fence);
	}
	spin_unlock_irqrestore(&sched->lock, flags);

	dma_fence_put(&job->hw_fence);
	drm_sched_job_cleanup(sched_job);
	/* Mock job itself is freed by the kunit framework. */

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void mock_sched_free_job(struct drm_sched_job *sched_job)
{
	struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job);

	dma_fence_put(&job->hw_fence);
	drm_sched_job_cleanup(sched_job);

	/* Mock job itself is freed by the kunit framework. */
}

static void mock_sched_cancel_job(struct drm_sched_job *sched_job)
{
	struct drm_mock_scheduler *sched = drm_sched_to_mock_sched(sched_job->sched);
	struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job);
	unsigned long flags;

	hrtimer_cancel(&job->timer);

	spin_lock_irqsave(&sched->lock, flags);
	if (!dma_fence_is_signaled_locked(&job->hw_fence)) {
		list_del(&job->link);
		dma_fence_set_error(&job->hw_fence, -ECANCELED);
		dma_fence_signal_locked(&job->hw_fence);
	}
	spin_unlock_irqrestore(&sched->lock, flags);

	/*
	 * The GPU scheduler will still call drm_sched_backend_ops.free_job().
	 * The mock job itself is freed by the kunit framework.
	 */
}

static const struct drm_sched_backend_ops drm_mock_scheduler_ops = {
	.run_job = mock_sched_run_job,
	.timedout_job = mock_sched_timedout_job,
	.free_job = mock_sched_free_job,
	.cancel_job = mock_sched_cancel_job,
};

/**
 * drm_mock_sched_new - Create a new mock scheduler
 *
 * @test: KUnit test owning the scheduler
 * @timeout: Job timeout to set
 *
 * Returns: New mock scheduler with allocation managed by the test
 */
struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test, long timeout)
{
	struct drm_sched_init_args args = {
		.ops		= &drm_mock_scheduler_ops,
		.num_rqs	= DRM_SCHED_PRIORITY_COUNT,
		.credit_limit	= U32_MAX,
		.hang_limit	= 1,
		.timeout	= timeout,
		.name		= "drm-mock-scheduler",
	};
	struct drm_mock_scheduler *sched;
	int ret;

	sched = kunit_kzalloc(test, sizeof(*sched), GFP_KERNEL);
	KUNIT_ASSERT_NOT_NULL(test, sched);

	ret = drm_sched_init(&sched->base, &args);
	KUNIT_ASSERT_EQ(test, ret, 0);

	sched->test = test;
	sched->hw_timeline.context = dma_fence_context_alloc(1);
	atomic_set(&sched->hw_timeline.next_seqno, 0);
	INIT_LIST_HEAD(&sched->job_list);
	spin_lock_init(&sched->lock);

	return sched;
}
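
/*
 * Example lifecycle (an illustrative sketch): a test typically pairs the
 * scheduler with an entity and tears both down in reverse order once done.
 *
 *	sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT);
 *	entity = drm_mock_sched_entity_new(test, DRM_SCHED_PRIORITY_NORMAL,
 *					   sched);
 *	... create and submit jobs ...
 *	drm_mock_sched_entity_free(entity);
 *	drm_mock_sched_fini(sched);
 */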

/**
 * drm_mock_sched_fini - Destroys a mock scheduler
 *
 * @sched: Scheduler to destroy
 *
 * To be used from the test cases once done with the scheduler.
 */
void drm_mock_sched_fini(struct drm_mock_scheduler *sched)
{
	drm_sched_fini(&sched->base);
}

/**
 * drm_mock_sched_advance - Advances the mock scheduler timeline
 *
 * @sched: Scheduler timeline to advance
 * @num: By how many seqnos to advance
 *
 * Advancing the scheduler timeline by a number of seqnos will trigger
 * signalling of the hardware fences and unlinking the jobs from the internal
 * scheduler tracking.
 *
 * This can be used from test cases which want complete control of the
 * simulated job execution timing. For example, a job submitted with no set
 * duration will never complete until the test case advances the timeline by
 * at least one seqno.
 *
 * Returns: Number of jobs completed by the advance.
 */
unsigned int drm_mock_sched_advance(struct drm_mock_scheduler *sched,
				    unsigned int num)
{
	struct drm_mock_sched_job *job, *next;
	unsigned int found = 0;
	unsigned long flags;

	spin_lock_irqsave(&sched->lock, flags);
	if (WARN_ON_ONCE(sched->hw_timeline.cur_seqno + num <
			 sched->hw_timeline.cur_seqno))
		goto unlock;
	sched->hw_timeline.cur_seqno += num;
	list_for_each_entry_safe(job, next, &sched->job_list, link) {
		if (sched->hw_timeline.cur_seqno < job->hw_fence.seqno)
			break;

		drm_mock_sched_job_complete(job);
		found++;
	}
unlock:
	spin_unlock_irqrestore(&sched->lock, flags);

	return found;
}
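
/*
 * Example usage (an illustrative sketch): completing a job which has no set
 * duration. drm_mock_sched_job_submit() is assumed here to be the submission
 * helper declared in sched_tests.h.
 *
 *	job = drm_mock_sched_job_new(test, entity);
 *	drm_mock_sched_job_submit(job);
 *
 * The job runs but its hardware fence never signals by itself; advancing the
 * timeline by one seqno completes it:
 *
 *	KUNIT_ASSERT_EQ(test, drm_mock_sched_advance(sched, 1), 1);
 */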

MODULE_DESCRIPTION("DRM mock scheduler and tests");
MODULE_LICENSE("GPL");