xref: /linux/drivers/gpu/drm/xe/xe_sched_job.c (revision b00f7f4f8e936da55f2e6c7fd96391ef54c145fc)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_sched_job.h"
7 
8 #include <drm/xe_drm.h>
9 #include <linux/dma-fence-chain.h>
10 #include <linux/slab.h>
11 
12 #include "xe_device.h"
13 #include "xe_exec_queue.h"
14 #include "xe_gt.h"
15 #include "xe_hw_engine_types.h"
16 #include "xe_hw_fence.h"
17 #include "xe_lrc.h"
18 #include "xe_macros.h"
19 #include "xe_pm.h"
20 #include "xe_sync_types.h"
21 #include "xe_trace.h"
22 #include "xe_vm.h"
23 
24 static struct kmem_cache *xe_sched_job_slab;
25 static struct kmem_cache *xe_sched_job_parallel_slab;
26 
27 int __init xe_sched_job_module_init(void)
28 {
29 	xe_sched_job_slab =
30 		kmem_cache_create("xe_sched_job",
31 				  sizeof(struct xe_sched_job) +
32 				  sizeof(struct xe_job_ptrs), 0,
33 				  SLAB_HWCACHE_ALIGN, NULL);
34 	if (!xe_sched_job_slab)
35 		return -ENOMEM;
36 
37 	xe_sched_job_parallel_slab =
38 		kmem_cache_create("xe_sched_job_parallel",
39 				  sizeof(struct xe_sched_job) +
40 				  sizeof(struct xe_job_ptrs) *
41 				  XE_HW_ENGINE_MAX_INSTANCE, 0,
42 				  SLAB_HWCACHE_ALIGN, NULL);
43 	if (!xe_sched_job_parallel_slab) {
44 		kmem_cache_destroy(xe_sched_job_slab);
45 		return -ENOMEM;
46 	}
47 
48 	return 0;
49 }
50 
51 void xe_sched_job_module_exit(void)
52 {
53 	kmem_cache_destroy(xe_sched_job_slab);
54 	kmem_cache_destroy(xe_sched_job_parallel_slab);
55 }
56 
57 static struct xe_sched_job *job_alloc(bool parallel)
58 {
59 	return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab :
60 				 xe_sched_job_slab, GFP_KERNEL);
61 }
62 
63 bool xe_sched_job_is_migration(struct xe_exec_queue *q)
64 {
65 	return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION);
66 }
67 
68 static void job_free(struct xe_sched_job *job)
69 {
70 	struct xe_exec_queue *q = job->q;
71 	bool is_migration = xe_sched_job_is_migration(q);
72 
73 	kmem_cache_free(xe_exec_queue_is_parallel(job->q) || is_migration ?
74 			xe_sched_job_parallel_slab : xe_sched_job_slab, job);
75 }
76 
77 static struct xe_device *job_to_xe(struct xe_sched_job *job)
78 {
79 	return gt_to_xe(job->q->gt);
80 }
81 
82 /* Free unused pre-allocated fences */
83 static void xe_sched_job_free_fences(struct xe_sched_job *job)
84 {
85 	int i;
86 
87 	for (i = 0; i < job->q->width; ++i) {
88 		struct xe_job_ptrs *ptrs = &job->ptrs[i];
89 
90 		if (ptrs->lrc_fence)
91 			xe_lrc_free_seqno_fence(ptrs->lrc_fence);
92 		if (ptrs->chain_fence)
93 			dma_fence_chain_free(ptrs->chain_fence);
94 	}
95 }
96 
97 struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
98 					 u64 *batch_addr)
99 {
100 	bool is_migration = xe_sched_job_is_migration(q);
101 	struct xe_sched_job *job;
102 	int err;
103 	int i;
104 	u32 width;
105 
106 	/* only a kernel context can submit a vm-less job */
107 	XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
108 
109 	job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration);
110 	if (!job)
111 		return ERR_PTR(-ENOMEM);
112 
113 	job->q = q;
114 	kref_init(&job->refcount);
115 	xe_exec_queue_get(job->q);
116 
117 	err = drm_sched_job_init(&job->drm, q->entity, 1, NULL);
118 	if (err)
119 		goto err_free;
120 
121 	for (i = 0; i < q->width; ++i) {
122 		struct dma_fence *fence = xe_lrc_alloc_seqno_fence();
123 		struct dma_fence_chain *chain;
124 
125 		if (IS_ERR(fence)) {
126 			err = PTR_ERR(fence);
127 			goto err_sched_job;
128 		}
129 		job->ptrs[i].lrc_fence = fence;
130 
131 		if (i + 1 == q->width)
132 			continue;
133 
134 		chain = dma_fence_chain_alloc();
135 		if (!chain) {
136 			err = -ENOMEM;
137 			goto err_sched_job;
138 		}
139 		job->ptrs[i].chain_fence = chain;
140 	}
141 
142 	width = q->width;
143 	if (is_migration)
144 		width = 2;
145 
146 	for (i = 0; i < width; ++i)
147 		job->ptrs[i].batch_addr = batch_addr[i];
148 
149 	xe_pm_runtime_get_noresume(job_to_xe(job));
150 	trace_xe_sched_job_create(job);
151 	return job;
152 
153 err_sched_job:
154 	xe_sched_job_free_fences(job);
155 	drm_sched_job_cleanup(&job->drm);
156 err_free:
157 	xe_exec_queue_put(q);
158 	job_free(job);
159 	return ERR_PTR(err);
160 }
161 
162 /**
163  * xe_sched_job_destroy - Destroy XE schedule job
164  * @ref: reference to XE schedule job
165  *
166  * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup
167  * base DRM schedule job, and free memory for XE schedule job.
168  */
169 void xe_sched_job_destroy(struct kref *ref)
170 {
171 	struct xe_sched_job *job =
172 		container_of(ref, struct xe_sched_job, refcount);
173 	struct xe_device *xe = job_to_xe(job);
174 	struct xe_exec_queue *q = job->q;
175 
176 	xe_sched_job_free_fences(job);
177 	dma_fence_put(job->fence);
178 	drm_sched_job_cleanup(&job->drm);
179 	job_free(job);
180 	xe_exec_queue_put(q);
181 	xe_pm_runtime_put(xe);
182 }
183 
184 /* Set the error status under the fence to avoid racing with signaling */
185 static bool xe_fence_set_error(struct dma_fence *fence, int error)
186 {
187 	unsigned long irq_flags;
188 	bool signaled;
189 
190 	spin_lock_irqsave(fence->lock, irq_flags);
191 	signaled = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
192 	if (!signaled)
193 		dma_fence_set_error(fence, error);
194 	spin_unlock_irqrestore(fence->lock, irq_flags);
195 
196 	return signaled;
197 }
198 
199 void xe_sched_job_set_error(struct xe_sched_job *job, int error)
200 {
201 	if (xe_fence_set_error(job->fence, error))
202 		return;
203 
204 	if (dma_fence_is_chain(job->fence)) {
205 		struct dma_fence *iter;
206 
207 		dma_fence_chain_for_each(iter, job->fence)
208 			xe_fence_set_error(dma_fence_chain_contained(iter),
209 					   error);
210 	}
211 
212 	trace_xe_sched_job_set_error(job);
213 
214 	dma_fence_enable_sw_signaling(job->fence);
215 	xe_hw_fence_irq_run(job->q->fence_irq);
216 }
217 
218 bool xe_sched_job_started(struct xe_sched_job *job)
219 {
220 	struct xe_lrc *lrc = job->q->lrc[0];
221 
222 	return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job),
223 				     xe_lrc_start_seqno(lrc),
224 				     dma_fence_chain_contained(job->fence)->ops);
225 }
226 
227 bool xe_sched_job_completed(struct xe_sched_job *job)
228 {
229 	struct xe_lrc *lrc = job->q->lrc[0];
230 
231 	/*
232 	 * Can safely check just LRC[0] seqno as that is last seqno written when
233 	 * parallel handshake is done.
234 	 */
235 
236 	return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job),
237 				     xe_lrc_seqno(lrc),
238 				     dma_fence_chain_contained(job->fence)->ops);
239 }
240 
241 void xe_sched_job_arm(struct xe_sched_job *job)
242 {
243 	struct xe_exec_queue *q = job->q;
244 	struct dma_fence *fence, *prev;
245 	struct xe_vm *vm = q->vm;
246 	u64 seqno = 0;
247 	int i;
248 
249 	/* Migration and kernel engines have their own locking */
250 	if (IS_ENABLED(CONFIG_LOCKDEP) &&
251 	    !(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
252 		lockdep_assert_held(&q->vm->lock);
253 		if (!xe_vm_in_lr_mode(q->vm))
254 			xe_vm_assert_held(q->vm);
255 	}
256 
257 	if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
258 	    (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
259 		xe_vm_assert_held(vm);
260 		q->tlb_flush_seqno = vm->tlb_flush_seqno;
261 		job->ring_ops_flush_tlb = true;
262 	}
263 
264 	/* Arm the pre-allocated fences */
265 	for (i = 0; i < q->width; prev = fence, ++i) {
266 		struct dma_fence_chain *chain;
267 
268 		fence = job->ptrs[i].lrc_fence;
269 		xe_lrc_init_seqno_fence(q->lrc[i], fence);
270 		job->ptrs[i].lrc_fence = NULL;
271 		if (!i) {
272 			job->lrc_seqno = fence->seqno;
273 			continue;
274 		} else {
275 			xe_assert(gt_to_xe(q->gt), job->lrc_seqno == fence->seqno);
276 		}
277 
278 		chain = job->ptrs[i - 1].chain_fence;
279 		dma_fence_chain_init(chain, prev, fence, seqno++);
280 		job->ptrs[i - 1].chain_fence = NULL;
281 		fence = &chain->base;
282 	}
283 
284 	job->fence = fence;
285 	drm_sched_job_arm(&job->drm);
286 }
287 
288 void xe_sched_job_push(struct xe_sched_job *job)
289 {
290 	xe_sched_job_get(job);
291 	trace_xe_sched_job_exec(job);
292 	drm_sched_entity_push_job(&job->drm);
293 	xe_sched_job_put(job);
294 }
295 
296 /**
297  * xe_sched_job_last_fence_add_dep - Add last fence dependency to job
298  * @job:job to add the last fence dependency to
299  * @vm: virtual memory job belongs to
300  *
301  * Returns:
302  * 0 on success, or an error on failing to expand the array.
303  */
304 int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
305 {
306 	struct dma_fence *fence;
307 
308 	fence = xe_exec_queue_last_fence_get(job->q, vm);
309 
310 	return drm_sched_job_add_dependency(&job->drm, fence);
311 }
312 
313 /**
314  * xe_sched_job_init_user_fence - Initialize user_fence for the job
315  * @job: job whose user_fence needs an init
316  * @sync: sync to be use to init user_fence
317  */
318 void xe_sched_job_init_user_fence(struct xe_sched_job *job,
319 				  struct xe_sync_entry *sync)
320 {
321 	if (sync->type != DRM_XE_SYNC_TYPE_USER_FENCE)
322 		return;
323 
324 	job->user_fence.used = true;
325 	job->user_fence.addr = sync->addr;
326 	job->user_fence.value = sync->timeline_value;
327 }
328 
329 struct xe_sched_job_snapshot *
330 xe_sched_job_snapshot_capture(struct xe_sched_job *job)
331 {
332 	struct xe_exec_queue *q = job->q;
333 	struct xe_device *xe = q->gt->tile->xe;
334 	struct xe_sched_job_snapshot *snapshot;
335 	size_t len = sizeof(*snapshot) + (sizeof(u64) * q->width);
336 	u16 i;
337 
338 	snapshot = kzalloc(len, GFP_ATOMIC);
339 	if (!snapshot)
340 		return NULL;
341 
342 	snapshot->batch_addr_len = q->width;
343 	for (i = 0; i < q->width; i++)
344 		snapshot->batch_addr[i] =
345 			xe_device_uncanonicalize_addr(xe, job->ptrs[i].batch_addr);
346 
347 	return snapshot;
348 }
349 
/**
 * xe_sched_job_snapshot_free - Free a job snapshot
 * @snapshot: snapshot returned by xe_sched_job_snapshot_capture()
 *
 * The snapshot is handed straight to kfree().
 */
void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot)
{
	kfree(snapshot);
}
354 
355 void
356 xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot,
357 			    struct drm_printer *p)
358 {
359 	u16 i;
360 
361 	if (!snapshot)
362 		return;
363 
364 	for (i = 0; i < snapshot->batch_addr_len; i++)
365 		drm_printf(p, "batch_addr[%u]: 0x%016llx\n", i, snapshot->batch_addr[i]);
366 }
367 
368 int xe_sched_job_add_deps(struct xe_sched_job *job, struct dma_resv *resv,
369 			  enum dma_resv_usage usage)
370 {
371 	return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage);
372 }
373