xref: /linux/drivers/gpu/drm/xe/xe_sched_job.c (revision 90d32e92011eaae8e70a9169b4e7acf4ca8f9d3a)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_sched_job.h"
7 
8 #include <drm/xe_drm.h>
9 #include <linux/dma-fence-array.h>
10 #include <linux/slab.h>
11 
12 #include "xe_device.h"
13 #include "xe_exec_queue.h"
14 #include "xe_gt.h"
15 #include "xe_hw_engine_types.h"
16 #include "xe_hw_fence.h"
17 #include "xe_lrc.h"
18 #include "xe_macros.h"
19 #include "xe_pm.h"
20 #include "xe_sync_types.h"
21 #include "xe_trace.h"
22 #include "xe_vm.h"
23 
/* Slab cache for jobs allocated with room for a single u64 batch address. */
static struct kmem_cache *xe_sched_job_slab;
/*
 * Slab cache for jobs allocated with room for XE_HW_ENGINE_MAX_INSTANCE u64
 * batch addresses; used for parallel and migration exec queues.
 */
static struct kmem_cache *xe_sched_job_parallel_slab;
26 
27 int __init xe_sched_job_module_init(void)
28 {
29 	xe_sched_job_slab =
30 		kmem_cache_create("xe_sched_job",
31 				  sizeof(struct xe_sched_job) +
32 				  sizeof(u64), 0,
33 				  SLAB_HWCACHE_ALIGN, NULL);
34 	if (!xe_sched_job_slab)
35 		return -ENOMEM;
36 
37 	xe_sched_job_parallel_slab =
38 		kmem_cache_create("xe_sched_job_parallel",
39 				  sizeof(struct xe_sched_job) +
40 				  sizeof(u64) *
41 				  XE_HW_ENGINE_MAX_INSTANCE, 0,
42 				  SLAB_HWCACHE_ALIGN, NULL);
43 	if (!xe_sched_job_parallel_slab) {
44 		kmem_cache_destroy(xe_sched_job_slab);
45 		return -ENOMEM;
46 	}
47 
48 	return 0;
49 }
50 
51 void xe_sched_job_module_exit(void)
52 {
53 	kmem_cache_destroy(xe_sched_job_slab);
54 	kmem_cache_destroy(xe_sched_job_parallel_slab);
55 }
56 
57 static struct xe_sched_job *job_alloc(bool parallel)
58 {
59 	return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab :
60 				 xe_sched_job_slab, GFP_KERNEL);
61 }
62 
63 bool xe_sched_job_is_migration(struct xe_exec_queue *q)
64 {
65 	return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION);
66 }
67 
68 static void job_free(struct xe_sched_job *job)
69 {
70 	struct xe_exec_queue *q = job->q;
71 	bool is_migration = xe_sched_job_is_migration(q);
72 
73 	kmem_cache_free(xe_exec_queue_is_parallel(job->q) || is_migration ?
74 			xe_sched_job_parallel_slab : xe_sched_job_slab, job);
75 }
76 
77 static struct xe_device *job_to_xe(struct xe_sched_job *job)
78 {
79 	return gt_to_xe(job->q->gt);
80 }
81 
82 struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
83 					 u64 *batch_addr)
84 {
85 	struct xe_sched_job *job;
86 	struct dma_fence **fences;
87 	bool is_migration = xe_sched_job_is_migration(q);
88 	int err;
89 	int i, j;
90 	u32 width;
91 
92 	/* only a kernel context can submit a vm-less job */
93 	XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL));
94 
95 	/* Migration and kernel engines have their own locking */
96 	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
97 		lockdep_assert_held(&q->vm->lock);
98 		if (!xe_vm_in_lr_mode(q->vm))
99 			xe_vm_assert_held(q->vm);
100 	}
101 
102 	job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration);
103 	if (!job)
104 		return ERR_PTR(-ENOMEM);
105 
106 	job->q = q;
107 	kref_init(&job->refcount);
108 	xe_exec_queue_get(job->q);
109 
110 	err = drm_sched_job_init(&job->drm, q->entity, 1, NULL);
111 	if (err)
112 		goto err_free;
113 
114 	if (!xe_exec_queue_is_parallel(q)) {
115 		job->fence = xe_lrc_create_seqno_fence(q->lrc);
116 		if (IS_ERR(job->fence)) {
117 			err = PTR_ERR(job->fence);
118 			goto err_sched_job;
119 		}
120 	} else {
121 		struct dma_fence_array *cf;
122 
123 		fences = kmalloc_array(q->width, sizeof(*fences), GFP_KERNEL);
124 		if (!fences) {
125 			err = -ENOMEM;
126 			goto err_sched_job;
127 		}
128 
129 		for (j = 0; j < q->width; ++j) {
130 			fences[j] = xe_lrc_create_seqno_fence(q->lrc + j);
131 			if (IS_ERR(fences[j])) {
132 				err = PTR_ERR(fences[j]);
133 				goto err_fences;
134 			}
135 		}
136 
137 		cf = dma_fence_array_create(q->width, fences,
138 					    q->parallel.composite_fence_ctx,
139 					    q->parallel.composite_fence_seqno++,
140 					    false);
141 		if (!cf) {
142 			--q->parallel.composite_fence_seqno;
143 			err = -ENOMEM;
144 			goto err_fences;
145 		}
146 
147 		/* Sanity check */
148 		for (j = 0; j < q->width; ++j)
149 			xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno);
150 
151 		job->fence = &cf->base;
152 	}
153 
154 	width = q->width;
155 	if (is_migration)
156 		width = 2;
157 
158 	for (i = 0; i < width; ++i)
159 		job->batch_addr[i] = batch_addr[i];
160 
161 	/* All other jobs require a VM to be open which has a ref */
162 	if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL))
163 		xe_pm_runtime_get_noresume(job_to_xe(job));
164 	xe_device_assert_mem_access(job_to_xe(job));
165 
166 	trace_xe_sched_job_create(job);
167 	return job;
168 
169 err_fences:
170 	for (j = j - 1; j >= 0; --j) {
171 		--q->lrc[j].fence_ctx.next_seqno;
172 		dma_fence_put(fences[j]);
173 	}
174 	kfree(fences);
175 err_sched_job:
176 	drm_sched_job_cleanup(&job->drm);
177 err_free:
178 	xe_exec_queue_put(q);
179 	job_free(job);
180 	return ERR_PTR(err);
181 }
182 
183 /**
184  * xe_sched_job_destroy - Destroy XE schedule job
185  * @ref: reference to XE schedule job
186  *
187  * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup
188  * base DRM schedule job, and free memory for XE schedule job.
189  */
190 void xe_sched_job_destroy(struct kref *ref)
191 {
192 	struct xe_sched_job *job =
193 		container_of(ref, struct xe_sched_job, refcount);
194 
195 	if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL))
196 		xe_pm_runtime_put(job_to_xe(job));
197 	xe_exec_queue_put(job->q);
198 	dma_fence_put(job->fence);
199 	drm_sched_job_cleanup(&job->drm);
200 	job_free(job);
201 }
202 
203 void xe_sched_job_set_error(struct xe_sched_job *job, int error)
204 {
205 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags))
206 		return;
207 
208 	dma_fence_set_error(job->fence, error);
209 
210 	if (dma_fence_is_array(job->fence)) {
211 		struct dma_fence_array *array =
212 			to_dma_fence_array(job->fence);
213 		struct dma_fence **child = array->fences;
214 		unsigned int nchild = array->num_fences;
215 
216 		do {
217 			struct dma_fence *current_fence = *child++;
218 
219 			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
220 				     &current_fence->flags))
221 				continue;
222 			dma_fence_set_error(current_fence, error);
223 		} while (--nchild);
224 	}
225 
226 	trace_xe_sched_job_set_error(job);
227 
228 	dma_fence_enable_sw_signaling(job->fence);
229 	xe_hw_fence_irq_run(job->q->fence_irq);
230 }
231 
232 bool xe_sched_job_started(struct xe_sched_job *job)
233 {
234 	struct xe_lrc *lrc = job->q->lrc;
235 
236 	return !__dma_fence_is_later(xe_sched_job_seqno(job),
237 				     xe_lrc_start_seqno(lrc),
238 				     job->fence->ops);
239 }
240 
241 bool xe_sched_job_completed(struct xe_sched_job *job)
242 {
243 	struct xe_lrc *lrc = job->q->lrc;
244 
245 	/*
246 	 * Can safely check just LRC[0] seqno as that is last seqno written when
247 	 * parallel handshake is done.
248 	 */
249 
250 	return !__dma_fence_is_later(xe_sched_job_seqno(job), xe_lrc_seqno(lrc),
251 				     job->fence->ops);
252 }
253 
254 void xe_sched_job_arm(struct xe_sched_job *job)
255 {
256 	struct xe_exec_queue *q = job->q;
257 	struct xe_vm *vm = q->vm;
258 
259 	if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
260 	    (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
261 		xe_vm_assert_held(vm);
262 		q->tlb_flush_seqno = vm->tlb_flush_seqno;
263 		job->ring_ops_flush_tlb = true;
264 	}
265 
266 	drm_sched_job_arm(&job->drm);
267 }
268 
/*
 * Trace the job and push it to its DRM scheduler entity. An extra job
 * reference is held across the trace and push and dropped right afterwards.
 */
void xe_sched_job_push(struct xe_sched_job *job)
{
	/* NOTE(review): presumably keeps the job alive while it is being pushed - confirm */
	xe_sched_job_get(job);
	trace_xe_sched_job_exec(job);
	drm_sched_entity_push_job(&job->drm);
	xe_sched_job_put(job);
}
276 
277 /**
278  * xe_sched_job_last_fence_add_dep - Add last fence dependency to job
279  * @job:job to add the last fence dependency to
280  * @vm: virtual memory job belongs to
281  *
282  * Returns:
283  * 0 on success, or an error on failing to expand the array.
284  */
285 int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
286 {
287 	struct dma_fence *fence;
288 
289 	fence = xe_exec_queue_last_fence_get(job->q, vm);
290 
291 	return drm_sched_job_add_dependency(&job->drm, fence);
292 }
293 
294 /**
295  * xe_sched_job_init_user_fence - Initialize user_fence for the job
296  * @job: job whose user_fence needs an init
297  * @sync: sync to be use to init user_fence
298  */
299 void xe_sched_job_init_user_fence(struct xe_sched_job *job,
300 				  struct xe_sync_entry *sync)
301 {
302 	if (sync->type != DRM_XE_SYNC_TYPE_USER_FENCE)
303 		return;
304 
305 	job->user_fence.used = true;
306 	job->user_fence.addr = sync->addr;
307 	job->user_fence.value = sync->timeline_value;
308 }
309 
310 struct xe_sched_job_snapshot *
311 xe_sched_job_snapshot_capture(struct xe_sched_job *job)
312 {
313 	struct xe_exec_queue *q = job->q;
314 	struct xe_device *xe = q->gt->tile->xe;
315 	struct xe_sched_job_snapshot *snapshot;
316 	size_t len = sizeof(*snapshot) + (sizeof(u64) * q->width);
317 	u16 i;
318 
319 	snapshot = kzalloc(len, GFP_ATOMIC);
320 	if (!snapshot)
321 		return NULL;
322 
323 	snapshot->batch_addr_len = q->width;
324 	for (i = 0; i < q->width; i++)
325 		snapshot->batch_addr[i] = xe_device_uncanonicalize_addr(xe, job->batch_addr[i]);
326 
327 	return snapshot;
328 }
329 
/* Release a job snapshot by passing it to kfree(). */
void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot)
{
	kfree(snapshot);
}
334 
335 void
336 xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot,
337 			    struct drm_printer *p)
338 {
339 	u16 i;
340 
341 	if (!snapshot)
342 		return;
343 
344 	for (i = 0; i < snapshot->batch_addr_len; i++)
345 		drm_printf(p, "batch_addr[%u]: 0x%016llx\n", i, snapshot->batch_addr[i]);
346 }
347