1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2021 Intel Corporation
4 */
5
6 #include "xe_sched_job.h"
7
8 #include <uapi/drm/xe_drm.h>
9 #include <linux/dma-fence-chain.h>
10 #include <linux/slab.h>
11
12 #include "xe_device.h"
13 #include "xe_exec_queue.h"
14 #include "xe_gt.h"
15 #include "xe_hw_engine_types.h"
16 #include "xe_hw_fence.h"
17 #include "xe_lrc.h"
18 #include "xe_macros.h"
19 #include "xe_pm.h"
20 #include "xe_sync_types.h"
21 #include "xe_trace.h"
22 #include "xe_vm.h"
23
24 static struct kmem_cache *xe_sched_job_slab;
25 static struct kmem_cache *xe_sched_job_parallel_slab;
26
xe_sched_job_module_init(void)27 int __init xe_sched_job_module_init(void)
28 {
29 xe_sched_job_slab =
30 kmem_cache_create("xe_sched_job",
31 sizeof(struct xe_sched_job) +
32 sizeof(struct xe_job_ptrs), 0,
33 SLAB_HWCACHE_ALIGN, NULL);
34 if (!xe_sched_job_slab)
35 return -ENOMEM;
36
37 xe_sched_job_parallel_slab =
38 kmem_cache_create("xe_sched_job_parallel",
39 sizeof(struct xe_sched_job) +
40 sizeof(struct xe_job_ptrs) *
41 XE_HW_ENGINE_MAX_INSTANCE, 0,
42 SLAB_HWCACHE_ALIGN, NULL);
43 if (!xe_sched_job_parallel_slab) {
44 kmem_cache_destroy(xe_sched_job_slab);
45 return -ENOMEM;
46 }
47
48 return 0;
49 }
50
xe_sched_job_module_exit(void)51 void xe_sched_job_module_exit(void)
52 {
53 kmem_cache_destroy(xe_sched_job_slab);
54 kmem_cache_destroy(xe_sched_job_parallel_slab);
55 }
56
/*
 * Allocate a zeroed job with GFP_KERNEL. @parallel selects the cache whose
 * objects have room for XE_HW_ENGINE_MAX_INSTANCE xe_job_ptrs entries;
 * otherwise the single-entry cache is used. Returns NULL on failure.
 */
static struct xe_sched_job *job_alloc(bool parallel)
{
	struct kmem_cache *cache = xe_sched_job_slab;

	if (parallel)
		cache = xe_sched_job_parallel_slab;

	return kmem_cache_zalloc(cache, GFP_KERNEL);
}
62
xe_sched_job_is_migration(struct xe_exec_queue * q)63 bool xe_sched_job_is_migration(struct xe_exec_queue *q)
64 {
65 return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION);
66 }
67
job_free(struct xe_sched_job * job)68 static void job_free(struct xe_sched_job *job)
69 {
70 struct xe_exec_queue *q = job->q;
71 bool is_migration = xe_sched_job_is_migration(q);
72
73 kmem_cache_free(xe_exec_queue_is_parallel(job->q) || is_migration ?
74 xe_sched_job_parallel_slab : xe_sched_job_slab, job);
75 }
76
job_to_xe(struct xe_sched_job * job)77 static struct xe_device *job_to_xe(struct xe_sched_job *job)
78 {
79 return gt_to_xe(job->q->gt);
80 }
81
82 /* Free unused pre-allocated fences */
xe_sched_job_free_fences(struct xe_sched_job * job)83 static void xe_sched_job_free_fences(struct xe_sched_job *job)
84 {
85 int i;
86
87 for (i = 0; i < job->q->width; ++i) {
88 struct xe_job_ptrs *ptrs = &job->ptrs[i];
89
90 if (ptrs->lrc_fence)
91 xe_lrc_free_seqno_fence(ptrs->lrc_fence);
92 dma_fence_chain_free(ptrs->chain_fence);
93 }
94 }
95
/**
 * xe_sched_job_create - Allocate and set up a job for an exec queue
 * @q: exec queue the job belongs to; a reference is taken on it
 * @batch_addr: batch addresses copied into the job. q->width entries are
 *              read, or two entries when @q is a migration queue.
 *
 * The job is allocated from the parallel cache when @q is parallel or a
 * migration queue. One LRC seqno fence is pre-allocated per queue width, and
 * a chain fence is pre-allocated for every entry except the last. On success
 * a runtime PM reference is taken with xe_pm_runtime_get_noresume().
 *
 * Return: the new job, or an ERR_PTR() on failure (-ENOMEM for allocation
 * failures, otherwise the error reported by drm_sched_job_init() or
 * xe_lrc_alloc_seqno_fence()).
 */
struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
					 u64 *batch_addr)
{
	bool is_migration = xe_sched_job_is_migration(q);
	struct xe_sched_job *job;
	u32 nr_addrs;
	int ret;
	int idx;

	/* only a kernel context can submit a vm-less job */
	XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL));

	job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration);
	if (!job)
		return ERR_PTR(-ENOMEM);

	job->q = q;
	kref_init(&job->refcount);
	xe_exec_queue_get(job->q);

	ret = drm_sched_job_init(&job->drm, q->entity, 1, NULL);
	if (ret)
		goto err_free;

	for (idx = 0; idx < q->width; ++idx) {
		struct xe_job_ptrs *slot = &job->ptrs[idx];
		struct dma_fence *lrc_fence = xe_lrc_alloc_seqno_fence();

		if (IS_ERR(lrc_fence)) {
			ret = PTR_ERR(lrc_fence);
			goto err_sched_job;
		}
		/* Stored right away so the error path can free it */
		slot->lrc_fence = lrc_fence;

		/* The final entry gets no chain fence */
		if (idx + 1 < q->width) {
			struct dma_fence_chain *chain = dma_fence_chain_alloc();

			if (!chain) {
				ret = -ENOMEM;
				goto err_sched_job;
			}
			slot->chain_fence = chain;
		}
	}

	/* Migration jobs always carry two batch addresses */
	nr_addrs = is_migration ? 2 : q->width;
	for (idx = 0; idx < nr_addrs; ++idx)
		job->ptrs[idx].batch_addr = batch_addr[idx];

	xe_pm_runtime_get_noresume(job_to_xe(job));
	trace_xe_sched_job_create(job);
	return job;

err_sched_job:
	xe_sched_job_free_fences(job);
	drm_sched_job_cleanup(&job->drm);
err_free:
	xe_exec_queue_put(q);
	job_free(job);
	return ERR_PTR(ret);
}
160
161 /**
162 * xe_sched_job_destroy - Destroy XE schedule job
163 * @ref: reference to XE schedule job
164 *
165 * Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup
166 * base DRM schedule job, and free memory for XE schedule job.
167 */
xe_sched_job_destroy(struct kref * ref)168 void xe_sched_job_destroy(struct kref *ref)
169 {
170 struct xe_sched_job *job =
171 container_of(ref, struct xe_sched_job, refcount);
172 struct xe_device *xe = job_to_xe(job);
173 struct xe_exec_queue *q = job->q;
174
175 xe_sched_job_free_fences(job);
176 dma_fence_put(job->fence);
177 drm_sched_job_cleanup(&job->drm);
178 job_free(job);
179 xe_exec_queue_put(q);
180 xe_pm_runtime_put(xe);
181 }
182
183 /* Set the error status under the fence to avoid racing with signaling */
xe_fence_set_error(struct dma_fence * fence,int error)184 static bool xe_fence_set_error(struct dma_fence *fence, int error)
185 {
186 unsigned long irq_flags;
187 bool signaled;
188
189 spin_lock_irqsave(fence->lock, irq_flags);
190 signaled = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
191 if (!signaled)
192 dma_fence_set_error(fence, error);
193 spin_unlock_irqrestore(fence->lock, irq_flags);
194
195 return signaled;
196 }
197
xe_sched_job_set_error(struct xe_sched_job * job,int error)198 void xe_sched_job_set_error(struct xe_sched_job *job, int error)
199 {
200 if (xe_fence_set_error(job->fence, error))
201 return;
202
203 if (dma_fence_is_chain(job->fence)) {
204 struct dma_fence *iter;
205
206 dma_fence_chain_for_each(iter, job->fence)
207 xe_fence_set_error(dma_fence_chain_contained(iter),
208 error);
209 }
210
211 trace_xe_sched_job_set_error(job);
212
213 dma_fence_enable_sw_signaling(job->fence);
214 xe_hw_fence_irq_run(job->q->fence_irq);
215 }
216
xe_sched_job_started(struct xe_sched_job * job)217 bool xe_sched_job_started(struct xe_sched_job *job)
218 {
219 struct xe_lrc *lrc = job->q->lrc[0];
220
221 return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job),
222 xe_lrc_start_seqno(lrc),
223 dma_fence_chain_contained(job->fence)->ops);
224 }
225
xe_sched_job_completed(struct xe_sched_job * job)226 bool xe_sched_job_completed(struct xe_sched_job *job)
227 {
228 struct xe_lrc *lrc = job->q->lrc[0];
229
230 /*
231 * Can safely check just LRC[0] seqno as that is last seqno written when
232 * parallel handshake is done.
233 */
234
235 return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job),
236 xe_lrc_seqno(lrc),
237 dma_fence_chain_contained(job->fence)->ops);
238 }
239
xe_sched_job_arm(struct xe_sched_job * job)240 void xe_sched_job_arm(struct xe_sched_job *job)
241 {
242 struct xe_exec_queue *q = job->q;
243 struct dma_fence *fence, *prev;
244 struct xe_vm *vm = q->vm;
245 u64 seqno = 0;
246 int i;
247
248 /* Migration and kernel engines have their own locking */
249 if (IS_ENABLED(CONFIG_LOCKDEP) &&
250 !(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
251 lockdep_assert_held(&q->vm->lock);
252 if (!xe_vm_in_lr_mode(q->vm))
253 xe_vm_assert_held(q->vm);
254 }
255
256 if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
257 (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
258 xe_vm_assert_held(vm);
259 q->tlb_flush_seqno = vm->tlb_flush_seqno;
260 job->ring_ops_flush_tlb = true;
261 }
262
263 /* Arm the pre-allocated fences */
264 for (i = 0; i < q->width; prev = fence, ++i) {
265 struct dma_fence_chain *chain;
266
267 fence = job->ptrs[i].lrc_fence;
268 xe_lrc_init_seqno_fence(q->lrc[i], fence);
269 job->ptrs[i].lrc_fence = NULL;
270 if (!i) {
271 job->lrc_seqno = fence->seqno;
272 continue;
273 } else {
274 xe_assert(gt_to_xe(q->gt), job->lrc_seqno == fence->seqno);
275 }
276
277 chain = job->ptrs[i - 1].chain_fence;
278 dma_fence_chain_init(chain, prev, fence, seqno++);
279 job->ptrs[i - 1].chain_fence = NULL;
280 fence = &chain->base;
281 }
282
283 job->fence = dma_fence_get(fence); /* Pairs with put in scheduler */
284 drm_sched_job_arm(&job->drm);
285 }
286
xe_sched_job_push(struct xe_sched_job * job)287 void xe_sched_job_push(struct xe_sched_job *job)
288 {
289 xe_sched_job_get(job);
290 trace_xe_sched_job_exec(job);
291 drm_sched_entity_push_job(&job->drm);
292 xe_sched_job_put(job);
293 }
294
295 /**
296 * xe_sched_job_last_fence_add_dep - Add last fence dependency to job
297 * @job:job to add the last fence dependency to
298 * @vm: virtual memory job belongs to
299 *
300 * Returns:
301 * 0 on success, or an error on failing to expand the array.
302 */
xe_sched_job_last_fence_add_dep(struct xe_sched_job * job,struct xe_vm * vm)303 int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
304 {
305 struct dma_fence *fence;
306
307 fence = xe_exec_queue_last_fence_get(job->q, vm);
308
309 return drm_sched_job_add_dependency(&job->drm, fence);
310 }
311
312 /**
313 * xe_sched_job_init_user_fence - Initialize user_fence for the job
314 * @job: job whose user_fence needs an init
315 * @sync: sync to be use to init user_fence
316 */
xe_sched_job_init_user_fence(struct xe_sched_job * job,struct xe_sync_entry * sync)317 void xe_sched_job_init_user_fence(struct xe_sched_job *job,
318 struct xe_sync_entry *sync)
319 {
320 if (sync->type != DRM_XE_SYNC_TYPE_USER_FENCE)
321 return;
322
323 job->user_fence.used = true;
324 job->user_fence.addr = sync->addr;
325 job->user_fence.value = sync->timeline_value;
326 }
327
328 struct xe_sched_job_snapshot *
xe_sched_job_snapshot_capture(struct xe_sched_job * job)329 xe_sched_job_snapshot_capture(struct xe_sched_job *job)
330 {
331 struct xe_exec_queue *q = job->q;
332 struct xe_device *xe = q->gt->tile->xe;
333 struct xe_sched_job_snapshot *snapshot;
334 size_t len = sizeof(*snapshot) + (sizeof(u64) * q->width);
335 u16 i;
336
337 snapshot = kzalloc(len, GFP_ATOMIC);
338 if (!snapshot)
339 return NULL;
340
341 snapshot->batch_addr_len = q->width;
342 for (i = 0; i < q->width; i++)
343 snapshot->batch_addr[i] =
344 xe_device_uncanonicalize_addr(xe, job->ptrs[i].batch_addr);
345
346 return snapshot;
347 }
348
/**
 * xe_sched_job_snapshot_free - Free a job snapshot
 * @snapshot: snapshot to release with kfree()
 */
void xe_sched_job_snapshot_free(struct xe_sched_job_snapshot *snapshot)
{
	kfree(snapshot);
}
353
354 void
xe_sched_job_snapshot_print(struct xe_sched_job_snapshot * snapshot,struct drm_printer * p)355 xe_sched_job_snapshot_print(struct xe_sched_job_snapshot *snapshot,
356 struct drm_printer *p)
357 {
358 u16 i;
359
360 if (!snapshot)
361 return;
362
363 for (i = 0; i < snapshot->batch_addr_len; i++)
364 drm_printf(p, "batch_addr[%u]: 0x%016llx\n", i, snapshot->batch_addr[i]);
365 }
366
xe_sched_job_add_deps(struct xe_sched_job * job,struct dma_resv * resv,enum dma_resv_usage usage)367 int xe_sched_job_add_deps(struct xe_sched_job *job, struct dma_resv *resv,
368 enum dma_resv_usage usage)
369 {
370 return drm_sched_job_add_resv_dependencies(&job->drm, resv, usage);
371 }
372