// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_sched_job.h"

#include <linux/dma-fence-array.h>
#include <linux/slab.h>

#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_engine_types.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_trace.h"
#include "xe_vm.h"

static struct kmem_cache *xe_sched_job_slab;
static struct kmem_cache *xe_sched_job_parallel_slab;

int __init xe_sched_job_module_init(void)
{
	xe_sched_job_slab =
		kmem_cache_create("xe_sched_job",
				  sizeof(struct xe_sched_job) +
				  sizeof(u64), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!xe_sched_job_slab)
		return -ENOMEM;

	xe_sched_job_parallel_slab =
		kmem_cache_create("xe_sched_job_parallel",
				  sizeof(struct xe_sched_job) +
				  sizeof(u64) *
				  XE_HW_ENGINE_MAX_INSTANCE, 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!xe_sched_job_parallel_slab) {
		kmem_cache_destroy(xe_sched_job_slab);
		return -ENOMEM;
	}

	return 0;
}

void xe_sched_job_module_exit(void)
{
	kmem_cache_destroy(xe_sched_job_slab);
	kmem_cache_destroy(xe_sched_job_parallel_slab);
}

static struct xe_sched_job *job_alloc(bool parallel)
{
	return kmem_cache_zalloc(parallel ? xe_sched_job_parallel_slab :
				 xe_sched_job_slab, GFP_KERNEL);
}

bool xe_sched_job_is_migration(struct xe_exec_queue *q)
{
	return q->vm && (q->vm->flags & XE_VM_FLAG_MIGRATION);
}

static void job_free(struct xe_sched_job *job)
{
	struct xe_exec_queue *q = job->q;
	bool is_migration = xe_sched_job_is_migration(q);

	kmem_cache_free(xe_exec_queue_is_parallel(job->q) || is_migration ?
			xe_sched_job_parallel_slab : xe_sched_job_slab, job);
}

static struct xe_device *job_to_xe(struct xe_sched_job *job)
{
	return gt_to_xe(job->q->gt);
}
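/**
 * xe_sched_job_create - Create an XE scheduler job
 * @q: exec queue to submit the job on
 * @batch_addr: GPU addresses of the batch buffer(s), one per logical ring
 *		context of @q (two for migration jobs)
 *
 * Allocate a job, take a reference on @q, initialize the base DRM scheduler
 * job, create the seqno fence(s) the job will signal and copy the batch
 * addresses into the job.
 *
 * Returns: new job on success, ERR_PTR on failure.
 */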
struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
					 u64 *batch_addr)
{
	struct xe_sched_job *job;
	struct dma_fence **fences;
	bool is_migration = xe_sched_job_is_migration(q);
	int err;
	int i, j;
	u32 width;

	/* Only a kernel context can submit a VM-less job */
	XE_WARN_ON(!q->vm && !(q->flags & EXEC_QUEUE_FLAG_KERNEL));

	/* Migration and kernel engines have their own locking */
	if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) {
		lockdep_assert_held(&q->vm->lock);
		if (!xe_vm_in_lr_mode(q->vm))
			xe_vm_assert_held(q->vm);
	}

	job = job_alloc(xe_exec_queue_is_parallel(q) || is_migration);
	if (!job)
		return ERR_PTR(-ENOMEM);

	job->q = q;
	kref_init(&job->refcount);
	xe_exec_queue_get(job->q);

	err = drm_sched_job_init(&job->drm, q->entity, 1, NULL);
	if (err)
		goto err_free;

	if (!xe_exec_queue_is_parallel(q)) {
		job->fence = xe_lrc_create_seqno_fence(q->lrc);
		if (IS_ERR(job->fence)) {
			err = PTR_ERR(job->fence);
			goto err_sched_job;
		}
	} else {
		struct dma_fence_array *cf;

		fences = kmalloc_array(q->width, sizeof(*fences), GFP_KERNEL);
		if (!fences) {
			err = -ENOMEM;
			goto err_sched_job;
		}

		for (j = 0; j < q->width; ++j) {
			fences[j] = xe_lrc_create_seqno_fence(q->lrc + j);
			if (IS_ERR(fences[j])) {
				err = PTR_ERR(fences[j]);
				goto err_fences;
			}
		}

		cf = dma_fence_array_create(q->width, fences,
					    q->parallel.composite_fence_ctx,
					    q->parallel.composite_fence_seqno++,
					    false);
		if (!cf) {
			/* Roll back the composite seqno consumed above */
			--q->parallel.composite_fence_seqno;
			err = -ENOMEM;
			goto err_fences;
		}

		/* Sanity check */
		for (j = 0; j < q->width; ++j)
			xe_assert(job_to_xe(job), cf->base.seqno == fences[j]->seqno);

		job->fence = &cf->base;
	}

	width = q->width;
	if (is_migration)
		width = 2;

	for (i = 0; i < width; ++i)
		job->batch_addr[i] = batch_addr[i];

	/* All other jobs require an open VM, which already holds a reference */
	if (unlikely(q->flags & EXEC_QUEUE_FLAG_KERNEL))
		xe_device_mem_access_get(job_to_xe(job));
	xe_device_assert_mem_access(job_to_xe(job));

	trace_xe_sched_job_create(job);
	return job;

err_fences:
	for (j = j - 1; j >= 0; --j) {
		--q->lrc[j].fence_ctx.next_seqno;
		dma_fence_put(fences[j]);
	}
	kfree(fences);
err_sched_job:
	drm_sched_job_cleanup(&job->drm);
err_free:
	xe_exec_queue_put(q);
	job_free(job);
	return ERR_PTR(err);
}

/**
 * xe_sched_job_destroy - Destroy an XE scheduler job
 * @ref: reference to the XE scheduler job
 *
 * Called when the refcount drops to zero: drop the references to the job's
 * exec queue and fence, clean up the base DRM scheduler job and free the
 * memory for the XE scheduler job.
 */
void xe_sched_job_destroy(struct kref *ref)
{
	struct xe_sched_job *job =
		container_of(ref, struct xe_sched_job, refcount);

	if (unlikely(job->q->flags & EXEC_QUEUE_FLAG_KERNEL))
		xe_device_mem_access_put(job_to_xe(job));
	xe_exec_queue_put(job->q);
	dma_fence_put(job->fence);
	drm_sched_job_cleanup(&job->drm);
	job_free(job);
}

void xe_sched_job_set_error(struct xe_sched_job *job, int error)
{
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags))
		return;

	dma_fence_set_error(job->fence, error);

	if (dma_fence_is_array(job->fence)) {
		struct dma_fence_array *array =
			to_dma_fence_array(job->fence);
		struct dma_fence **child = array->fences;
		unsigned int nchild = array->num_fences;

		do {
			struct dma_fence *current_fence = *child++;

			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				     &current_fence->flags))
				continue;
			dma_fence_set_error(current_fence, error);
		} while (--nchild);
	}

	trace_xe_sched_job_set_error(job);

	dma_fence_enable_sw_signaling(job->fence);
	xe_hw_fence_irq_run(job->q->fence_irq);
}

bool xe_sched_job_started(struct xe_sched_job *job)
{
	struct xe_lrc *lrc = job->q->lrc;

	return !__dma_fence_is_later(xe_sched_job_seqno(job),
				     xe_lrc_start_seqno(lrc),
				     job->fence->ops);
}

bool xe_sched_job_completed(struct xe_sched_job *job)
{
	struct xe_lrc *lrc = job->q->lrc;

	/*
	 * It is safe to check just the LRC[0] seqno, as that is the last seqno
	 * written once the parallel handshake is done.
	 */

	return !__dma_fence_is_later(xe_sched_job_seqno(job), xe_lrc_seqno(lrc),
				     job->fence->ops);
}

void xe_sched_job_arm(struct xe_sched_job *job)
{
	drm_sched_job_arm(&job->drm);
}
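/**
 * xe_sched_job_push - Push a job to the DRM scheduler
 * @job: job to push
 *
 * Take a local reference across the push, emit the exec tracepoint and queue
 * @job on the exec queue's DRM scheduler entity, then drop the local
 * reference again.
 */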
void xe_sched_job_push(struct xe_sched_job *job)
{
	xe_sched_job_get(job);
	trace_xe_sched_job_exec(job);
	drm_sched_entity_push_job(&job->drm);
	xe_sched_job_put(job);
}

/**
 * xe_sched_job_last_fence_add_dep - Add last fence dependency to job
 * @job: job to add the last fence dependency to
 * @vm: VM the job belongs to
 *
 * Returns:
 * 0 on success, or an error if expanding the dependency array fails.
 */
int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
{
	struct dma_fence *fence;

	fence = xe_exec_queue_last_fence_get(job->q, vm);

	return drm_sched_job_add_dependency(&job->drm, fence);
}