// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_assert.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_engine_group.h"
#include "xe_vm.h"

static void
hw_engine_group_free(struct drm_device *drm, void *arg)
{
	struct xe_hw_engine_group *group = arg;

	destroy_workqueue(group->resume_wq);
	kfree(group);
}

static void
hw_engine_group_resume_lr_jobs_func(struct work_struct *w)
{
	struct xe_exec_queue *q;
	struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work);
	int err;
	enum xe_hw_engine_group_execution_mode previous_mode;

	err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode);
	if (err)
		return;

	if (previous_mode == EXEC_MODE_LR)
		goto put;

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		q->ops->resume(q);
	}

put:
	xe_hw_engine_group_put(group);
}

static struct xe_hw_engine_group *
hw_engine_group_alloc(struct xe_device *xe)
{
	struct xe_hw_engine_group *group;
	int err;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
	if (!group->resume_wq) {
		/* No drmm action is registered yet, free the group here */
		kfree(group);
		return ERR_PTR(-ENOMEM);
	}

	init_rwsem(&group->mode_sem);
	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
	INIT_LIST_HEAD(&group->exec_queue_list);

	err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group);
	if (err)
		return ERR_PTR(err);

	return group;
}
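
/*
 * A hw engine group tracks the exec queues bound to its engines and arbitrates
 * between two mutually exclusive execution modes: before dma-fence jobs run
 * (EXEC_MODE_DMA_FENCE), the group's faulting long-running (LR) jobs are
 * suspended, and before faulting LR jobs are resumed (EXEC_MODE_LR), the
 * group's pending dma-fence jobs are waited upon. Mode transitions are
 * serialized by the group's mode_sem; see switch_mode() below.
 */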

/**
 * xe_hw_engine_setup_groups() - Set up the hw engine groups for the gt
 * @gt: The gt for which groups are set up
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_hw_engine_setup_groups(struct xe_gt *gt)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
	struct xe_device *xe = gt_to_xe(gt);

	/*
	 * hw_engine_group_alloc() registers a drmm release action for each
	 * group, so the groups are freed with the device and no explicit
	 * unwinding is needed on error.
	 */
	group_rcs_ccs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_rcs_ccs))
		return PTR_ERR(group_rcs_ccs);

	group_bcs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_bcs))
		return PTR_ERR(group_bcs);

	group_vcs_vecs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_vcs_vecs))
		return PTR_ERR(group_vcs_vecs);

	for_each_hw_engine(hwe, gt, id) {
		switch (hwe->class) {
		case XE_ENGINE_CLASS_COPY:
			hwe->hw_engine_group = group_bcs;
			break;
		case XE_ENGINE_CLASS_RENDER:
		case XE_ENGINE_CLASS_COMPUTE:
			hwe->hw_engine_group = group_rcs_ccs;
			break;
		case XE_ENGINE_CLASS_VIDEO_DECODE:
		case XE_ENGINE_CLASS_VIDEO_ENHANCE:
			hwe->hw_engine_group = group_vcs_vecs;
			break;
		case XE_ENGINE_CLASS_OTHER:
			break;
		default:
			drm_warn(&xe->drm, "NOT POSSIBLE");
		}
	}

	return 0;
}

/**
 * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 *
 * Return: 0 on success,
 *	   -EINTR if the lock could not be acquired
 */
int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	int err;
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
	xe_assert(xe, q->vm);

	if (xe_vm_in_preempt_fence_mode(q->vm))
		return 0;

	err = down_write_killable(&group->mode_sem);
	if (err)
		return err;

	if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) {
		q->ops->suspend(q);
		err = q->ops->suspend_wait(q);
		if (err)
			goto err_suspend;

		xe_hw_engine_group_resume_faulting_lr_jobs(group);
	}

	list_add(&q->hw_engine_group_link, &group->exec_queue_list);
	up_write(&group->mode_sem);

	return 0;

err_suspend:
	up_write(&group->mode_sem);
	return err;
}
ALLOW_ERROR_INJECTION(xe_hw_engine_group_add_exec_queue, ERRNO);

/**
 * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 */
void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, q->vm);

	down_write(&group->mode_sem);

	if (!list_empty(&q->hw_engine_group_link))
		list_del(&q->hw_engine_group_link);

	up_write(&group->mode_sem);
}

/**
 * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's
 * faulting LR jobs
 * @group: The hw engine group
 */
void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	queue_work(group->resume_wq, &group->resume_work);
}
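
/*
 * Illustrative sketch, not part of the driver: an exec queue is expected to be
 * registered with the group of its hw engine for its lifetime, so that mode
 * switches can suspend/resume it (fault mode) or wait on it (dma-fence mode).
 * The helper name and the way the group is looked up here are assumptions made
 * for the example only.
 */
static int __maybe_unused hw_engine_group_register_queue_example(struct xe_hw_engine *hwe,
								  struct xe_exec_queue *q)
{
	struct xe_hw_engine_group *group = hwe->hw_engine_group;
	int err;

	err = xe_hw_engine_group_add_exec_queue(group, q);
	if (err)
		return err;

	/* ... submit work on q ... */

	xe_hw_engine_group_del_exec_queue(group, q);

	return 0;
}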

/**
 * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group
 * @group: The hw engine group
 *
 * Return: 0 on success, negative error code on error.
 */
static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	int err;
	struct xe_exec_queue *q;
	bool need_resume = false;

	lockdep_assert_held_write(&group->mode_sem);

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		need_resume = true;
		q->ops->suspend(q);
	}

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		/* mode_sem is held, and released on error, by the caller */
		err = q->ops->suspend_wait(q);
		if (err)
			return err;
	}

	if (need_resume)
		xe_hw_engine_group_resume_faulting_lr_jobs(group);

	return 0;
}

/**
 * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete
 * @group: The hw engine group
 *
 * This function is not meant to be called directly from a user IOCTL as dma_fence_wait()
 * is not interruptible.
 *
 * Return: 0 on success,
 *	   -ETIME if waiting for one job failed
 */
static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group)
{
	long timeout;
	struct xe_exec_queue *q;
	struct dma_fence *fence;

	lockdep_assert_held_write(&group->mode_sem);

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (xe_vm_in_lr_mode(q->vm))
			continue;

		fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm);
		timeout = dma_fence_wait(fence, false);
		dma_fence_put(fence);

		if (timeout < 0)
			return -ETIME;
	}

	return 0;
}

/* Transition the group to the other execution mode after quiescing the current one */
static int switch_mode(struct xe_hw_engine_group *group)
{
	int err = 0;
	enum xe_hw_engine_group_execution_mode new_mode;

	lockdep_assert_held_write(&group->mode_sem);

	switch (group->cur_mode) {
	case EXEC_MODE_LR:
		new_mode = EXEC_MODE_DMA_FENCE;
		err = xe_hw_engine_group_suspend_faulting_lr_jobs(group);
		break;
	case EXEC_MODE_DMA_FENCE:
		new_mode = EXEC_MODE_LR;
		err = xe_hw_engine_group_wait_for_dma_fence_jobs(group);
		break;
	}

	if (err)
		return err;

	group->cur_mode = new_mode;

	return 0;
}

/**
 * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode
 * @group: The hw engine group
 * @new_mode: The new execution mode
 * @previous_mode: Pointer in which the previous execution mode is returned for
 *		   use by the caller
 *
 * Return: 0 if successful, -EINTR if locking failed.
 */
int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
				enum xe_hw_engine_group_execution_mode new_mode,
				enum xe_hw_engine_group_execution_mode *previous_mode)
__acquires(&group->mode_sem)
{
	int err = down_read_interruptible(&group->mode_sem);

	if (err)
		return err;

	*previous_mode = group->cur_mode;

	if (new_mode != group->cur_mode) {
		up_read(&group->mode_sem);
		err = down_write_killable(&group->mode_sem);
		if (err)
			return err;

		if (new_mode != group->cur_mode) {
			err = switch_mode(group);
			if (err) {
				up_write(&group->mode_sem);
				return err;
			}
		}
		downgrade_write(&group->mode_sem);
	}

	return err;
}

/**
 * xe_hw_engine_group_put() - Put the group
 * @group: The hw engine group
 */
void xe_hw_engine_group_put(struct xe_hw_engine_group *group)
__releases(&group->mode_sem)
{
	up_read(&group->mode_sem);
}

/**
 * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue
 * @q: The exec_queue
 *
 * Return: EXEC_MODE_LR if the exec queue's VM is in fault mode,
 *	   EXEC_MODE_DMA_FENCE otherwise.
 */
enum xe_hw_engine_group_execution_mode
xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q)
{
	if (xe_vm_in_fault_mode(q->vm))
		return EXEC_MODE_LR;
	else
		return EXEC_MODE_DMA_FENCE;
}
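
/*
 * Illustrative sketch, not part of the driver: a submission path is expected
 * to bracket job arming with xe_hw_engine_group_get_mode() and
 * xe_hw_engine_group_put(), so that dma-fence jobs and faulting LR jobs never
 * run concurrently on the engines of the group. The helper name and the elided
 * "arm and submit" step are assumptions made for the example only.
 */
static int __maybe_unused hw_engine_group_submit_example(struct xe_hw_engine_group *group,
							  struct xe_exec_queue *q)
{
	enum xe_hw_engine_group_execution_mode mode, previous_mode;
	int err;

	mode = xe_hw_engine_group_find_exec_mode(q);

	/* Takes mode_sem for read, switching the group's mode first if needed */
	err = xe_hw_engine_group_get_mode(group, mode, &previous_mode);
	if (err)
		return err;

	/* ... arm and submit the job while holding the read side of mode_sem ... */

	xe_hw_engine_group_put(group);

	return 0;
}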