// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_assert.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_engine_group.h"
#include "xe_vm.h"

static void
hw_engine_group_free(struct drm_device *drm, void *arg)
{
	struct xe_hw_engine_group *group = arg;

	destroy_workqueue(group->resume_wq);
	kfree(group);
}

static void
hw_engine_group_resume_lr_jobs_func(struct work_struct *w)
{
	struct xe_exec_queue *q;
	struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work);
	int err;
	enum xe_hw_engine_group_execution_mode previous_mode;

	err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode);
	if (err)
		return;

	/* Nothing to resume if the group was already executing LR jobs */
	if (previous_mode == EXEC_MODE_LR)
		goto put;

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		q->ops->resume(q);
	}

put:
	xe_hw_engine_group_put(group);
}

static struct xe_hw_engine_group *
hw_engine_group_alloc(struct xe_device *xe)
{
	struct xe_hw_engine_group *group;
	int err;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
	if (!group->resume_wq) {
		kfree(group);
		return ERR_PTR(-ENOMEM);
	}

	init_rwsem(&group->mode_sem);
	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
	INIT_LIST_HEAD(&group->exec_queue_list);

	err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group);
	if (err)
		return ERR_PTR(err);

	return group;
}
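
/*
 * Illustrative note: because hw_engine_group_alloc() registers
 * hw_engine_group_free() via drmm_add_action_or_reset(), the drm core
 * effectively runs the call below for each group when the xe device is
 * released:
 *
 *	hw_engine_group_free(&xe->drm, group);
 *
 * which destroys the resume workqueue and frees the group. This is why no
 * explicit teardown of a successfully allocated group is needed in
 * xe_hw_engine_setup_groups().
 */
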
/**
 * xe_hw_engine_setup_groups() - Set up the hw engine groups for the gt
 * @gt: The gt for which groups are set up
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_hw_engine_setup_groups(struct xe_gt *gt)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
	struct xe_device *xe = gt_to_xe(gt);

	/*
	 * Groups are drmm-managed (see hw_engine_group_alloc()), so on failure
	 * any group already allocated is released at device teardown and no
	 * explicit unwinding is needed here.
	 */
	group_rcs_ccs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_rcs_ccs))
		return PTR_ERR(group_rcs_ccs);

	group_bcs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_bcs))
		return PTR_ERR(group_bcs);

	group_vcs_vecs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_vcs_vecs))
		return PTR_ERR(group_vcs_vecs);

	/*
	 * Render and compute share a group, as do video decode and enhance;
	 * copy engines get a group of their own.
	 */
	for_each_hw_engine(hwe, gt, id) {
		switch (hwe->class) {
		case XE_ENGINE_CLASS_COPY:
			hwe->hw_engine_group = group_bcs;
			break;
		case XE_ENGINE_CLASS_RENDER:
		case XE_ENGINE_CLASS_COMPUTE:
			hwe->hw_engine_group = group_rcs_ccs;
			break;
		case XE_ENGINE_CLASS_VIDEO_DECODE:
		case XE_ENGINE_CLASS_VIDEO_ENHANCE:
			hwe->hw_engine_group = group_vcs_vecs;
			break;
		case XE_ENGINE_CLASS_OTHER:
			break;
		default:
			drm_warn(&xe->drm, "NOT POSSIBLE");
		}
	}

	return 0;
}

/**
 * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 *
 * Return: 0 on success, -EINTR if the lock could not be acquired, or a
 * negative error code if suspending the queue failed.
 */
int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	int err;
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
	xe_assert(xe, q->vm);

	if (xe_vm_in_preempt_fence_mode(q->vm))
		return 0;

	err = down_write_killable(&group->mode_sem);
	if (err)
		return err;

	if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) {
		/*
		 * A faulting LR queue joining a group that currently runs
		 * dma-fence jobs must be added suspended; the resume worker
		 * brings it back up once the group returns to LR mode.
		 */
		q->ops->suspend(q);
		err = q->ops->suspend_wait(q);
		if (err)
			goto err_suspend;

		xe_hw_engine_group_resume_faulting_lr_jobs(group);
	}

	list_add(&q->hw_engine_group_link, &group->exec_queue_list);
	up_write(&group->mode_sem);

	return 0;

err_suspend:
	up_write(&group->mode_sem);
	return err;
}

/**
 * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 */
void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, q->vm);

	down_write(&group->mode_sem);

	if (!list_empty(&q->hw_engine_group_link))
		list_del(&q->hw_engine_group_link);

	up_write(&group->mode_sem);
}

/**
 * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's
 * faulting LR jobs
 * @group: The hw engine group
 */
void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	queue_work(group->resume_wq, &group->resume_work);
}
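
/*
 * Illustrative sketch with placeholder names (example_*): a hypothetical exec
 * queue creation/destruction path would register the queue with the hw engine
 * group of its hardware engine roughly as follows; the actual call sites in
 * the driver may differ.
 *
 *	static int example_exec_queue_register(struct xe_exec_queue *q)
 *	{
 *		if (q->flags & EXEC_QUEUE_FLAG_VM)
 *			return 0;
 *
 *		return xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
 *	}
 *
 *	static void example_exec_queue_unregister(struct xe_exec_queue *q)
 *	{
 *		if (q->flags & EXEC_QUEUE_FLAG_VM)
 *			return;
 *
 *		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
 *	}
 *
 * The gating on EXEC_QUEUE_FLAG_VM mirrors the assert in
 * xe_hw_engine_group_add_exec_queue(): VM bind queues are not tracked by hw
 * engine groups.
 */
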
/**
 * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group
 * @group: The hw engine group
 *
 * Return: 0 on success, negative error code on error.
 */
static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	int err;
	struct xe_exec_queue *q;
	bool need_resume = false;

	lockdep_assert_held_write(&group->mode_sem);

	/* Kick off all suspends first so the queues quiesce in parallel. */
	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		need_resume = true;
		q->ops->suspend(q);
	}

	/* Then wait for each suspend to complete. */
	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		err = q->ops->suspend_wait(q);
		if (err)
			/* The caller owns mode_sem and releases it on error */
			return err;
	}

	if (need_resume)
		xe_hw_engine_group_resume_faulting_lr_jobs(group);

	return 0;
}

/**
 * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete
 * @group: The hw engine group
 *
 * This function is not meant to be called directly from a user IOCTL as dma_fence_wait()
 * is not interruptible.
 *
 * Return: 0 on success,
 *	   -ETIME if waiting for one job failed
 */
static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group)
{
	long timeout;
	struct xe_exec_queue *q;
	struct dma_fence *fence;

	lockdep_assert_held_write(&group->mode_sem);

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (xe_vm_in_lr_mode(q->vm))
			continue;

		fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm);
		timeout = dma_fence_wait(fence, false);
		dma_fence_put(fence);

		if (timeout < 0)
			return -ETIME;
	}

	return 0;
}

static int switch_mode(struct xe_hw_engine_group *group)
{
	int err = 0;
	enum xe_hw_engine_group_execution_mode new_mode;

	lockdep_assert_held_write(&group->mode_sem);

	switch (group->cur_mode) {
	case EXEC_MODE_LR:
		new_mode = EXEC_MODE_DMA_FENCE;
		err = xe_hw_engine_group_suspend_faulting_lr_jobs(group);
		break;
	case EXEC_MODE_DMA_FENCE:
		new_mode = EXEC_MODE_LR;
		err = xe_hw_engine_group_wait_for_dma_fence_jobs(group);
		break;
	}

	if (err)
		return err;

	group->cur_mode = new_mode;

	return 0;
}
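
/*
 * Illustrative sketch with placeholder names (example_*): a hypothetical
 * dma-fence submission path brackets the actual submission with
 * xe_hw_engine_group_get_mode()/xe_hw_engine_group_put(), which is what drives
 * switch_mode() above.
 *
 *	static int example_submit_dma_fence_job(struct xe_exec_queue *q)
 *	{
 *		struct xe_hw_engine_group *group = q->hwe->hw_engine_group;
 *		enum xe_hw_engine_group_execution_mode previous_mode;
 *		int err;
 *
 *		err = xe_hw_engine_group_get_mode(group, EXEC_MODE_DMA_FENCE,
 *						  &previous_mode);
 *		if (err)
 *			return err;
 *
 *		err = example_submit(q);
 *
 *		xe_hw_engine_group_put(group);
 *
 *		return err;
 *	}
 *
 * example_submit() stands in for the real job submission. While the group is
 * held this way, faulting LR queues of the group stay suspended; previous_mode
 * tells the caller whether the group had to leave LR mode for this submission.
 */
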
/**
 * xe_hw_engine_group_get_mode() - Get the group to execute in @new_mode
 * @group: The hw engine group
 * @new_mode: The new execution mode
 * @previous_mode: Pointer to the previous mode, provided for use by the caller
 *
 * Return: 0 on success, -EINTR if the lock could not be acquired, or a
 * negative error code if switching the execution mode failed.
 */
int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
				enum xe_hw_engine_group_execution_mode new_mode,
				enum xe_hw_engine_group_execution_mode *previous_mode)
__acquires(&group->mode_sem)
{
	int err = down_read_interruptible(&group->mode_sem);

	if (err)
		return err;

	*previous_mode = group->cur_mode;

	if (new_mode != group->cur_mode) {
		/*
		 * Upgrade to the write lock to switch modes, then re-check
		 * cur_mode since another thread may have switched it while the
		 * semaphore was dropped.
		 */
		up_read(&group->mode_sem);
		err = down_write_killable(&group->mode_sem);
		if (err)
			return err;

		if (new_mode != group->cur_mode) {
			err = switch_mode(group);
			if (err) {
				up_write(&group->mode_sem);
				return err;
			}
		}
		downgrade_write(&group->mode_sem);
	}

	return err;
}

/**
 * xe_hw_engine_group_put() - Put the group
 * @group: The hw engine group
 */
void xe_hw_engine_group_put(struct xe_hw_engine_group *group)
__releases(&group->mode_sem)
{
	up_read(&group->mode_sem);
}

/**
 * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue
 * @q: The exec_queue
 *
 * Return: EXEC_MODE_LR if the exec queue's VM is in fault mode,
 * EXEC_MODE_DMA_FENCE otherwise.
 */
enum xe_hw_engine_group_execution_mode
xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q)
{
	if (xe_vm_in_fault_mode(q->vm))
		return EXEC_MODE_LR;
	else
		return EXEC_MODE_DMA_FENCE;
}
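
/*
 * Illustrative sketch: a caller that does not know up front which mode it
 * needs can derive it from the queue's VM rather than hard-coding it:
 *
 *	enum xe_hw_engine_group_execution_mode mode, previous_mode;
 *	int err;
 *
 *	mode = xe_hw_engine_group_find_exec_mode(q);
 *	err = xe_hw_engine_group_get_mode(q->hwe->hw_engine_group, mode,
 *					  &previous_mode);
 *
 * A queue whose VM is in fault mode maps to EXEC_MODE_LR; note that taking
 * the group in EXEC_MODE_LR may block in switch_mode() until outstanding
 * dma-fence jobs have completed.
 */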