// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_assert.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_engine_group.h"
#include "xe_vm.h"

static void
hw_engine_group_resume_lr_jobs_func(struct work_struct *w)
{
	struct xe_exec_queue *q;
	struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work);
	int err;
	enum xe_hw_engine_group_execution_mode previous_mode;

	err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode);
	if (err)
		return;

	if (previous_mode == EXEC_MODE_LR)
		goto put;

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		q->ops->resume(q);
	}

put:
	xe_hw_engine_group_put(group);
}

static struct xe_hw_engine_group *
hw_engine_group_alloc(struct xe_device *xe)
{
	struct xe_hw_engine_group *group;
	int err;

	group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
	if (!group->resume_wq)
		return ERR_PTR(-ENOMEM);

	err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq);
	if (err)
		return ERR_PTR(err);

	init_rwsem(&group->mode_sem);
	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
	INIT_LIST_HEAD(&group->exec_queue_list);

	return group;
}
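
/*
 * Lifetime note: the group is drm-managed (drmm_kzalloc() above) and the
 * workqueue teardown is registered with drmm_add_action_or_reset(), so both
 * are released automatically when the drm device is put. Callers must never
 * kfree() a group returned by this function, even on error paths. A
 * hypothetical caller sketch:
 *
 *	struct xe_hw_engine_group *group = hw_engine_group_alloc(xe);
 *
 *	if (IS_ERR(group))
 *		return PTR_ERR(group);
 */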
71 */ 72 int xe_hw_engine_setup_groups(struct xe_gt *gt) 73 { 74 struct xe_hw_engine *hwe; 75 enum xe_hw_engine_id id; 76 struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; 77 struct xe_device *xe = gt_to_xe(gt); 78 int err; 79 80 group_rcs_ccs = hw_engine_group_alloc(xe); 81 if (IS_ERR(group_rcs_ccs)) { 82 err = PTR_ERR(group_rcs_ccs); 83 goto err_group_rcs_ccs; 84 } 85 86 group_bcs = hw_engine_group_alloc(xe); 87 if (IS_ERR(group_bcs)) { 88 err = PTR_ERR(group_bcs); 89 goto err_group_bcs; 90 } 91 92 group_vcs_vecs = hw_engine_group_alloc(xe); 93 if (IS_ERR(group_vcs_vecs)) { 94 err = PTR_ERR(group_vcs_vecs); 95 goto err_group_vcs_vecs; 96 } 97 98 for_each_hw_engine(hwe, gt, id) { 99 switch (hwe->class) { 100 case XE_ENGINE_CLASS_COPY: 101 hwe->hw_engine_group = group_bcs; 102 break; 103 case XE_ENGINE_CLASS_RENDER: 104 case XE_ENGINE_CLASS_COMPUTE: 105 hwe->hw_engine_group = group_rcs_ccs; 106 break; 107 case XE_ENGINE_CLASS_VIDEO_DECODE: 108 case XE_ENGINE_CLASS_VIDEO_ENHANCE: 109 hwe->hw_engine_group = group_vcs_vecs; 110 break; 111 case XE_ENGINE_CLASS_OTHER: 112 break; 113 default: 114 drm_warn(&xe->drm, "NOT POSSIBLE"); 115 } 116 } 117 118 return 0; 119 120 err_group_vcs_vecs: 121 kfree(group_vcs_vecs); 122 err_group_bcs: 123 kfree(group_bcs); 124 err_group_rcs_ccs: 125 kfree(group_rcs_ccs); 126 127 return err; 128 } 129 130 /** 131 * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group 132 * @group: The hw engine group 133 * @q: The exec_queue 134 * 135 * Return: 0 on success, 136 * -EINTR if the lock could not be acquired 137 */ 138 int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) 139 { 140 int err; 141 struct xe_device *xe = gt_to_xe(q->gt); 142 143 xe_assert(xe, group); 144 xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM)); 145 xe_assert(xe, q->vm); 146 147 if (xe_vm_in_preempt_fence_mode(q->vm)) 148 return 0; 149 150 err = down_write_killable(&group->mode_sem); 151 if (err) 152 return err; 153 154 if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) { 155 q->ops->suspend(q); 156 err = q->ops->suspend_wait(q); 157 if (err) 158 goto err_suspend; 159 160 xe_hw_engine_group_resume_faulting_lr_jobs(group); 161 } 162 163 list_add(&q->hw_engine_group_link, &group->exec_queue_list); 164 up_write(&group->mode_sem); 165 166 return 0; 167 168 err_suspend: 169 up_write(&group->mode_sem); 170 return err; 171 } 172 ALLOW_ERROR_INJECTION(xe_hw_engine_group_add_exec_queue, ERRNO); 173 174 /** 175 * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group 176 * @group: The hw engine group 177 * @q: The exec_queue 178 */ 179 void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q) 180 { 181 struct xe_device *xe = gt_to_xe(q->gt); 182 183 xe_assert(xe, group); 184 xe_assert(xe, q->vm); 185 186 down_write(&group->mode_sem); 187 188 if (!list_empty(&q->hw_engine_group_link)) 189 list_del(&q->hw_engine_group_link); 190 191 up_write(&group->mode_sem); 192 } 193 194 /** 195 * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's 196 * faulting LR jobs 197 * @group: The hw engine group 198 */ 199 void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group) 200 { 201 queue_work(group->resume_wq, &group->resume_work); 202 } 203 204 /** 205 * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group 206 * @group: The hw engine group 207 * 208 

/**
 * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group
 * @group: The hw engine group
 *
 * Return: 0 on success, negative error code on error.
 */
static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	int err;
	struct xe_exec_queue *q;
	bool need_resume = false;

	lockdep_assert_held_write(&group->mode_sem);

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		need_resume = true;
		q->ops->suspend(q);
	}

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		/*
		 * Return with mode_sem still held for write: the caller owns
		 * the lock and releases it on every error path, so dropping
		 * it here as well would be a double unlock.
		 */
		err = q->ops->suspend_wait(q);
		if (err)
			return err;
	}

	if (need_resume)
		xe_hw_engine_group_resume_faulting_lr_jobs(group);

	return 0;
}

/**
 * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete
 * @group: The hw engine group
 *
 * This function is not meant to be called directly from a user IOCTL as dma_fence_wait()
 * is not interruptible.
 *
 * Return: 0 on success, -ETIME if waiting for one job failed
 */
static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group)
{
	long timeout;
	struct xe_exec_queue *q;
	struct dma_fence *fence;

	lockdep_assert_held_write(&group->mode_sem);

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (xe_vm_in_lr_mode(q->vm))
			continue;

		fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm);
		timeout = dma_fence_wait(fence, false);
		dma_fence_put(fence);

		if (timeout < 0)
			return -ETIME;
	}

	return 0;
}

static int switch_mode(struct xe_hw_engine_group *group)
{
	int err = 0;
	enum xe_hw_engine_group_execution_mode new_mode;

	lockdep_assert_held_write(&group->mode_sem);

	switch (group->cur_mode) {
	case EXEC_MODE_LR:
		new_mode = EXEC_MODE_DMA_FENCE;
		err = xe_hw_engine_group_suspend_faulting_lr_jobs(group);
		break;
	case EXEC_MODE_DMA_FENCE:
		new_mode = EXEC_MODE_LR;
		err = xe_hw_engine_group_wait_for_dma_fence_jobs(group);
		break;
	}

	if (err)
		return err;

	group->cur_mode = new_mode;

	return 0;
}
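
/*
 * Locking overview for xe_hw_engine_group_get_mode() below: take mode_sem for
 * read; if the group is already in the requested mode, return with the read
 * lock held. Otherwise drop the read lock, take the write lock, re-check the
 * mode (another thread may have switched it while no lock was held), switch
 * if still needed, then downgrade back to a read lock. Every successful
 * return therefore leaves mode_sem read-held until xe_hw_engine_group_put().
 */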
311 */ 312 int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group, 313 enum xe_hw_engine_group_execution_mode new_mode, 314 enum xe_hw_engine_group_execution_mode *previous_mode) 315 __acquires(&group->mode_sem) 316 { 317 int err = down_read_interruptible(&group->mode_sem); 318 319 if (err) 320 return err; 321 322 *previous_mode = group->cur_mode; 323 324 if (new_mode != group->cur_mode) { 325 up_read(&group->mode_sem); 326 err = down_write_killable(&group->mode_sem); 327 if (err) 328 return err; 329 330 if (new_mode != group->cur_mode) { 331 err = switch_mode(group); 332 if (err) { 333 up_write(&group->mode_sem); 334 return err; 335 } 336 } 337 downgrade_write(&group->mode_sem); 338 } 339 340 return err; 341 } 342 343 /** 344 * xe_hw_engine_group_put() - Put the group 345 * @group: The hw engine group 346 */ 347 void xe_hw_engine_group_put(struct xe_hw_engine_group *group) 348 __releases(&group->mode_sem) 349 { 350 up_read(&group->mode_sem); 351 } 352 353 /** 354 * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue 355 * @q: The exec_queue 356 */ 357 enum xe_hw_engine_group_execution_mode 358 xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q) 359 { 360 if (xe_vm_in_fault_mode(q->vm)) 361 return EXEC_MODE_LR; 362 else 363 return EXEC_MODE_DMA_FENCE; 364 } 365