1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2024 Intel Corporation 4 */ 5 6 #include <drm/drm_managed.h> 7 8 #include "xe_assert.h" 9 #include "xe_device.h" 10 #include "xe_exec_queue.h" 11 #include "xe_gt.h" 12 #include "xe_gt_stats.h" 13 #include "xe_hw_engine_group.h" 14 #include "xe_vm.h" 15 16 static void 17 hw_engine_group_resume_lr_jobs_func(struct work_struct *w) 18 { 19 struct xe_exec_queue *q; 20 struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work); 21 int err; 22 enum xe_hw_engine_group_execution_mode previous_mode; 23 24 err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode); 25 if (err) 26 return; 27 28 if (previous_mode == EXEC_MODE_LR) 29 goto put; 30 31 list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { 32 if (!xe_vm_in_fault_mode(q->vm)) 33 continue; 34 35 q->ops->resume(q); 36 } 37 38 put: 39 xe_hw_engine_group_put(group); 40 } 41 42 static struct xe_hw_engine_group * 43 hw_engine_group_alloc(struct xe_device *xe) 44 { 45 struct xe_hw_engine_group *group; 46 int err; 47 48 group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL); 49 if (!group) 50 return ERR_PTR(-ENOMEM); 51 52 group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0); 53 if (!group->resume_wq) 54 return ERR_PTR(-ENOMEM); 55 56 err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq); 57 if (err) 58 return ERR_PTR(err); 59 60 init_rwsem(&group->mode_sem); 61 INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); 62 INIT_LIST_HEAD(&group->exec_queue_list); 63 64 return group; 65 } 66 67 /** 68 * xe_hw_engine_setup_groups() - Setup the hw engine groups for the gt 69 * @gt: The gt for which groups are setup 70 * 71 * Return: 0 on success, negative error code on error. 
 */
int xe_hw_engine_setup_groups(struct xe_gt *gt)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
	struct xe_device *xe = gt_to_xe(gt);

	/*
	 * Groups are drmm-managed (see hw_engine_group_alloc()), so on a
	 * partial failure the earlier allocations are reclaimed at device
	 * teardown rather than here.
	 */
	group_rcs_ccs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_rcs_ccs))
		return PTR_ERR(group_rcs_ccs);

	group_bcs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_bcs))
		return PTR_ERR(group_bcs);

	group_vcs_vecs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_vcs_vecs))
		return PTR_ERR(group_vcs_vecs);

	/* Engines are grouped by class family: render/compute, copy, video. */
	for_each_hw_engine(hwe, gt, id) {
		switch (hwe->class) {
		case XE_ENGINE_CLASS_COPY:
			hwe->hw_engine_group = group_bcs;
			break;
		case XE_ENGINE_CLASS_RENDER:
		case XE_ENGINE_CLASS_COMPUTE:
			hwe->hw_engine_group = group_rcs_ccs;
			break;
		case XE_ENGINE_CLASS_VIDEO_DECODE:
		case XE_ENGINE_CLASS_VIDEO_ENHANCE:
			hwe->hw_engine_group = group_vcs_vecs;
			break;
		case XE_ENGINE_CLASS_OTHER:
			/* No group: these engines do not take part in mode tracking. */
			break;
		case XE_ENGINE_CLASS_MAX:
			xe_gt_assert(gt, false);
		}
	}

	return 0;
}

/**
 * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 *
 * Return: 0 on success,
 *	    -EINTR if the lock could not be acquired
 */
int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	int err;
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
	xe_assert(xe, q->vm);

	/* Queues of VMs in preempt-fence mode are not tracked by the group. */
	if (xe_vm_in_preempt_fence_mode(q->vm))
		return 0;

	err = down_write_killable(&group->mode_sem);
	if (err)
		return err;

	/*
	 * A faulting-LR queue joining a group that is executing dma-fence jobs
	 * must enter suspended; the resume worker brings it back once the
	 * group switches to LR mode (see hw_engine_group_resume_lr_jobs_func()).
	 */
	if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) {
		q->ops->suspend(q);
		err = q->ops->suspend_wait(q);
		if (err)
			goto err_suspend;

		xe_hw_engine_group_resume_faulting_lr_jobs(group);
	}

	/* Only link the queue once it is in a state consistent with cur_mode. */
	list_add(&q->hw_engine_group_link, &group->exec_queue_list);
	up_write(&group->mode_sem);

	return 0;

err_suspend:
	up_write(&group->mode_sem);
	return err;
}
ALLOW_ERROR_INJECTION(xe_hw_engine_group_add_exec_queue, ERRNO);

/**
 * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 */
void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, q->vm);

	down_write(&group->mode_sem);

	/*
	 * The queue may never have been linked (e.g. preempt-fence mode VMs
	 * return early from xe_hw_engine_group_add_exec_queue()) — presumably
	 * the link is initialized at queue creation; verify against caller.
	 */
	if (!list_empty(&q->hw_engine_group_link))
		list_del(&q->hw_engine_group_link);

	up_write(&group->mode_sem);
}

/**
 * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's
 * faulting LR jobs
 * @group: The hw engine group
 */
void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	queue_work(group->resume_wq, &group->resume_work);
}

/**
 * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group
 * @group: The hw engine group
 *
 * Return: 0 on success, negative error code on error.
194 */ 195 static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group) 196 { 197 int err; 198 struct xe_exec_queue *q; 199 bool need_resume = false; 200 201 lockdep_assert_held_write(&group->mode_sem); 202 203 list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { 204 if (!xe_vm_in_fault_mode(q->vm)) 205 continue; 206 207 xe_gt_stats_incr(q->gt, XE_GT_STATS_ID_HW_ENGINE_GROUP_SUSPEND_LR_QUEUE_COUNT, 1); 208 need_resume = true; 209 q->ops->suspend(q); 210 } 211 212 list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { 213 if (!xe_vm_in_fault_mode(q->vm)) 214 continue; 215 216 err = q->ops->suspend_wait(q); 217 if (err) 218 return err; 219 } 220 221 if (need_resume) 222 xe_hw_engine_group_resume_faulting_lr_jobs(group); 223 224 return 0; 225 } 226 227 /** 228 * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete 229 * @group: The hw engine group 230 * 231 * This function is not meant to be called directly from a user IOCTL as dma_fence_wait() 232 * is not interruptible. 
233 * 234 * Return: 0 on success, 235 * -ETIME if waiting for one job failed 236 */ 237 static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group) 238 { 239 long timeout; 240 struct xe_exec_queue *q; 241 struct dma_fence *fence; 242 243 lockdep_assert_held_write(&group->mode_sem); 244 245 list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) { 246 if (xe_vm_in_lr_mode(q->vm)) 247 continue; 248 249 xe_gt_stats_incr(q->gt, XE_GT_STATS_ID_HW_ENGINE_GROUP_WAIT_DMA_QUEUE_COUNT, 1); 250 fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm); 251 timeout = dma_fence_wait(fence, false); 252 dma_fence_put(fence); 253 254 if (timeout < 0) 255 return -ETIME; 256 } 257 258 return 0; 259 } 260 261 static int switch_mode(struct xe_hw_engine_group *group) 262 { 263 int err = 0; 264 enum xe_hw_engine_group_execution_mode new_mode; 265 266 lockdep_assert_held_write(&group->mode_sem); 267 268 switch (group->cur_mode) { 269 case EXEC_MODE_LR: 270 new_mode = EXEC_MODE_DMA_FENCE; 271 err = xe_hw_engine_group_suspend_faulting_lr_jobs(group); 272 break; 273 case EXEC_MODE_DMA_FENCE: 274 new_mode = EXEC_MODE_LR; 275 err = xe_hw_engine_group_wait_for_dma_fence_jobs(group); 276 break; 277 } 278 279 if (err) 280 return err; 281 282 group->cur_mode = new_mode; 283 284 return 0; 285 } 286 287 /** 288 * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode 289 * @group: The hw engine group 290 * @new_mode: The new execution mode 291 * @previous_mode: Pointer to the previous mode provided for use by caller 292 * 293 * Return: 0 if successful, -EINTR if locking failed. 
 */
int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
				enum xe_hw_engine_group_execution_mode new_mode,
				enum xe_hw_engine_group_execution_mode *previous_mode)
__acquires(&group->mode_sem)
{
	int err = down_read_interruptible(&group->mode_sem);

	if (err)
		return err;

	*previous_mode = group->cur_mode;

	if (new_mode != group->cur_mode) {
		/*
		 * Switching modes requires the write lock; the rwsem cannot be
		 * upgraded in place, so drop the read lock and reacquire as
		 * writer. Another writer may switch modes in the window, hence
		 * the re-check below.
		 */
		up_read(&group->mode_sem);
		err = down_write_killable(&group->mode_sem);
		if (err)
			return err;

		if (new_mode != group->cur_mode) {
			err = switch_mode(group);
			if (err) {
				up_write(&group->mode_sem);
				return err;
			}
		}
		/* Leave holding the read lock, matching the success contract. */
		downgrade_write(&group->mode_sem);
	}

	return err;
}

/**
 * xe_hw_engine_group_put() - Put the group
 * @group: The hw engine group
 *
 * Releases the read lock taken by xe_hw_engine_group_get_mode().
 */
void xe_hw_engine_group_put(struct xe_hw_engine_group *group)
__releases(&group->mode_sem)
{
	up_read(&group->mode_sem);
}

/**
 * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue
 * @q: The exec_queue
 *
 * Return: EXEC_MODE_LR if the queue's VM is in fault mode,
 * EXEC_MODE_DMA_FENCE otherwise.
 */
enum xe_hw_engine_group_execution_mode
xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q)
{
	if (xe_vm_in_fault_mode(q->vm))
		return EXEC_MODE_LR;
	else
		return EXEC_MODE_DMA_FENCE;
}