// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include <drm/drm_managed.h>

#include "xe_assert.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_engine_group.h"
#include "xe_vm.h"

/*
 * Worker that switches the group to LR mode if needed and resumes every
 * faulting long-running (LR) exec queue of the group. Queued through
 * xe_hw_engine_group_resume_faulting_lr_jobs().
 */
static void
hw_engine_group_resume_lr_jobs_func(struct work_struct *w)
{
	struct xe_exec_queue *q;
	struct xe_hw_engine_group *group = container_of(w, struct xe_hw_engine_group, resume_work);
	int err;
	enum xe_hw_engine_group_execution_mode previous_mode;

	err = xe_hw_engine_group_get_mode(group, EXEC_MODE_LR, &previous_mode);
	if (err)
		return;

	/* Group was already in LR mode, nothing was suspended, nothing to resume */
	if (previous_mode == EXEC_MODE_LR)
		goto put;

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		q->ops->resume(q);
	}

put:
	xe_hw_engine_group_put(group);
}

/*
 * Allocate a hw engine group and its resume workqueue. Both are released
 * automatically through DRM-managed actions when the device goes away.
 */
static struct xe_hw_engine_group *
hw_engine_group_alloc(struct xe_device *xe)
{
	struct xe_hw_engine_group *group;
	int err;

	group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	group->resume_wq = alloc_workqueue("xe-resume-lr-jobs-wq", 0, 0);
	if (!group->resume_wq)
		return ERR_PTR(-ENOMEM);

	err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq);
	if (err)
		return ERR_PTR(err);

	init_rwsem(&group->mode_sem);
	INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
	INIT_LIST_HEAD(&group->exec_queue_list);

	return group;
}
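
/*
 * For orientation only: the pieces of struct xe_hw_engine_group used in this
 * file, as inferred from the code here. This is a sketch, not the
 * authoritative definition, which presumably lives in the group types header:
 *
 *	struct work_struct resume_work;		resumes faulting LR jobs (see worker above)
 *	struct workqueue_struct *resume_wq;	workqueue backing resume_work
 *	struct list_head exec_queue_list;	exec queues sharing the group's engines
 *	struct rw_semaphore mode_sem;		protects cur_mode and exec_queue_list
 *	enum xe_hw_engine_group_execution_mode cur_mode;
 */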

/**
 * xe_hw_engine_setup_groups() - Setup the hw engine groups for the gt
 * @gt: The gt for which groups are setup
 *
 * Return: 0 on success, negative error code on error.
 */
int xe_hw_engine_setup_groups(struct xe_gt *gt)
{
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
	struct xe_device *xe = gt_to_xe(gt);

	group_rcs_ccs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_rcs_ccs))
		return PTR_ERR(group_rcs_ccs);

	group_bcs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_bcs))
		return PTR_ERR(group_bcs);

	group_vcs_vecs = hw_engine_group_alloc(xe);
	if (IS_ERR(group_vcs_vecs))
		return PTR_ERR(group_vcs_vecs);

	for_each_hw_engine(hwe, gt, id) {
		switch (hwe->class) {
		case XE_ENGINE_CLASS_COPY:
			hwe->hw_engine_group = group_bcs;
			break;
		case XE_ENGINE_CLASS_RENDER:
		case XE_ENGINE_CLASS_COMPUTE:
			hwe->hw_engine_group = group_rcs_ccs;
			break;
		case XE_ENGINE_CLASS_VIDEO_DECODE:
		case XE_ENGINE_CLASS_VIDEO_ENHANCE:
			hwe->hw_engine_group = group_vcs_vecs;
			break;
		case XE_ENGINE_CLASS_OTHER:
			break;
		case XE_ENGINE_CLASS_MAX:
			xe_gt_assert(gt, false);
		}
	}

	return 0;
}

/**
 * xe_hw_engine_group_add_exec_queue() - Add an exec queue to a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 *
 * Return: 0 on success, -EINTR if the lock could not be acquired, or a
 *	   negative error code if suspending the exec queue failed.
 */
int xe_hw_engine_group_add_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	int err;
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_VM));
	xe_assert(xe, q->vm);

	if (xe_vm_in_preempt_fence_mode(q->vm))
		return 0;

	err = down_write_killable(&group->mode_sem);
	if (err)
		return err;

	/*
	 * A faulting LR queue joining a group that currently runs dma-fence
	 * jobs must stay suspended; a resume is scheduled so the queue runs
	 * once the group can switch back to LR mode.
	 */
	if (xe_vm_in_fault_mode(q->vm) && group->cur_mode == EXEC_MODE_DMA_FENCE) {
		q->ops->suspend(q);
		err = q->ops->suspend_wait(q);
		if (err)
			goto err_suspend;

		xe_hw_engine_group_resume_faulting_lr_jobs(group);
	}

	list_add(&q->hw_engine_group_link, &group->exec_queue_list);
	up_write(&group->mode_sem);

	return 0;

err_suspend:
	up_write(&group->mode_sem);
	return err;
}
ALLOW_ERROR_INJECTION(xe_hw_engine_group_add_exec_queue, ERRNO);

/**
 * xe_hw_engine_group_del_exec_queue() - Delete an exec queue from a hw engine group
 * @group: The hw engine group
 * @q: The exec_queue
 */
void xe_hw_engine_group_del_exec_queue(struct xe_hw_engine_group *group, struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);

	xe_assert(xe, group);
	xe_assert(xe, q->vm);

	down_write(&group->mode_sem);

	if (!list_empty(&q->hw_engine_group_link))
		list_del(&q->hw_engine_group_link);

	up_write(&group->mode_sem);
}

/**
 * xe_hw_engine_group_resume_faulting_lr_jobs() - Asynchronously resume the hw engine group's
 * faulting LR jobs
 * @group: The hw engine group
 */
void xe_hw_engine_group_resume_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	queue_work(group->resume_wq, &group->resume_work);
}
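
/*
 * Illustrative only: the expected lifecycle of an exec queue with respect to
 * its group. The actual call sites live in the exec queue code and may differ
 * in detail; q->hwe->hw_engine_group is assumed to name the group the queue's
 * hw engine was assigned to by xe_hw_engine_setup_groups().
 *
 *	err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
 *	if (err)
 *		return err;
 *
 *	... use the queue ...
 *
 *	xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);
 */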

/**
 * xe_hw_engine_group_suspend_faulting_lr_jobs() - Suspend the faulting LR jobs of this group
 * @group: The hw engine group
 *
 * Return: 0 on success, negative error code on error.
 */
static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group *group)
{
	int err;
	struct xe_exec_queue *q;
	bool need_resume = false;

	lockdep_assert_held_write(&group->mode_sem);

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		need_resume = true;
		q->ops->suspend(q);
	}

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (!xe_vm_in_fault_mode(q->vm))
			continue;

		err = q->ops->suspend_wait(q);
		if (err)
			return err;
	}

	if (need_resume)
		xe_hw_engine_group_resume_faulting_lr_jobs(group);

	return 0;
}

/**
 * xe_hw_engine_group_wait_for_dma_fence_jobs() - Wait for dma fence jobs to complete
 * @group: The hw engine group
 *
 * This function is not meant to be called directly from a user IOCTL as dma_fence_wait()
 * is not interruptible.
 *
 * Return: 0 on success,
 *	   -ETIME if waiting for one job failed
 */
static int xe_hw_engine_group_wait_for_dma_fence_jobs(struct xe_hw_engine_group *group)
{
	long timeout;
	struct xe_exec_queue *q;
	struct dma_fence *fence;

	lockdep_assert_held_write(&group->mode_sem);

	list_for_each_entry(q, &group->exec_queue_list, hw_engine_group_link) {
		if (xe_vm_in_lr_mode(q->vm))
			continue;

		fence = xe_exec_queue_last_fence_get_for_resume(q, q->vm);
		timeout = dma_fence_wait(fence, false);
		dma_fence_put(fence);

		if (timeout < 0)
			return -ETIME;
	}

	return 0;
}

static int switch_mode(struct xe_hw_engine_group *group)
{
	int err = 0;
	enum xe_hw_engine_group_execution_mode new_mode;

	lockdep_assert_held_write(&group->mode_sem);

	switch (group->cur_mode) {
	case EXEC_MODE_LR:
		new_mode = EXEC_MODE_DMA_FENCE;
		err = xe_hw_engine_group_suspend_faulting_lr_jobs(group);
		break;
	case EXEC_MODE_DMA_FENCE:
		new_mode = EXEC_MODE_LR;
		err = xe_hw_engine_group_wait_for_dma_fence_jobs(group);
		break;
	}

	if (err)
		return err;

	group->cur_mode = new_mode;

	return 0;
}
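
/*
 * Note on the mode machinery above (descriptive, not normative): when a
 * dma-fence job needs the group's engines (LR -> DMA_FENCE), the faulting LR
 * queues are suspended and an asynchronous resume is queued. That worker
 * switches the group back to LR mode (DMA_FENCE -> LR) once the outstanding
 * dma-fence jobs have completed, then resumes the suspended queues. This
 * keeps faulting LR jobs and dma-fence jobs from running concurrently on the
 * engines of the group.
 */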

/**
 * xe_hw_engine_group_get_mode() - Get the group to execute in the new mode
 * @group: The hw engine group
 * @new_mode: The new execution mode
 * @previous_mode: Pointer to the previous mode, provided for use by the caller
 *
 * On success the group executes in @new_mode and its mode_sem is held for
 * read; the caller must release it with xe_hw_engine_group_put().
 *
 * Return: 0 if successful, -EINTR if locking failed, or a negative error code
 *	   if switching the execution mode failed.
 */
int xe_hw_engine_group_get_mode(struct xe_hw_engine_group *group,
				enum xe_hw_engine_group_execution_mode new_mode,
				enum xe_hw_engine_group_execution_mode *previous_mode)
__acquires(&group->mode_sem)
{
	int err = down_read_interruptible(&group->mode_sem);

	if (err)
		return err;

	*previous_mode = group->cur_mode;

	if (new_mode != group->cur_mode) {
		up_read(&group->mode_sem);
		err = down_write_killable(&group->mode_sem);
		if (err)
			return err;

		if (new_mode != group->cur_mode) {
			err = switch_mode(group);
			if (err) {
				up_write(&group->mode_sem);
				return err;
			}
		}
		downgrade_write(&group->mode_sem);
	}

	return err;
}

/**
 * xe_hw_engine_group_put() - Put the group
 * @group: The hw engine group
 *
 * Releases the read side of the group's mode_sem taken by
 * xe_hw_engine_group_get_mode().
 */
void xe_hw_engine_group_put(struct xe_hw_engine_group *group)
__releases(&group->mode_sem)
{
	up_read(&group->mode_sem);
}

/**
 * xe_hw_engine_group_find_exec_mode() - Find the execution mode for this exec queue
 * @q: The exec_queue
 *
 * Return: EXEC_MODE_LR if the exec queue's VM is in fault mode,
 *	   EXEC_MODE_DMA_FENCE otherwise.
 */
enum xe_hw_engine_group_execution_mode
xe_hw_engine_group_find_exec_mode(struct xe_exec_queue *q)
{
	if (xe_vm_in_fault_mode(q->vm))
		return EXEC_MODE_LR;
	else
		return EXEC_MODE_DMA_FENCE;
}
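
/*
 * Illustrative only: the expected bracketing of a job submission path (such
 * as the exec IOCTL) around the helpers above. Exact call sites live outside
 * this file; q->hwe->hw_engine_group is assumed to name the queue's group.
 *
 *	struct xe_hw_engine_group *group = q->hwe->hw_engine_group;
 *	enum xe_hw_engine_group_execution_mode mode, previous_mode;
 *	int err;
 *
 *	mode = xe_hw_engine_group_find_exec_mode(q);
 *	err = xe_hw_engine_group_get_mode(group, mode, &previous_mode);
 *	if (err)
 *		return err;
 *
 *	... submit the job while the group is held in the right mode ...
 *
 *	xe_hw_engine_group_put(group);
 */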