// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_execlist.h"

#include <drm/drm_managed.h>

#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"

#define XE_EXECLIST_HANG_LIMIT 1

#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define XEHP_SW_CTX_ID_SHIFT 39
#define XEHP_SW_CTX_ID_WIDTH 16

#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \
		    SW_CTX_ID_SHIFT)

#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
		    XEHP_SW_CTX_ID_SHIFT)

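/*
 * Point the engine at a context image and kick off execution: program the
 * HWSP, switch the ring to execlist mode and load the context descriptor
 * via the ELSQ/EXECLIST_CONTROL registers.
 */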
static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
			u32 ctx_id)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	u64 lrc_desc;

	lrc_desc = xe_lrc_descriptor(lrc);

	if (GRAPHICS_VERx100(xe) >= 1250) {
		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
	} else {
		xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
	}

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
		xe_mmio_write32(hwe->gt, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	lrc->ring.old_tail = lrc->ring.tail;

	/*
	 * Make sure the context image is complete before we submit it to HW.
	 *
	 * Ostensibly, writes (including the WCB) should be flushed prior to
	 * an uncached write such as our mmio register access, the empirical
	 * evidence (esp. on Braswell) suggests that the WC write into memory
	 * may not be visible to the HW prior to the completion of the UC
	 * register write and that we may begin execution from the context
	 * before its image is complete leading to invalid PD chasing.
	 */
	wmb();

	xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base),
			xe_bo_ggtt_addr(hwe->hwsp));
	xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base));
	xe_mmio_write32(gt, RING_MODE(hwe->mmio_base),
			_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));

	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
			lower_32_bits(lrc_desc));
	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
			upper_32_bits(lrc_desc));
	xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base),
			EL_CTRL_LOAD);
}

static void __xe_execlist_port_start(struct xe_execlist_port *port,
				     struct xe_execlist_exec_queue *exl)
{
	struct xe_device *xe = gt_to_xe(port->hwe->gt);
	int max_ctx = FIELD_MAX(SW_CTX_ID);

	if (GRAPHICS_VERx100(xe) >= 1250)
		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);

	xe_execlist_port_assert_held(port);

	if (port->running_exl != exl || !exl->has_run) {
		port->last_ctx_id++;

		/* 0 is reserved for the kernel context */
		if (port->last_ctx_id > max_ctx)
			port->last_ctx_id = 1;
	}

	__start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
	port->running_exl = exl;
	exl->has_run = true;
}

static void __xe_execlist_port_idle(struct xe_execlist_port *port)
{
	u32 noop[2] = { MI_NOOP, MI_NOOP };

	xe_execlist_port_assert_held(port);

	if (!port->running_exl)
		return;

	xe_lrc_write_ring(port->lrc, noop, sizeof(noop));
	__start_lrc(port->hwe, port->lrc, 0);
	port->running_exl = NULL;
}

static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
{
	struct xe_lrc *lrc = exl->q->lrc[0];

	return lrc->ring.tail == lrc->ring.old_tail;
}

static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
{
	struct xe_execlist_exec_queue *exl = NULL;
	int i;

	xe_execlist_port_assert_held(port);

	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
		while (!list_empty(&port->active[i])) {
			exl = list_first_entry(&port->active[i],
					       struct xe_execlist_exec_queue,
					       active_link);
			list_del(&exl->active_link);

			if (xe_execlist_is_idle(exl)) {
				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
				continue;
			}

			list_add_tail(&exl->active_link, &port->active[i]);
			__xe_execlist_port_start(port, exl);
			return;
		}
	}

	__xe_execlist_port_idle(port);
}

static u64 read_execlist_status(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	u32 hi, lo;

	lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
	hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base));

	return lo | (u64)hi << 32;
}

static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
{
	u64 status;

	xe_execlist_port_assert_held(port);

	status = read_execlist_status(port->hwe);
	if (status & BIT(7))
		return;

	__xe_execlist_port_start_next_active(port);
}

static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
					 u16 intr_vec)
{
	struct xe_execlist_port *port = hwe->exl_port;

	spin_lock(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock(&port->lock);
}

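/*
 * Re-evaluate what the port should run, with the port lock held. If a queue
 * of @priority became runnable and the currently running queue (if any) has
 * a lower priority, switch to the highest-priority runnable queue.
 */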
static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
					 enum xe_exec_queue_priority priority)
{
	xe_execlist_port_assert_held(port);

	if (port->running_exl && port->running_exl->active_priority >= priority)
		return;

	__xe_execlist_port_start_next_active(port);
}

static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
{
	struct xe_execlist_port *port = exl->port;
	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;

	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
	XE_WARN_ON(priority < 0);
	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));

	spin_lock_irq(&port->lock);

	if (exl->active_priority != priority &&
	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
		/* Priority changed, move it to the right list */
		list_del(&exl->active_link);
		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	}

	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
		exl->active_priority = priority;
		list_add_tail(&exl->active_link, &port->active[priority]);
	}

	xe_execlist_port_wake_locked(exl->port, priority);

	spin_unlock_irq(&port->lock);
}

static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
{
	struct xe_execlist_port *port =
		container_of(timer, struct xe_execlist_port, irq_fail);

	spin_lock_irq(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock_irq(&port->lock);

	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);
}

struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
						 struct xe_hw_engine *hwe)
{
	struct drm_device *drm = &xe->drm;
	struct xe_execlist_port *port;
	int i, err;

	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
	if (!port) {
		err = -ENOMEM;
		goto err;
	}

	port->hwe = hwe;

	port->lrc = xe_lrc_create(hwe, NULL, SZ_16K);
	if (IS_ERR(port->lrc)) {
		err = PTR_ERR(port->lrc);
		goto err;
	}

	spin_lock_init(&port->lock);
	for (i = 0; i < ARRAY_SIZE(port->active); i++)
		INIT_LIST_HEAD(&port->active[i]);

	port->last_ctx_id = 1;
	port->running_exl = NULL;

	hwe->irq_handler = xe_execlist_port_irq_handler;

	/* TODO: Fix the interrupt code so it doesn't race like mad */
	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);

	return port;

err:
	return ERR_PTR(err);
}

void xe_execlist_port_destroy(struct xe_execlist_port *port)
{
	del_timer(&port->irq_fail);

	/* Prevent an interrupt while we're destroying */
	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
	port->hwe->irq_handler = NULL;
	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);

	xe_lrc_put(port->lrc);
}

static struct dma_fence *
execlist_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_execlist_exec_queue *exl = job->q->execlist;

	q->ring_ops->emit_job(job);
	xe_execlist_make_active(exl);

	return dma_fence_get(job->fence);
}

static void execlist_job_free(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	xe_exec_queue_update_run_ticks(job->q);
	xe_sched_job_put(job);
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = execlist_run_job,
	.free_job = execlist_job_free,
};

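/*
 * Each exec queue gets its own drm_gpu_scheduler and entity; jobs emitted
 * through execlist_run_job() are then made active on the shared per-engine
 * execlist port.
 */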
static int execlist_exec_queue_init(struct xe_exec_queue *q)
{
	struct drm_gpu_scheduler *sched;
	struct xe_execlist_exec_queue *exl;
	struct xe_device *xe = gt_to_xe(q->gt);
	int err;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");

	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
	if (!exl)
		return -ENOMEM;

	exl->q = q;

	err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
			     q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES,
			     XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
			     NULL, NULL, q->hwe->name,
			     gt_to_xe(q->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &exl->sched;
	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
	if (err)
		goto err_sched;

	exl->port = q->hwe->exl_port;
	exl->has_run = false;
	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	q->execlist = exl;
	q->entity = &exl->entity;

	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);

	return 0;

err_sched:
	drm_sched_fini(&exl->sched);
err_free:
	kfree(exl);
	return err;
}

static void execlist_exec_queue_fini_async(struct work_struct *w)
{
	struct xe_execlist_exec_queue *ee =
		container_of(w, struct xe_execlist_exec_queue, fini_async);
	struct xe_exec_queue *q = ee->q;
	struct xe_execlist_exec_queue *exl = q->execlist;
	struct xe_device *xe = gt_to_xe(q->gt);
	unsigned long flags;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	spin_lock_irqsave(&exl->port->lock, flags);
	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
		list_del(&exl->active_link);
	spin_unlock_irqrestore(&exl->port->lock, flags);

	drm_sched_entity_fini(&exl->entity);
	drm_sched_fini(&exl->sched);
	kfree(exl);

	xe_exec_queue_fini(q);
}

static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
	/* NIY */
}

static void execlist_exec_queue_fini(struct xe_exec_queue *q)
{
	INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
	queue_work(system_unbound_wq, &q->execlist->fini_async);
}

static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
					    enum xe_exec_queue_priority priority)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
						   u32 preempt_timeout_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
	/* NIY */
}

static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
{
	/* NIY */
	return false;
}

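/*
 * Exec queue backend used when GuC submission is disabled. Several of these
 * ops are not implemented yet (NIY) and are currently no-ops.
 */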
static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
	.init = execlist_exec_queue_init,
	.kill = execlist_exec_queue_kill,
	.fini = execlist_exec_queue_fini,
	.set_priority = execlist_exec_queue_set_priority,
	.set_timeslice = execlist_exec_queue_set_timeslice,
	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
	.suspend = execlist_exec_queue_suspend,
	.suspend_wait = execlist_exec_queue_suspend_wait,
	.resume = execlist_exec_queue_resume,
	.reset_status = execlist_exec_queue_reset_status,
};

int xe_execlist_init(struct xe_gt *gt)
{
	/* GuC submission enabled, nothing to do */
	if (xe_device_uc_enabled(gt_to_xe(gt)))
		return 0;

	gt->exec_queue_ops = &execlist_exec_queue_ops;

	return 0;
}