// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_execlist.h"

#include <drm/drm_managed.h>

#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt_types.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"

#define XE_EXECLIST_HANG_LIMIT 1

#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define XEHP_SW_CTX_ID_SHIFT 39
#define XEHP_SW_CTX_ID_WIDTH 16

#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \
		    SW_CTX_ID_SHIFT)

#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
		    XEHP_SW_CTX_ID_SHIFT)

static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
			u32 ctx_id)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_mmio *mmio = &gt->mmio;
	struct xe_device *xe = gt_to_xe(gt);
	u64 lrc_desc;
	u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);

	lrc_desc = xe_lrc_descriptor(lrc);

	if (GRAPHICS_VERx100(xe) >= 1250) {
		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
	} else {
		xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
	}

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
		xe_mmio_write32(mmio, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	lrc->ring.old_tail = lrc->ring.tail;

	/*
	 * Make sure the context image is complete before we submit it to HW.
	 *
	 * Ostensibly, writes (including the WCB) should be flushed prior to
	 * an uncached write such as our mmio register access, but the
	 * empirical evidence (esp. on Braswell) suggests that the WC write
	 * into memory may not be visible to the HW prior to the completion
	 * of the UC register write, and that we may begin execution from the
	 * context before its image is complete, leading to invalid PD chasing.
	 */
	wmb();

	xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
			xe_bo_ggtt_addr(hwe->hwsp));
	xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));

	if (xe_device_has_msix(gt_to_xe(hwe->gt)))
		ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
	xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode);

	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
			lower_32_bits(lrc_desc));
	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
			upper_32_bits(lrc_desc));
	xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
			EL_CTRL_LOAD);
}
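/*
 * A fresh software context ID is assigned whenever a different exec queue
 * (or one that has not run yet) is submitted to the port.  ID 0 is reserved
 * for the port's own idle/kernel context (see __xe_execlist_port_idle()),
 * so the counter wraps back to 1 once it exceeds the ID field width for the
 * current platform.
 */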
static void __xe_execlist_port_start(struct xe_execlist_port *port,
				     struct xe_execlist_exec_queue *exl)
{
	struct xe_device *xe = gt_to_xe(port->hwe->gt);
	int max_ctx = FIELD_MAX(SW_CTX_ID);

	if (GRAPHICS_VERx100(xe) >= 1250)
		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);

	xe_execlist_port_assert_held(port);

	if (port->running_exl != exl || !exl->has_run) {
		port->last_ctx_id++;

		/* 0 is reserved for the kernel context */
		if (port->last_ctx_id > max_ctx)
			port->last_ctx_id = 1;
	}

	__start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
	port->running_exl = exl;
	exl->has_run = true;
}

static void __xe_execlist_port_idle(struct xe_execlist_port *port)
{
	u32 noop[2] = { MI_NOOP, MI_NOOP };

	xe_execlist_port_assert_held(port);

	if (!port->running_exl)
		return;

	xe_lrc_write_ring(port->lrc, noop, sizeof(noop));
	__start_lrc(port->hwe, port->lrc, 0);
	port->running_exl = NULL;
}

static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
{
	struct xe_lrc *lrc = exl->q->lrc[0];

	return lrc->ring.tail == lrc->ring.old_tail;
}

static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
{
	struct xe_execlist_exec_queue *exl = NULL;
	int i;

	xe_execlist_port_assert_held(port);

	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
		while (!list_empty(&port->active[i])) {
			exl = list_first_entry(&port->active[i],
					       struct xe_execlist_exec_queue,
					       active_link);
			list_del(&exl->active_link);

			if (xe_execlist_is_idle(exl)) {
				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
				continue;
			}

			list_add_tail(&exl->active_link, &port->active[i]);
			__xe_execlist_port_start(port, exl);
			return;
		}
	}

	__xe_execlist_port_idle(port);
}

static u64 read_execlist_status(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	u32 hi, lo;

	lo = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
	hi = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));

	return lo | (u64)hi << 32;
}

static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
{
	u64 status;

	xe_execlist_port_assert_held(port);

	status = read_execlist_status(port->hwe);
	if (status & BIT(7))
		return;

	__xe_execlist_port_start_next_active(port);
}

static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
					 u16 intr_vec)
{
	struct xe_execlist_port *port = hwe->exl_port;

	spin_lock(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock(&port->lock);
}
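/*
 * Only kick the port if the newly runnable queue can actually displace what
 * is currently executing: if something is already running at an equal or
 * higher priority, leave it in place and let the next execlist interrupt
 * (or the fallback timer) pick the new queue up from its active list.
 */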
static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
					 enum xe_exec_queue_priority priority)
{
	xe_execlist_port_assert_held(port);

	if (port->running_exl && port->running_exl->active_priority >= priority)
		return;

	__xe_execlist_port_start_next_active(port);
}

static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
{
	struct xe_execlist_port *port = exl->port;
	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;

	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
	XE_WARN_ON(priority < 0);
	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));

	spin_lock_irq(&port->lock);

	if (exl->active_priority != priority &&
	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
		/* Priority changed, move it to the right list */
		list_del(&exl->active_link);
		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	}

	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
		exl->active_priority = priority;
		list_add_tail(&exl->active_link, &port->active[priority]);
	}

	xe_execlist_port_wake_locked(exl->port, priority);

	spin_unlock_irq(&port->lock);
}

static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
{
	struct xe_execlist_port *port =
		container_of(timer, struct xe_execlist_port, irq_fail);

	spin_lock_irq(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock_irq(&port->lock);

	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);
}

struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
						 struct xe_hw_engine *hwe)
{
	struct drm_device *drm = &xe->drm;
	struct xe_execlist_port *port;
	int i, err;

	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
	if (!port) {
		err = -ENOMEM;
		goto err;
	}

	port->hwe = hwe;

	port->lrc = xe_lrc_create(hwe, NULL, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0);
	if (IS_ERR(port->lrc)) {
		err = PTR_ERR(port->lrc);
		goto err;
	}

	spin_lock_init(&port->lock);
	for (i = 0; i < ARRAY_SIZE(port->active); i++)
		INIT_LIST_HEAD(&port->active[i]);

	port->last_ctx_id = 1;
	port->running_exl = NULL;

	hwe->irq_handler = xe_execlist_port_irq_handler;

	/* TODO: Fix the interrupt code so it doesn't race like mad */
	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);

	return port;

err:
	return ERR_PTR(err);
}

void xe_execlist_port_destroy(struct xe_execlist_port *port)
{
	timer_delete(&port->irq_fail);

	/* Prevent an interrupt while we're destroying */
	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
	port->hwe->irq_handler = NULL;
	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);

	xe_lrc_put(port->lrc);
}

static struct dma_fence *
execlist_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_execlist_exec_queue *exl = job->q->execlist;

	q->ring_ops->emit_job(job);
	xe_execlist_make_active(exl);

	return job->fence;
}

static void execlist_job_free(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	xe_exec_queue_update_run_ticks(job->q);
	xe_sched_job_put(job);
}
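/*
 * Execlist submission drives each exec queue through its own drm_gpu_scheduler
 * instance (see execlist_exec_queue_init() below): run_job emits the job into
 * the queue's ring and marks the queue active on the port, while free_job
 * updates the queue's run ticks and drops the job reference.
 */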
static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = execlist_run_job,
	.free_job = execlist_job_free,
};

static int execlist_exec_queue_init(struct xe_exec_queue *q)
{
	struct drm_gpu_scheduler *sched;
	const struct drm_sched_init_args args = {
		.ops = &drm_sched_ops,
		.num_rqs = 1,
		.credit_limit = xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES,
		.hang_limit = XE_SCHED_HANG_LIMIT,
		.timeout = XE_SCHED_JOB_TIMEOUT,
		.name = q->hwe->name,
		.dev = gt_to_xe(q->gt)->drm.dev,
	};
	struct xe_execlist_exec_queue *exl;
	struct xe_device *xe = gt_to_xe(q->gt);
	int err;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");

	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
	if (!exl)
		return -ENOMEM;

	exl->q = q;

	err = drm_sched_init(&exl->sched, &args);
	if (err)
		goto err_free;

	sched = &exl->sched;
	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
	if (err)
		goto err_sched;

	exl->port = q->hwe->exl_port;
	exl->has_run = false;
	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	q->execlist = exl;
	q->entity = &exl->entity;

	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);

	return 0;

err_sched:
	drm_sched_fini(&exl->sched);
err_free:
	kfree(exl);
	return err;
}

static void execlist_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_execlist_exec_queue *exl = q->execlist;

	drm_sched_entity_fini(&exl->entity);
	drm_sched_fini(&exl->sched);

	kfree(exl);
}

static void execlist_exec_queue_destroy_async(struct work_struct *w)
{
	struct xe_execlist_exec_queue *ee =
		container_of(w, struct xe_execlist_exec_queue, destroy_async);
	struct xe_exec_queue *q = ee->q;
	struct xe_execlist_exec_queue *exl = q->execlist;
	struct xe_device *xe = gt_to_xe(q->gt);
	unsigned long flags;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	spin_lock_irqsave(&exl->port->lock, flags);
	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
		list_del(&exl->active_link);
	spin_unlock_irqrestore(&exl->port->lock, flags);

	xe_exec_queue_fini(q);
}

static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
	/* NIY */
}

static void execlist_exec_queue_destroy(struct xe_exec_queue *q)
{
	INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async);
	queue_work(system_unbound_wq, &q->execlist->destroy_async);
}

static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
					    enum xe_exec_queue_priority priority)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
						    u32 preempt_timeout_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
	/* NIY */
}

static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
{
	/* NIY */
	return false;
}
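/*
 * Exec queue backend ops for execlist submission.  Queue setup, teardown and
 * job submission are implemented; the scheduling controls (priority,
 * timeslice, preempt timeout, suspend/resume, reset status) are still "NIY"
 * stubs, as this path is only used when GuC submission is disabled.
 */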
static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
	.init = execlist_exec_queue_init,
	.kill = execlist_exec_queue_kill,
	.fini = execlist_exec_queue_fini,
	.destroy = execlist_exec_queue_destroy,
	.set_priority = execlist_exec_queue_set_priority,
	.set_timeslice = execlist_exec_queue_set_timeslice,
	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
	.suspend = execlist_exec_queue_suspend,
	.suspend_wait = execlist_exec_queue_suspend_wait,
	.resume = execlist_exec_queue_resume,
	.reset_status = execlist_exec_queue_reset_status,
};

int xe_execlist_init(struct xe_gt *gt)
{
	/* GuC submission enabled, nothing to do */
	if (xe_device_uc_enabled(gt_to_xe(gt)))
		return 0;

	gt->exec_queue_ops = &execlist_exec_queue_ops;

	return 0;
}