1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2021 Intel Corporation 4 */ 5 6 #include "xe_execlist.h" 7 8 #include <drm/drm_managed.h> 9 10 #include "instructions/xe_mi_commands.h" 11 #include "regs/xe_engine_regs.h" 12 #include "regs/xe_gt_regs.h" 13 #include "regs/xe_lrc_layout.h" 14 #include "xe_assert.h" 15 #include "xe_bo.h" 16 #include "xe_device.h" 17 #include "xe_exec_queue.h" 18 #include "xe_gt_types.h" 19 #include "xe_irq.h" 20 #include "xe_lrc.h" 21 #include "xe_macros.h" 22 #include "xe_mmio.h" 23 #include "xe_mocs.h" 24 #include "xe_ring_ops_types.h" 25 #include "xe_sched_job.h" 26 27 #define XE_EXECLIST_HANG_LIMIT 1 28 29 #define SW_CTX_ID_SHIFT 37 30 #define SW_CTX_ID_WIDTH 11 31 #define XEHP_SW_CTX_ID_SHIFT 39 32 #define XEHP_SW_CTX_ID_WIDTH 16 33 34 #define SW_CTX_ID \ 35 GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \ 36 SW_CTX_ID_SHIFT) 37 38 #define XEHP_SW_CTX_ID \ 39 GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \ 40 XEHP_SW_CTX_ID_SHIFT) 41 42 43 static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc, 44 u32 ctx_id) 45 { 46 struct xe_gt *gt = hwe->gt; 47 struct xe_mmio *mmio = >->mmio; 48 struct xe_device *xe = gt_to_xe(gt); 49 u64 lrc_desc; 50 51 lrc_desc = xe_lrc_descriptor(lrc); 52 53 if (GRAPHICS_VERx100(xe) >= 1250) { 54 xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id)); 55 lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id); 56 } else { 57 xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id)); 58 lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id); 59 } 60 61 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); 62 lrc->ring.old_tail = lrc->ring.tail; 63 64 /* 65 * Make sure the context image is complete before we submit it to HW. 66 * 67 * Ostensibly, writes (including the WCB) should be flushed prior to 68 * an uncached write such as our mmio register access, the empirical 69 * evidence (esp. on Braswell) suggests that the WC write into memory 70 * may not be visible to the HW prior to the completion of the UC 71 * register write and that we may begin execution from the context 72 * before its image is complete leading to invalid PD chasing. 73 */ 74 wmb(); 75 76 xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base), 77 xe_bo_ggtt_addr(hwe->hwsp)); 78 xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base)); 79 80 xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base), 81 lower_32_bits(lrc_desc)); 82 xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base), 83 upper_32_bits(lrc_desc)); 84 xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base), 85 EL_CTRL_LOAD); 86 } 87 88 static void __xe_execlist_port_start(struct xe_execlist_port *port, 89 struct xe_execlist_exec_queue *exl) 90 { 91 struct xe_device *xe = gt_to_xe(port->hwe->gt); 92 int max_ctx = FIELD_MAX(SW_CTX_ID); 93 94 if (GRAPHICS_VERx100(xe) >= 1250) 95 max_ctx = FIELD_MAX(XEHP_SW_CTX_ID); 96 97 xe_execlist_port_assert_held(port); 98 99 if (port->running_exl != exl || !exl->has_run) { 100 port->last_ctx_id++; 101 102 /* 0 is reserved for the kernel context */ 103 if (port->last_ctx_id > max_ctx) 104 port->last_ctx_id = 1; 105 } 106 107 __start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id); 108 port->running_exl = exl; 109 exl->has_run = true; 110 } 111 112 static void __xe_execlist_port_idle(struct xe_execlist_port *port) 113 { 114 u32 noop[2] = { MI_NOOP, MI_NOOP }; 115 116 xe_execlist_port_assert_held(port); 117 118 if (!port->running_exl) 119 return; 120 121 xe_lrc_write_ring(port->lrc, noop, sizeof(noop)); 122 __start_lrc(port->hwe, port->lrc, 0); 123 port->running_exl = NULL; 124 } 125 126 static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl) 127 { 128 struct xe_lrc *lrc = exl->q->lrc[0]; 129 130 return lrc->ring.tail == lrc->ring.old_tail; 131 } 132 133 static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port) 134 { 135 struct xe_execlist_exec_queue *exl = NULL; 136 int i; 137 138 xe_execlist_port_assert_held(port); 139 140 for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) { 141 while (!list_empty(&port->active[i])) { 142 exl = list_first_entry(&port->active[i], 143 struct xe_execlist_exec_queue, 144 active_link); 145 list_del(&exl->active_link); 146 147 if (xe_execlist_is_idle(exl)) { 148 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET; 149 continue; 150 } 151 152 list_add_tail(&exl->active_link, &port->active[i]); 153 __xe_execlist_port_start(port, exl); 154 return; 155 } 156 } 157 158 __xe_execlist_port_idle(port); 159 } 160 161 static u64 read_execlist_status(struct xe_hw_engine *hwe) 162 { 163 struct xe_gt *gt = hwe->gt; 164 u32 hi, lo; 165 166 lo = xe_mmio_read32(>->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base)); 167 hi = xe_mmio_read32(>->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base)); 168 169 return lo | (u64)hi << 32; 170 } 171 172 static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port) 173 { 174 u64 status; 175 176 xe_execlist_port_assert_held(port); 177 178 status = read_execlist_status(port->hwe); 179 if (status & BIT(7)) 180 return; 181 182 __xe_execlist_port_start_next_active(port); 183 } 184 185 static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe, 186 u16 intr_vec) 187 { 188 struct xe_execlist_port *port = hwe->exl_port; 189 190 spin_lock(&port->lock); 191 xe_execlist_port_irq_handler_locked(port); 192 spin_unlock(&port->lock); 193 } 194 195 static void xe_execlist_port_wake_locked(struct xe_execlist_port *port, 196 enum xe_exec_queue_priority priority) 197 { 198 xe_execlist_port_assert_held(port); 199 200 if (port->running_exl && port->running_exl->active_priority >= priority) 201 return; 202 203 __xe_execlist_port_start_next_active(port); 204 } 205 206 static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl) 207 { 208 struct xe_execlist_port *port = exl->port; 209 enum xe_exec_queue_priority priority = exl->q->sched_props.priority; 210 211 XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET); 212 XE_WARN_ON(priority < 0); 213 XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active)); 214 215 spin_lock_irq(&port->lock); 216 217 if (exl->active_priority != priority && 218 exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) { 219 /* Priority changed, move it to the right list */ 220 list_del(&exl->active_link); 221 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET; 222 } 223 224 if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) { 225 exl->active_priority = priority; 226 list_add_tail(&exl->active_link, &port->active[priority]); 227 } 228 229 xe_execlist_port_wake_locked(exl->port, priority); 230 231 spin_unlock_irq(&port->lock); 232 } 233 234 static void xe_execlist_port_irq_fail_timer(struct timer_list *timer) 235 { 236 struct xe_execlist_port *port = 237 container_of(timer, struct xe_execlist_port, irq_fail); 238 239 spin_lock_irq(&port->lock); 240 xe_execlist_port_irq_handler_locked(port); 241 spin_unlock_irq(&port->lock); 242 243 port->irq_fail.expires = jiffies + msecs_to_jiffies(1000); 244 add_timer(&port->irq_fail); 245 } 246 247 struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe, 248 struct xe_hw_engine *hwe) 249 { 250 struct drm_device *drm = &xe->drm; 251 struct xe_execlist_port *port; 252 int i, err; 253 254 port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL); 255 if (!port) { 256 err = -ENOMEM; 257 goto err; 258 } 259 260 port->hwe = hwe; 261 262 port->lrc = xe_lrc_create(hwe, NULL, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0); 263 if (IS_ERR(port->lrc)) { 264 err = PTR_ERR(port->lrc); 265 goto err; 266 } 267 268 spin_lock_init(&port->lock); 269 for (i = 0; i < ARRAY_SIZE(port->active); i++) 270 INIT_LIST_HEAD(&port->active[i]); 271 272 port->last_ctx_id = 1; 273 port->running_exl = NULL; 274 275 hwe->irq_handler = xe_execlist_port_irq_handler; 276 277 /* TODO: Fix the interrupt code so it doesn't race like mad */ 278 timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0); 279 port->irq_fail.expires = jiffies + msecs_to_jiffies(1000); 280 add_timer(&port->irq_fail); 281 282 return port; 283 284 err: 285 return ERR_PTR(err); 286 } 287 288 void xe_execlist_port_destroy(struct xe_execlist_port *port) 289 { 290 timer_delete(&port->irq_fail); 291 292 /* Prevent an interrupt while we're destroying */ 293 spin_lock_irq(>_to_xe(port->hwe->gt)->irq.lock); 294 port->hwe->irq_handler = NULL; 295 spin_unlock_irq(>_to_xe(port->hwe->gt)->irq.lock); 296 297 xe_lrc_put(port->lrc); 298 } 299 300 static struct dma_fence * 301 execlist_run_job(struct drm_sched_job *drm_job) 302 { 303 struct xe_sched_job *job = to_xe_sched_job(drm_job); 304 struct xe_exec_queue *q = job->q; 305 struct xe_execlist_exec_queue *exl = job->q->execlist; 306 307 q->ring_ops->emit_job(job); 308 xe_execlist_make_active(exl); 309 310 return job->fence; 311 } 312 313 static void execlist_job_free(struct drm_sched_job *drm_job) 314 { 315 struct xe_sched_job *job = to_xe_sched_job(drm_job); 316 317 xe_exec_queue_update_run_ticks(job->q); 318 xe_sched_job_put(job); 319 } 320 321 static const struct drm_sched_backend_ops drm_sched_ops = { 322 .run_job = execlist_run_job, 323 .free_job = execlist_job_free, 324 }; 325 326 static int execlist_exec_queue_init(struct xe_exec_queue *q) 327 { 328 struct drm_gpu_scheduler *sched; 329 const struct drm_sched_init_args args = { 330 .ops = &drm_sched_ops, 331 .credit_limit = xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 332 .hang_limit = XE_SCHED_HANG_LIMIT, 333 .timeout = XE_SCHED_JOB_TIMEOUT, 334 .name = q->hwe->name, 335 .dev = gt_to_xe(q->gt)->drm.dev, 336 }; 337 struct xe_execlist_exec_queue *exl; 338 struct xe_device *xe = gt_to_xe(q->gt); 339 int err; 340 341 xe_assert(xe, !xe_device_uc_enabled(xe)); 342 343 drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n"); 344 345 exl = kzalloc_obj(*exl); 346 if (!exl) 347 return -ENOMEM; 348 349 exl->q = q; 350 351 err = drm_sched_init(&exl->sched, &args); 352 if (err) 353 goto err_free; 354 355 sched = &exl->sched; 356 err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL); 357 if (err) 358 goto err_sched; 359 360 exl->port = q->hwe->exl_port; 361 exl->has_run = false; 362 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET; 363 q->execlist = exl; 364 q->entity = &exl->entity; 365 366 xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1); 367 368 return 0; 369 370 err_sched: 371 drm_sched_fini(&exl->sched); 372 err_free: 373 kfree(exl); 374 return err; 375 } 376 377 static void execlist_exec_queue_fini(struct xe_exec_queue *q) 378 { 379 struct xe_execlist_exec_queue *exl = q->execlist; 380 381 drm_sched_entity_fini(&exl->entity); 382 drm_sched_fini(&exl->sched); 383 384 kfree(exl); 385 } 386 387 static void execlist_exec_queue_destroy_async(struct work_struct *w) 388 { 389 struct xe_execlist_exec_queue *ee = 390 container_of(w, struct xe_execlist_exec_queue, destroy_async); 391 struct xe_exec_queue *q = ee->q; 392 struct xe_execlist_exec_queue *exl = q->execlist; 393 struct xe_device *xe = gt_to_xe(q->gt); 394 unsigned long flags; 395 396 xe_assert(xe, !xe_device_uc_enabled(xe)); 397 398 spin_lock_irqsave(&exl->port->lock, flags); 399 if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET)) 400 list_del(&exl->active_link); 401 spin_unlock_irqrestore(&exl->port->lock, flags); 402 403 xe_exec_queue_fini(q); 404 } 405 406 static void execlist_exec_queue_kill(struct xe_exec_queue *q) 407 { 408 /* NIY */ 409 } 410 411 static void execlist_exec_queue_destroy(struct xe_exec_queue *q) 412 { 413 INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async); 414 queue_work(system_dfl_wq, &q->execlist->destroy_async); 415 } 416 417 static int execlist_exec_queue_set_priority(struct xe_exec_queue *q, 418 enum xe_exec_queue_priority priority) 419 { 420 /* NIY */ 421 return 0; 422 } 423 424 static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) 425 { 426 /* NIY */ 427 return 0; 428 } 429 430 static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, 431 u32 preempt_timeout_us) 432 { 433 /* NIY */ 434 return 0; 435 } 436 437 static int execlist_exec_queue_suspend(struct xe_exec_queue *q) 438 { 439 /* NIY */ 440 return 0; 441 } 442 443 static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q) 444 445 { 446 /* NIY */ 447 return 0; 448 } 449 450 static void execlist_exec_queue_resume(struct xe_exec_queue *q) 451 { 452 /* NIY */ 453 } 454 455 static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q) 456 { 457 /* NIY */ 458 return false; 459 } 460 461 static bool execlist_exec_queue_active(struct xe_exec_queue *q) 462 { 463 /* NIY */ 464 return false; 465 } 466 467 static const struct xe_exec_queue_ops execlist_exec_queue_ops = { 468 .init = execlist_exec_queue_init, 469 .kill = execlist_exec_queue_kill, 470 .fini = execlist_exec_queue_fini, 471 .destroy = execlist_exec_queue_destroy, 472 .set_priority = execlist_exec_queue_set_priority, 473 .set_timeslice = execlist_exec_queue_set_timeslice, 474 .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout, 475 .suspend = execlist_exec_queue_suspend, 476 .suspend_wait = execlist_exec_queue_suspend_wait, 477 .resume = execlist_exec_queue_resume, 478 .reset_status = execlist_exec_queue_reset_status, 479 .active = execlist_exec_queue_active, 480 }; 481 482 int xe_execlist_init(struct xe_gt *gt) 483 { 484 /* GuC submission enabled, nothing to do */ 485 if (xe_device_uc_enabled(gt_to_xe(gt))) 486 return 0; 487 488 gt->exec_queue_ops = &execlist_exec_queue_ops; 489 490 return 0; 491 } 492