// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_execlist.h"

#include <drm/drm_managed.h>

#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"

#define XE_EXECLIST_HANG_LIMIT 1

#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define XEHP_SW_CTX_ID_SHIFT 39
#define XEHP_SW_CTX_ID_WIDTH 16

#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \
		    SW_CTX_ID_SHIFT)

#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
		    XEHP_SW_CTX_ID_SHIFT)

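/*
 * With the shifts and widths above, the software context ID occupies
 * descriptor bits 47:37 on pre-Xe_HP platforms and bits 54:39 on Xe_HP
 * and later (GRAPHICS_VERx100 >= 1250), matching the platform checks in
 * __start_lrc() below.
 */
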
static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
			u32 ctx_id)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_mmio *mmio = &gt->mmio;
	struct xe_device *xe = gt_to_xe(gt);
	u64 lrc_desc;

	lrc_desc = xe_lrc_descriptor(lrc);

	if (GRAPHICS_VERx100(xe) >= 1250) {
		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
	} else {
		xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
	}

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
		xe_mmio_write32(mmio, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	lrc->ring.old_tail = lrc->ring.tail;

	/*
	 * Make sure the context image is complete before we submit it to HW.
	 *
	 * Ostensibly, writes (including the WCB) should be flushed prior to
	 * an uncached write such as our mmio register access; however, the
	 * empirical evidence (esp. on Braswell) suggests that the WC write
	 * into memory may not be visible to the HW prior to the completion
	 * of the UC register write, and that we may begin execution from the
	 * context before its image is complete, leading to invalid PD
	 * chasing.
	 */
	wmb();

	xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
			xe_bo_ggtt_addr(hwe->hwsp));
	xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));
	xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base),
			_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));

	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
			lower_32_bits(lrc_desc));
	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
			upper_32_bits(lrc_desc));
	xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
			EL_CTRL_LOAD);
}

static void __xe_execlist_port_start(struct xe_execlist_port *port,
				     struct xe_execlist_exec_queue *exl)
{
	struct xe_device *xe = gt_to_xe(port->hwe->gt);
	int max_ctx = FIELD_MAX(SW_CTX_ID);

	if (GRAPHICS_VERx100(xe) >= 1250)
		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);

	xe_execlist_port_assert_held(port);

	if (port->running_exl != exl || !exl->has_run) {
		port->last_ctx_id++;

		/* 0 is reserved for the kernel context */
		if (port->last_ctx_id > max_ctx)
			port->last_ctx_id = 1;
	}

	__start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
	port->running_exl = exl;
	exl->has_run = true;
}

static void __xe_execlist_port_idle(struct xe_execlist_port *port)
{
	u32 noop[2] = { MI_NOOP, MI_NOOP };

	xe_execlist_port_assert_held(port);

	if (!port->running_exl)
		return;

	xe_lrc_write_ring(port->lrc, noop, sizeof(noop));
	__start_lrc(port->hwe, port->lrc, 0);
	port->running_exl = NULL;
}

static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
{
	struct xe_lrc *lrc = exl->q->lrc[0];

	return lrc->ring.tail == lrc->ring.old_tail;
}

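/*
 * Pick the next queue to run on the port: walk the per-priority lists from
 * highest to lowest, dropping queues whose rings have no new work, and start
 * the first one that does. If nothing is runnable, park the port on its idle
 * context.
 */
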
static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
{
	struct xe_execlist_exec_queue *exl = NULL;
	int i;

	xe_execlist_port_assert_held(port);

	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
		while (!list_empty(&port->active[i])) {
			exl = list_first_entry(&port->active[i],
					       struct xe_execlist_exec_queue,
					       active_link);
			list_del(&exl->active_link);

			if (xe_execlist_is_idle(exl)) {
				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
				continue;
			}

			list_add_tail(&exl->active_link, &port->active[i]);
			__xe_execlist_port_start(port, exl);
			return;
		}
	}

	__xe_execlist_port_idle(port);
}

static u64 read_execlist_status(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	u32 hi, lo;

	lo = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
	hi = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));

	return lo | (u64)hi << 32;
}

static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
{
	u64 status;

	xe_execlist_port_assert_held(port);

	status = read_execlist_status(port->hwe);
	if (status & BIT(7))
		return;

	__xe_execlist_port_start_next_active(port);
}

static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
					 u16 intr_vec)
{
	struct xe_execlist_port *port = hwe->exl_port;

	spin_lock(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock(&port->lock);
}

static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
					 enum xe_exec_queue_priority priority)
{
	xe_execlist_port_assert_held(port);

	if (port->running_exl &&
	    port->running_exl->active_priority >= priority)
		return;

	__xe_execlist_port_start_next_active(port);
}

static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
{
	struct xe_execlist_port *port = exl->port;
	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;

	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
	XE_WARN_ON(priority < 0);
	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));

	spin_lock_irq(&port->lock);

	if (exl->active_priority != priority &&
	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
		/* Priority changed, move it to the right list */
		list_del(&exl->active_link);
		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	}

	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
		exl->active_priority = priority;
		list_add_tail(&exl->active_link, &port->active[priority]);
	}

	xe_execlist_port_wake_locked(exl->port, priority);

	spin_unlock_irq(&port->lock);
}

static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
{
	struct xe_execlist_port *port =
		container_of(timer, struct xe_execlist_port, irq_fail);

	spin_lock_irq(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock_irq(&port->lock);

	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);
}

struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
						 struct xe_hw_engine *hwe)
{
	struct drm_device *drm = &xe->drm;
	struct xe_execlist_port *port;
	int i, err;

	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
	if (!port) {
		err = -ENOMEM;
		goto err;
	}

	port->hwe = hwe;

	port->lrc = xe_lrc_create(hwe, NULL, SZ_16K);
	if (IS_ERR(port->lrc)) {
		err = PTR_ERR(port->lrc);
		goto err;
	}

	spin_lock_init(&port->lock);
	for (i = 0; i < ARRAY_SIZE(port->active); i++)
		INIT_LIST_HEAD(&port->active[i]);

	port->last_ctx_id = 1;
	port->running_exl = NULL;

	hwe->irq_handler = xe_execlist_port_irq_handler;

	/* TODO: Fix the interrupt code so it doesn't race like mad */
	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);

	return port;

err:
	return ERR_PTR(err);
}

void xe_execlist_port_destroy(struct xe_execlist_port *port)
{
	del_timer(&port->irq_fail);

	/* Prevent an interrupt while we're destroying */
	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
	port->hwe->irq_handler = NULL;
	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);

	xe_lrc_put(port->lrc);
}

static struct dma_fence *
execlist_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_execlist_exec_queue *exl = job->q->execlist;

	q->ring_ops->emit_job(job);
	xe_execlist_make_active(exl);

	return job->fence;
}

static void execlist_job_free(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	xe_exec_queue_update_run_ticks(job->q);
	xe_sched_job_put(job);
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = execlist_run_job,
	.free_job = execlist_job_free,
};

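/*
 * Each exec queue gets its own drm_gpu_scheduler with a single entity, with
 * a job credit limit derived from how many jobs fit in the LRC ring. The
 * scheduler hands jobs to execlist_run_job() above, which emits them into
 * the ring and marks the queue active on its port.
 */
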
static int execlist_exec_queue_init(struct xe_exec_queue *q)
{
	struct drm_gpu_scheduler *sched;
	struct xe_execlist_exec_queue *exl;
	struct xe_device *xe = gt_to_xe(q->gt);
	int err;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");

	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
	if (!exl)
		return -ENOMEM;

	exl->q = q;

	err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
			     q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES,
			     XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
			     NULL, NULL, q->hwe->name,
			     gt_to_xe(q->gt)->drm.dev);
	if (err)
		goto err_free;

	sched = &exl->sched;
	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
	if (err)
		goto err_sched;

	exl->port = q->hwe->exl_port;
	exl->has_run = false;
	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	q->execlist = exl;
	q->entity = &exl->entity;

	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);

	return 0;

err_sched:
	drm_sched_fini(&exl->sched);
err_free:
	kfree(exl);
	return err;
}

static void execlist_exec_queue_fini_async(struct work_struct *w)
{
	struct xe_execlist_exec_queue *ee =
		container_of(w, struct xe_execlist_exec_queue, fini_async);
	struct xe_exec_queue *q = ee->q;
	struct xe_execlist_exec_queue *exl = q->execlist;
	struct xe_device *xe = gt_to_xe(q->gt);
	unsigned long flags;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	spin_lock_irqsave(&exl->port->lock, flags);
	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
		list_del(&exl->active_link);
	spin_unlock_irqrestore(&exl->port->lock, flags);

	drm_sched_entity_fini(&exl->entity);
	drm_sched_fini(&exl->sched);
	kfree(exl);

	xe_exec_queue_fini(q);
}

static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
	/* NIY */
}

static void execlist_exec_queue_fini(struct xe_exec_queue *q)
{
	INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
	queue_work(system_unbound_wq, &q->execlist->fini_async);
}

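/*
 * The remaining exec queue ops are not implemented yet for execlist
 * submission ("NIY"); the stubs below are no-ops that return success where a
 * return value is expected, and "no reset" for reset_status.
 */
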
static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
					    enum xe_exec_queue_priority priority)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
						    u32 preempt_timeout_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
	/* NIY */
}

static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
{
	/* NIY */
	return false;
}

static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
	.init = execlist_exec_queue_init,
	.kill = execlist_exec_queue_kill,
	.fini = execlist_exec_queue_fini,
	.set_priority = execlist_exec_queue_set_priority,
	.set_timeslice = execlist_exec_queue_set_timeslice,
	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
	.suspend = execlist_exec_queue_suspend,
	.suspend_wait = execlist_exec_queue_suspend_wait,
	.resume = execlist_exec_queue_resume,
	.reset_status = execlist_exec_queue_reset_status,
};

int xe_execlist_init(struct xe_gt *gt)
{
	/* GuC submission enabled, nothing to do */
	if (xe_device_uc_enabled(gt_to_xe(gt)))
		return 0;

	gt->exec_queue_ops = &execlist_exec_queue_ops;

	return 0;
}