// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_execlist.h"

#include <drm/drm_managed.h>

#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"

#define XE_EXECLIST_HANG_LIMIT 1

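/*
 * Location of the software context ID field within the ExecList context
 * descriptor. The field was moved and widened on Xe_HP (graphics version
 * 12.50 and later).
 */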
#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define XEHP_SW_CTX_ID_SHIFT  39
#define XEHP_SW_CTX_ID_WIDTH  16

#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \
		    SW_CTX_ID_SHIFT)

#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
		    XEHP_SW_CTX_ID_SHIFT)

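/*
 * Point the engine at the given logical ring context and kick off execution
 * by writing the context descriptor (tagged with a software context ID) into
 * the ExecList submit queue and triggering a load.
 */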
static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
			u32 ctx_id)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_mmio *mmio = &gt->mmio;
	struct xe_device *xe = gt_to_xe(gt);
	u64 lrc_desc;
	u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);

	lrc_desc = xe_lrc_descriptor(lrc);

	if (GRAPHICS_VERx100(xe) >= 1250) {
		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
	} else {
		xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
	}

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
		xe_mmio_write32(mmio, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	lrc->ring.old_tail = lrc->ring.tail;

	/*
	 * Make sure the context image is complete before we submit it to HW.
	 *
	 * Ostensibly, writes (including the WCB) should be flushed prior to
	 * an uncached write such as our mmio register access, but the
	 * empirical evidence (esp. on Braswell) suggests that the WC write
	 * into memory may not be visible to the HW prior to the completion
	 * of the UC register write, and that we may begin execution from the
	 * context before its image is complete, leading to invalid PD
	 * chasing.
	 */
	wmb();

	xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
			xe_bo_ggtt_addr(hwe->hwsp));
	xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));

	if (xe_device_has_msix(gt_to_xe(hwe->gt)))
		ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
	xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode);

	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
			lower_32_bits(lrc_desc));
	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
			upper_32_bits(lrc_desc));
	xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
			EL_CTRL_LOAD);
}

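/*
 * Submit an exec queue's LRC on the port. A new software context ID is
 * assigned whenever a different queue (or a queue that has never run) takes
 * over the port; ID 0 is reserved for the kernel context.
 */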
static void __xe_execlist_port_start(struct xe_execlist_port *port,
				     struct xe_execlist_exec_queue *exl)
{
	struct xe_device *xe = gt_to_xe(port->hwe->gt);
	int max_ctx = FIELD_MAX(SW_CTX_ID);

	if (GRAPHICS_VERx100(xe) >= 1250)
		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);

	xe_execlist_port_assert_held(port);

	if (port->running_exl != exl || !exl->has_run) {
		port->last_ctx_id++;

		/* 0 is reserved for the kernel context */
		if (port->last_ctx_id > max_ctx)
			port->last_ctx_id = 1;
	}

	__start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
	port->running_exl = exl;
	exl->has_run = true;
}

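/*
 * Park the port: submit the port's own no-op LRC with context ID 0 so that
 * no user queue is left running on the engine.
 */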
static void __xe_execlist_port_idle(struct xe_execlist_port *port)
{
	u32 noop[2] = { MI_NOOP, MI_NOOP };

	xe_execlist_port_assert_held(port);

	if (!port->running_exl)
		return;

	xe_lrc_write_ring(port->lrc, noop, sizeof(noop));
	__start_lrc(port->hwe, port->lrc, 0);
	port->running_exl = NULL;
}

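/* A queue is idle when its ring tail has not advanced since the last submission. */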
static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
{
	struct xe_lrc *lrc = exl->q->lrc[0];

	return lrc->ring.tail == lrc->ring.old_tail;
}

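/*
 * Walk the per-priority active lists from highest to lowest priority,
 * dropping queues that have gone idle, and start the first one with pending
 * work. If nothing is runnable, idle the port.
 */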
static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
{
	struct xe_execlist_exec_queue *exl = NULL;
	int i;

	xe_execlist_port_assert_held(port);

	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
		while (!list_empty(&port->active[i])) {
			exl = list_first_entry(&port->active[i],
					       struct xe_execlist_exec_queue,
					       active_link);
			list_del(&exl->active_link);

			if (xe_execlist_is_idle(exl)) {
				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
				continue;
			}

			list_add_tail(&exl->active_link, &port->active[i]);
			__xe_execlist_port_start(port, exl);
			return;
		}
	}

	__xe_execlist_port_idle(port);
}

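/* Read the engine's 64-bit ExecList status register pair. */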
static u64 read_execlist_status(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	u32 hi, lo;

	lo = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
	hi = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));

	return lo | (u64)hi << 32;
}

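/*
 * Called with the port lock held, from the CS interrupt or the fallback
 * timer. Bails out while ExecList status bit 7 is set; otherwise picks the
 * next active queue to run.
 */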
static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
{
	u64 status;

	xe_execlist_port_assert_held(port);

	status = read_execlist_status(port->hwe);
	if (status & BIT(7))
		return;

	__xe_execlist_port_start_next_active(port);
}

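/* Interrupt entry point, installed as hwe->irq_handler in xe_execlist_port_create(). */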
static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
					 u16 intr_vec)
{
	struct xe_execlist_port *port = hwe->exl_port;

	spin_lock(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock(&port->lock);
}

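/*
 * Re-evaluate what should run on the port, but only if the waking queue's
 * priority beats whatever is currently running.
 */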
static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
					 enum xe_exec_queue_priority priority)
{
	xe_execlist_port_assert_held(port);

	if (port->running_exl && port->running_exl->active_priority >= priority)
		return;

	__xe_execlist_port_start_next_active(port);
}

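/*
 * Put the queue on the active list matching its current scheduling priority
 * (moving it if the priority changed) and wake the port.
 */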
static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
{
	struct xe_execlist_port *port = exl->port;
	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;

	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
	XE_WARN_ON(priority < 0);
	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));

	spin_lock_irq(&port->lock);

	if (exl->active_priority != priority &&
	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
		/* Priority changed, move it to the right list */
		list_del(&exl->active_link);
		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	}

	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
		exl->active_priority = priority;
		list_add_tail(&exl->active_link, &port->active[priority]);
	}

	xe_execlist_port_wake_locked(exl->port, priority);

	spin_unlock_irq(&port->lock);
}

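/*
 * Fallback for missed context-switch interrupts: poll the port once a second
 * and re-arm. See the TODO in xe_execlist_port_create() about the racy
 * interrupt handling this papers over.
 */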
static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
{
	struct xe_execlist_port *port =
		container_of(timer, struct xe_execlist_port, irq_fail);

	spin_lock_irq(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock_irq(&port->lock);

	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);
}

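/*
 * Create the per-engine execlist port: allocate it via DRM-managed memory,
 * create the port's private LRC used to idle the engine, initialize the
 * priority lists, install the interrupt handler and start the fallback
 * polling timer.
 */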
struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
						 struct xe_hw_engine *hwe)
{
	struct drm_device *drm = &xe->drm;
	struct xe_execlist_port *port;
	int i, err;

	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
	if (!port) {
		err = -ENOMEM;
		goto err;
	}

	port->hwe = hwe;

	port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0);
	if (IS_ERR(port->lrc)) {
		err = PTR_ERR(port->lrc);
		goto err;
	}

	spin_lock_init(&port->lock);
	for (i = 0; i < ARRAY_SIZE(port->active); i++)
		INIT_LIST_HEAD(&port->active[i]);

	port->last_ctx_id = 1;
	port->running_exl = NULL;

	hwe->irq_handler = xe_execlist_port_irq_handler;

	/* TODO: Fix the interrupt code so it doesn't race like mad */
	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);

	return port;

err:
	return ERR_PTR(err);
}

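/* Tear the port down: stop the fallback timer, detach the IRQ handler and drop the LRC. */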
void xe_execlist_port_destroy(struct xe_execlist_port *port)
{
	timer_delete(&port->irq_fail);

	/* Prevent an interrupt while we're destroying */
	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
	port->hwe->irq_handler = NULL;
	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);

	xe_lrc_put(port->lrc);
}

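/*
 * drm_sched run_job() callback: emit the job's commands into the ring and
 * mark the queue active so the port (re)considers it.
 */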
static struct dma_fence *
execlist_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_execlist_exec_queue *exl = job->q->execlist;

	q->ring_ops->emit_job(job);
	xe_execlist_make_active(exl);

	return job->fence;
}

static void execlist_job_free(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	xe_exec_queue_update_run_ticks(job->q);
	xe_sched_job_put(job);
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = execlist_run_job,
	.free_job = execlist_job_free,
};

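/*
 * Exec queue backend init: each queue gets its own DRM GPU scheduler and
 * entity (one run queue, credits sized from the ring), and is attached to
 * its hardware engine's execlist port.
 */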
static int execlist_exec_queue_init(struct xe_exec_queue *q)
{
	struct drm_gpu_scheduler *sched;
	const struct drm_sched_init_args args = {
		.ops = &drm_sched_ops,
		.num_rqs = 1,
		.credit_limit = q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES,
		.hang_limit = XE_SCHED_HANG_LIMIT,
		.timeout = XE_SCHED_JOB_TIMEOUT,
		.name = q->hwe->name,
		.dev = gt_to_xe(q->gt)->drm.dev,
	};
	struct xe_execlist_exec_queue *exl;
	struct xe_device *xe = gt_to_xe(q->gt);
	int err;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");

	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
	if (!exl)
		return -ENOMEM;

	exl->q = q;

	err = drm_sched_init(&exl->sched, &args);
	if (err)
		goto err_free;

	sched = &exl->sched;
	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
	if (err)
		goto err_sched;

	exl->port = q->hwe->exl_port;
	exl->has_run = false;
	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	q->execlist = exl;
	q->entity = &exl->entity;

	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);

	return 0;

err_sched:
	drm_sched_fini(&exl->sched);
err_free:
	kfree(exl);
	return err;
}

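/*
 * Deferred queue teardown: unlink the queue from its port's active list if it
 * is unexpectedly still on one (with a warning), then tear down the scheduler
 * entity, the scheduler and the queue itself.
 */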
static void execlist_exec_queue_fini_async(struct work_struct *w)
{
	struct xe_execlist_exec_queue *ee =
		container_of(w, struct xe_execlist_exec_queue, fini_async);
	struct xe_exec_queue *q = ee->q;
	struct xe_execlist_exec_queue *exl = q->execlist;
	struct xe_device *xe = gt_to_xe(q->gt);
	unsigned long flags;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	spin_lock_irqsave(&exl->port->lock, flags);
	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
		list_del(&exl->active_link);
	spin_unlock_irqrestore(&exl->port->lock, flags);

	drm_sched_entity_fini(&exl->entity);
	drm_sched_fini(&exl->sched);
	kfree(exl);

	xe_exec_queue_fini(q);
}

static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
	/* NIY */
}

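/* Queue teardown is deferred to a worker on system_unbound_wq. */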
static void execlist_exec_queue_fini(struct xe_exec_queue *q)
{
	INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
	queue_work(system_unbound_wq, &q->execlist->fini_async);
}

static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
					    enum xe_exec_queue_priority priority)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
						   u32 preempt_timeout_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
	/* NIY */
}

static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
{
	/* NIY */
	return false;
}

static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
	.init = execlist_exec_queue_init,
	.kill = execlist_exec_queue_kill,
	.fini = execlist_exec_queue_fini,
	.set_priority = execlist_exec_queue_set_priority,
	.set_timeslice = execlist_exec_queue_set_timeslice,
	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
	.suspend = execlist_exec_queue_suspend,
	.suspend_wait = execlist_exec_queue_suspend_wait,
	.resume = execlist_exec_queue_resume,
	.reset_status = execlist_exec_queue_reset_status,
};

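/* Install the execlist backend for the GT when GuC submission is not in use. */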
int xe_execlist_init(struct xe_gt *gt)
{
	/* GuC submission enabled, nothing to do */
	if (xe_device_uc_enabled(gt_to_xe(gt)))
		return 0;

	gt->exec_queue_ops = &execlist_exec_queue_ops;

	return 0;
}