xref: /linux/drivers/gpu/drm/xe/xe_execlist.c (revision bf4afc53b77aeaa48b5409da5c8da6bb4eff7f43)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_execlist.h"
7 
8 #include <drm/drm_managed.h>
9 
10 #include "instructions/xe_mi_commands.h"
11 #include "regs/xe_engine_regs.h"
12 #include "regs/xe_gt_regs.h"
13 #include "regs/xe_lrc_layout.h"
14 #include "xe_assert.h"
15 #include "xe_bo.h"
16 #include "xe_device.h"
17 #include "xe_exec_queue.h"
18 #include "xe_gt_types.h"
19 #include "xe_irq.h"
20 #include "xe_lrc.h"
21 #include "xe_macros.h"
22 #include "xe_mmio.h"
23 #include "xe_mocs.h"
24 #include "xe_ring_ops_types.h"
25 #include "xe_sched_job.h"
26 
/* NOTE(review): not referenced in the visible part of this file - confirm it is still needed */
#define XE_EXECLIST_HANG_LIMIT 1

/*
 * Bit position and width of the software context ID that __start_lrc() ORs
 * into the 64-bit LRC descriptor. The XEHP_ variants are the ones selected
 * when GRAPHICS_VERx100() >= 1250.
 */
#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define XEHP_SW_CTX_ID_SHIFT  39
#define XEHP_SW_CTX_ID_WIDTH  16

/* Field masks built from the shift/width pairs above */
#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_SHIFT + SW_CTX_ID_WIDTH - 1, SW_CTX_ID_SHIFT)

#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_SHIFT + XEHP_SW_CTX_ID_WIDTH - 1, \
		    XEHP_SW_CTX_ID_SHIFT)
41 
42 
/*
 * Hand @lrc to the hardware through @hwe's execlist submit queue.
 *
 * @ctx_id is merged into the LRC descriptor as the software context ID; the
 * field layout used depends on the graphics version. Before the descriptor
 * is loaded, the ring tail is written into the context image and the
 * engine's HWSP address and ring mode are programmed.
 */
static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
			u32 ctx_id)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_mmio *mmio = &gt->mmio;
	u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);
	u64 desc = xe_lrc_descriptor(lrc);

	/* Tag the descriptor with the context ID, using the per-platform field */
	if (GRAPHICS_VERx100(xe) < 1250) {
		xe_gt_assert(gt, FIELD_FIT(SW_CTX_ID, ctx_id));
		desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
	} else {
		xe_gt_assert(gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
		desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
	}

	/* Compute engines additionally get RCU_MODE_CCS_ENABLE set */
	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
		xe_mmio_write32(mmio, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	/* Publish the current tail in the context image and remember it */
	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	lrc->ring.old_tail = lrc->ring.tail;

	/*
	 * The context image must be fully written before the HW sees the
	 * submission.
	 *
	 * In theory an uncached mmio write flushes earlier writes (including
	 * the WCB), but empirical evidence (esp. on Braswell) suggests that a
	 * WC write to memory may still be invisible to the HW when the UC
	 * register write completes. Execution could then start from an
	 * incomplete context image and chase invalid PDs, so force ordering
	 * explicitly.
	 */
	wmb();

	/* Program the HWSP address; the read back is presumably a posting read */
	xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
			xe_bo_ggtt_addr(hwe->hwsp));
	xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));

	if (xe_device_has_msix(xe))
		ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
	xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode);

	/* Queue the descriptor (low dword, then high dword) and trigger the load */
	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
			lower_32_bits(desc));
	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
			upper_32_bits(desc));
	xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
			EL_CTRL_LOAD);
}
96 
__xe_execlist_port_start(struct xe_execlist_port * port,struct xe_execlist_exec_queue * exl)97 static void __xe_execlist_port_start(struct xe_execlist_port *port,
98 				     struct xe_execlist_exec_queue *exl)
99 {
100 	struct xe_device *xe = gt_to_xe(port->hwe->gt);
101 	int max_ctx = FIELD_MAX(SW_CTX_ID);
102 
103 	if (GRAPHICS_VERx100(xe) >= 1250)
104 		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
105 
106 	xe_execlist_port_assert_held(port);
107 
108 	if (port->running_exl != exl || !exl->has_run) {
109 		port->last_ctx_id++;
110 
111 		/* 0 is reserved for the kernel context */
112 		if (port->last_ctx_id > max_ctx)
113 			port->last_ctx_id = 1;
114 	}
115 
116 	__start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
117 	port->running_exl = exl;
118 	exl->has_run = true;
119 }
120 
__xe_execlist_port_idle(struct xe_execlist_port * port)121 static void __xe_execlist_port_idle(struct xe_execlist_port *port)
122 {
123 	u32 noop[2] = { MI_NOOP, MI_NOOP };
124 
125 	xe_execlist_port_assert_held(port);
126 
127 	if (!port->running_exl)
128 		return;
129 
130 	xe_lrc_write_ring(port->lrc, noop, sizeof(noop));
131 	__start_lrc(port->hwe, port->lrc, 0);
132 	port->running_exl = NULL;
133 }
134 
xe_execlist_is_idle(struct xe_execlist_exec_queue * exl)135 static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
136 {
137 	struct xe_lrc *lrc = exl->q->lrc[0];
138 
139 	return lrc->ring.tail == lrc->ring.old_tail;
140 }
141 
__xe_execlist_port_start_next_active(struct xe_execlist_port * port)142 static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
143 {
144 	struct xe_execlist_exec_queue *exl = NULL;
145 	int i;
146 
147 	xe_execlist_port_assert_held(port);
148 
149 	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
150 		while (!list_empty(&port->active[i])) {
151 			exl = list_first_entry(&port->active[i],
152 					       struct xe_execlist_exec_queue,
153 					       active_link);
154 			list_del(&exl->active_link);
155 
156 			if (xe_execlist_is_idle(exl)) {
157 				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
158 				continue;
159 			}
160 
161 			list_add_tail(&exl->active_link, &port->active[i]);
162 			__xe_execlist_port_start(port, exl);
163 			return;
164 		}
165 	}
166 
167 	__xe_execlist_port_idle(port);
168 }
169 
read_execlist_status(struct xe_hw_engine * hwe)170 static u64 read_execlist_status(struct xe_hw_engine *hwe)
171 {
172 	struct xe_gt *gt = hwe->gt;
173 	u32 hi, lo;
174 
175 	lo = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
176 	hi = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
177 
178 	return lo | (u64)hi << 32;
179 }
180 
xe_execlist_port_irq_handler_locked(struct xe_execlist_port * port)181 static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
182 {
183 	u64 status;
184 
185 	xe_execlist_port_assert_held(port);
186 
187 	status = read_execlist_status(port->hwe);
188 	if (status & BIT(7))
189 		return;
190 
191 	__xe_execlist_port_start_next_active(port);
192 }
193 
/*
 * Engine IRQ handler installed by xe_execlist_port_create(): runs the locked
 * handler for @hwe's execlist port. @intr_vec is not used.
 */
static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
					 u16 intr_vec)
{
	struct xe_execlist_port *exl_port = hwe->exl_port;

	spin_lock(&exl_port->lock);
	xe_execlist_port_irq_handler_locked(exl_port);
	spin_unlock(&exl_port->lock);
}
203 
xe_execlist_port_wake_locked(struct xe_execlist_port * port,enum xe_exec_queue_priority priority)204 static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
205 					 enum xe_exec_queue_priority priority)
206 {
207 	xe_execlist_port_assert_held(port);
208 
209 	if (port->running_exl && port->running_exl->active_priority >= priority)
210 		return;
211 
212 	__xe_execlist_port_start_next_active(port);
213 }
214 
xe_execlist_make_active(struct xe_execlist_exec_queue * exl)215 static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
216 {
217 	struct xe_execlist_port *port = exl->port;
218 	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;
219 
220 	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
221 	XE_WARN_ON(priority < 0);
222 	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));
223 
224 	spin_lock_irq(&port->lock);
225 
226 	if (exl->active_priority != priority &&
227 	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
228 		/* Priority changed, move it to the right list */
229 		list_del(&exl->active_link);
230 		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
231 	}
232 
233 	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
234 		exl->active_priority = priority;
235 		list_add_tail(&exl->active_link, &port->active[priority]);
236 	}
237 
238 	xe_execlist_port_wake_locked(exl->port, priority);
239 
240 	spin_unlock_irq(&port->lock);
241 }
242 
xe_execlist_port_irq_fail_timer(struct timer_list * timer)243 static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
244 {
245 	struct xe_execlist_port *port =
246 		container_of(timer, struct xe_execlist_port, irq_fail);
247 
248 	spin_lock_irq(&port->lock);
249 	xe_execlist_port_irq_handler_locked(port);
250 	spin_unlock_irq(&port->lock);
251 
252 	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
253 	add_timer(&port->irq_fail);
254 }
255 
xe_execlist_port_create(struct xe_device * xe,struct xe_hw_engine * hwe)256 struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
257 						 struct xe_hw_engine *hwe)
258 {
259 	struct drm_device *drm = &xe->drm;
260 	struct xe_execlist_port *port;
261 	int i, err;
262 
263 	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
264 	if (!port) {
265 		err = -ENOMEM;
266 		goto err;
267 	}
268 
269 	port->hwe = hwe;
270 
271 	port->lrc = xe_lrc_create(hwe, NULL, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0);
272 	if (IS_ERR(port->lrc)) {
273 		err = PTR_ERR(port->lrc);
274 		goto err;
275 	}
276 
277 	spin_lock_init(&port->lock);
278 	for (i = 0; i < ARRAY_SIZE(port->active); i++)
279 		INIT_LIST_HEAD(&port->active[i]);
280 
281 	port->last_ctx_id = 1;
282 	port->running_exl = NULL;
283 
284 	hwe->irq_handler = xe_execlist_port_irq_handler;
285 
286 	/* TODO: Fix the interrupt code so it doesn't race like mad */
287 	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
288 	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
289 	add_timer(&port->irq_fail);
290 
291 	return port;
292 
293 err:
294 	return ERR_PTR(err);
295 }
296 
xe_execlist_port_destroy(struct xe_execlist_port * port)297 void xe_execlist_port_destroy(struct xe_execlist_port *port)
298 {
299 	timer_delete(&port->irq_fail);
300 
301 	/* Prevent an interrupt while we're destroying */
302 	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
303 	port->hwe->irq_handler = NULL;
304 	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
305 
306 	xe_lrc_put(port->lrc);
307 }
308 
309 static struct dma_fence *
execlist_run_job(struct drm_sched_job * drm_job)310 execlist_run_job(struct drm_sched_job *drm_job)
311 {
312 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
313 	struct xe_exec_queue *q = job->q;
314 	struct xe_execlist_exec_queue *exl = job->q->execlist;
315 
316 	q->ring_ops->emit_job(job);
317 	xe_execlist_make_active(exl);
318 
319 	return job->fence;
320 }
321 
execlist_job_free(struct drm_sched_job * drm_job)322 static void execlist_job_free(struct drm_sched_job *drm_job)
323 {
324 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
325 
326 	xe_exec_queue_update_run_ticks(job->q);
327 	xe_sched_job_put(job);
328 }
329 
330 static const struct drm_sched_backend_ops drm_sched_ops = {
331 	.run_job = execlist_run_job,
332 	.free_job = execlist_job_free,
333 };
334 
execlist_exec_queue_init(struct xe_exec_queue * q)335 static int execlist_exec_queue_init(struct xe_exec_queue *q)
336 {
337 	struct drm_gpu_scheduler *sched;
338 	const struct drm_sched_init_args args = {
339 		.ops = &drm_sched_ops,
340 		.num_rqs = 1,
341 		.credit_limit = xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES,
342 		.hang_limit = XE_SCHED_HANG_LIMIT,
343 		.timeout = XE_SCHED_JOB_TIMEOUT,
344 		.name = q->hwe->name,
345 		.dev = gt_to_xe(q->gt)->drm.dev,
346 	};
347 	struct xe_execlist_exec_queue *exl;
348 	struct xe_device *xe = gt_to_xe(q->gt);
349 	int err;
350 
351 	xe_assert(xe, !xe_device_uc_enabled(xe));
352 
353 	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
354 
355 	exl = kzalloc_obj(*exl);
356 	if (!exl)
357 		return -ENOMEM;
358 
359 	exl->q = q;
360 
361 	err = drm_sched_init(&exl->sched, &args);
362 	if (err)
363 		goto err_free;
364 
365 	sched = &exl->sched;
366 	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
367 	if (err)
368 		goto err_sched;
369 
370 	exl->port = q->hwe->exl_port;
371 	exl->has_run = false;
372 	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
373 	q->execlist = exl;
374 	q->entity = &exl->entity;
375 
376 	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);
377 
378 	return 0;
379 
380 err_sched:
381 	drm_sched_fini(&exl->sched);
382 err_free:
383 	kfree(exl);
384 	return err;
385 }
386 
execlist_exec_queue_fini(struct xe_exec_queue * q)387 static void execlist_exec_queue_fini(struct xe_exec_queue *q)
388 {
389 	struct xe_execlist_exec_queue *exl = q->execlist;
390 
391 	drm_sched_entity_fini(&exl->entity);
392 	drm_sched_fini(&exl->sched);
393 
394 	kfree(exl);
395 }
396 
execlist_exec_queue_destroy_async(struct work_struct * w)397 static void execlist_exec_queue_destroy_async(struct work_struct *w)
398 {
399 	struct xe_execlist_exec_queue *ee =
400 		container_of(w, struct xe_execlist_exec_queue, destroy_async);
401 	struct xe_exec_queue *q = ee->q;
402 	struct xe_execlist_exec_queue *exl = q->execlist;
403 	struct xe_device *xe = gt_to_xe(q->gt);
404 	unsigned long flags;
405 
406 	xe_assert(xe, !xe_device_uc_enabled(xe));
407 
408 	spin_lock_irqsave(&exl->port->lock, flags);
409 	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
410 		list_del(&exl->active_link);
411 	spin_unlock_irqrestore(&exl->port->lock, flags);
412 
413 	xe_exec_queue_fini(q);
414 }
415 
/* exec_queue_ops.kill: not implemented yet for execlists, so a no-op */
static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
}
420 
execlist_exec_queue_destroy(struct xe_exec_queue * q)421 static void execlist_exec_queue_destroy(struct xe_exec_queue *q)
422 {
423 	INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async);
424 	queue_work(system_unbound_wq, &q->execlist->destroy_async);
425 }
426 
/*
 * exec_queue_ops.set_priority: not implemented yet for execlists; @priority
 * is ignored and success (0) is reported.
 */
static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
					    enum xe_exec_queue_priority priority)
{
	return 0;
}
433 
/*
 * exec_queue_ops.set_timeslice: not implemented yet for execlists;
 * @timeslice_us is ignored and success (0) is reported.
 */
static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	return 0;
}
439 
/*
 * exec_queue_ops.set_preempt_timeout: not implemented yet for execlists;
 * @preempt_timeout_us is ignored and success (0) is reported.
 */
static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
						   u32 preempt_timeout_us)
{
	return 0;
}
446 
/*
 * exec_queue_ops.suspend: not implemented yet for execlists; nothing is
 * suspended and success (0) is reported.
 */
static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	return 0;
}
452 
/*
 * exec_queue_ops.suspend_wait: not implemented yet for execlists; returns 0
 * immediately without waiting.
 */
static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	return 0;
}
459 
/* exec_queue_ops.resume: not implemented yet for execlists, so a no-op */
static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
}
464 
execlist_exec_queue_reset_status(struct xe_exec_queue * q)465 static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
466 {
467 	/* NIY */
468 	return false;
469 }
470 
471 static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
472 	.init = execlist_exec_queue_init,
473 	.kill = execlist_exec_queue_kill,
474 	.fini = execlist_exec_queue_fini,
475 	.destroy = execlist_exec_queue_destroy,
476 	.set_priority = execlist_exec_queue_set_priority,
477 	.set_timeslice = execlist_exec_queue_set_timeslice,
478 	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
479 	.suspend = execlist_exec_queue_suspend,
480 	.suspend_wait = execlist_exec_queue_suspend_wait,
481 	.resume = execlist_exec_queue_resume,
482 	.reset_status = execlist_exec_queue_reset_status,
483 };
484 
xe_execlist_init(struct xe_gt * gt)485 int xe_execlist_init(struct xe_gt *gt)
486 {
487 	/* GuC submission enabled, nothing to do */
488 	if (xe_device_uc_enabled(gt_to_xe(gt)))
489 		return 0;
490 
491 	gt->exec_queue_ops = &execlist_exec_queue_ops;
492 
493 	return 0;
494 }
495