xref: /linux/drivers/gpu/drm/xe/xe_execlist.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_execlist.h"
7 
8 #include <drm/drm_managed.h>
9 
10 #include "instructions/xe_mi_commands.h"
11 #include "regs/xe_engine_regs.h"
12 #include "regs/xe_gt_regs.h"
13 #include "regs/xe_lrc_layout.h"
14 #include "xe_assert.h"
15 #include "xe_bo.h"
16 #include "xe_device.h"
17 #include "xe_exec_queue.h"
18 #include "xe_gt_types.h"
19 #include "xe_irq.h"
20 #include "xe_lrc.h"
21 #include "xe_macros.h"
22 #include "xe_mmio.h"
23 #include "xe_mocs.h"
24 #include "xe_ring_ops_types.h"
25 #include "xe_sched_job.h"
26 
/* NOTE(review): not referenced by any code visible in this file - confirm it is still needed. */
#define XE_EXECLIST_HANG_LIMIT 1

/*
 * Location of the software context ID inside the 64-bit LRC descriptor.
 * The XEHP_ layout is the one used when GRAPHICS_VERx100() >= 1250, the
 * plain layout is used otherwise.
 */
#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define XEHP_SW_CTX_ID_SHIFT  39
#define XEHP_SW_CTX_ID_WIDTH  16

/* Field masks built from the shift/width pairs above */
#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_SHIFT + SW_CTX_ID_WIDTH - 1, \
		    SW_CTX_ID_SHIFT)

#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_SHIFT + XEHP_SW_CTX_ID_WIDTH - 1, \
		    XEHP_SW_CTX_ID_SHIFT)
41 
42 
43 static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
44 			u32 ctx_id)
45 {
46 	struct xe_gt *gt = hwe->gt;
47 	struct xe_mmio *mmio = &gt->mmio;
48 	struct xe_device *xe = gt_to_xe(gt);
49 	u64 lrc_desc;
50 
51 	lrc_desc = xe_lrc_descriptor(lrc);
52 
53 	if (GRAPHICS_VERx100(xe) >= 1250) {
54 		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
55 		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
56 	} else {
57 		xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
58 		lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
59 	}
60 
61 	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
62 	lrc->ring.old_tail = lrc->ring.tail;
63 
64 	/*
65 	 * Make sure the context image is complete before we submit it to HW.
66 	 *
67 	 * Ostensibly, writes (including the WCB) should be flushed prior to
68 	 * an uncached write such as our mmio register access, the empirical
69 	 * evidence (esp. on Braswell) suggests that the WC write into memory
70 	 * may not be visible to the HW prior to the completion of the UC
71 	 * register write and that we may begin execution from the context
72 	 * before its image is complete leading to invalid PD chasing.
73 	 */
74 	wmb();
75 
76 	xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
77 			xe_bo_ggtt_addr(hwe->hwsp));
78 	xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));
79 
80 	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
81 			lower_32_bits(lrc_desc));
82 	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
83 			upper_32_bits(lrc_desc));
84 	xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
85 			EL_CTRL_LOAD);
86 }
87 
88 static void __xe_execlist_port_start(struct xe_execlist_port *port,
89 				     struct xe_execlist_exec_queue *exl)
90 {
91 	struct xe_device *xe = gt_to_xe(port->hwe->gt);
92 	int max_ctx = FIELD_MAX(SW_CTX_ID);
93 
94 	if (GRAPHICS_VERx100(xe) >= 1250)
95 		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
96 
97 	xe_execlist_port_assert_held(port);
98 
99 	if (port->running_exl != exl || !exl->has_run) {
100 		port->last_ctx_id++;
101 
102 		/* 0 is reserved for the kernel context */
103 		if (port->last_ctx_id > max_ctx)
104 			port->last_ctx_id = 1;
105 	}
106 
107 	__start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
108 	port->running_exl = exl;
109 	exl->has_run = true;
110 }
111 
112 static void __xe_execlist_port_idle(struct xe_execlist_port *port)
113 {
114 	u32 noop[2] = { MI_NOOP, MI_NOOP };
115 
116 	xe_execlist_port_assert_held(port);
117 
118 	if (!port->running_exl)
119 		return;
120 
121 	xe_lrc_write_ring(port->lrc, noop, sizeof(noop));
122 	__start_lrc(port->hwe, port->lrc, 0);
123 	port->running_exl = NULL;
124 }
125 
126 static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
127 {
128 	struct xe_lrc *lrc = exl->q->lrc[0];
129 
130 	return lrc->ring.tail == lrc->ring.old_tail;
131 }
132 
133 static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
134 {
135 	struct xe_execlist_exec_queue *exl = NULL;
136 	int i;
137 
138 	xe_execlist_port_assert_held(port);
139 
140 	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
141 		while (!list_empty(&port->active[i])) {
142 			exl = list_first_entry(&port->active[i],
143 					       struct xe_execlist_exec_queue,
144 					       active_link);
145 			list_del(&exl->active_link);
146 
147 			if (xe_execlist_is_idle(exl)) {
148 				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
149 				continue;
150 			}
151 
152 			list_add_tail(&exl->active_link, &port->active[i]);
153 			__xe_execlist_port_start(port, exl);
154 			return;
155 		}
156 	}
157 
158 	__xe_execlist_port_idle(port);
159 }
160 
161 static u64 read_execlist_status(struct xe_hw_engine *hwe)
162 {
163 	struct xe_gt *gt = hwe->gt;
164 	u32 hi, lo;
165 
166 	lo = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
167 	hi = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
168 
169 	return lo | (u64)hi << 32;
170 }
171 
172 static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
173 {
174 	u64 status;
175 
176 	xe_execlist_port_assert_held(port);
177 
178 	status = read_execlist_status(port->hwe);
179 	if (status & BIT(7))
180 		return;
181 
182 	__xe_execlist_port_start_next_active(port);
183 }
184 
185 static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
186 					 u16 intr_vec)
187 {
188 	struct xe_execlist_port *port = hwe->exl_port;
189 
190 	spin_lock(&port->lock);
191 	xe_execlist_port_irq_handler_locked(port);
192 	spin_unlock(&port->lock);
193 }
194 
195 static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
196 					 enum xe_exec_queue_priority priority)
197 {
198 	xe_execlist_port_assert_held(port);
199 
200 	if (port->running_exl && port->running_exl->active_priority >= priority)
201 		return;
202 
203 	__xe_execlist_port_start_next_active(port);
204 }
205 
206 static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
207 {
208 	struct xe_execlist_port *port = exl->port;
209 	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;
210 
211 	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
212 	XE_WARN_ON(priority < 0);
213 	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));
214 
215 	spin_lock_irq(&port->lock);
216 
217 	if (exl->active_priority != priority &&
218 	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
219 		/* Priority changed, move it to the right list */
220 		list_del(&exl->active_link);
221 		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
222 	}
223 
224 	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
225 		exl->active_priority = priority;
226 		list_add_tail(&exl->active_link, &port->active[priority]);
227 	}
228 
229 	xe_execlist_port_wake_locked(exl->port, priority);
230 
231 	spin_unlock_irq(&port->lock);
232 }
233 
234 static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
235 {
236 	struct xe_execlist_port *port =
237 		container_of(timer, struct xe_execlist_port, irq_fail);
238 
239 	spin_lock_irq(&port->lock);
240 	xe_execlist_port_irq_handler_locked(port);
241 	spin_unlock_irq(&port->lock);
242 
243 	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
244 	add_timer(&port->irq_fail);
245 }
246 
247 struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
248 						 struct xe_hw_engine *hwe)
249 {
250 	struct drm_device *drm = &xe->drm;
251 	struct xe_execlist_port *port;
252 	int i, err;
253 
254 	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
255 	if (!port) {
256 		err = -ENOMEM;
257 		goto err;
258 	}
259 
260 	port->hwe = hwe;
261 
262 	port->lrc = xe_lrc_create(hwe, NULL, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0);
263 	if (IS_ERR(port->lrc)) {
264 		err = PTR_ERR(port->lrc);
265 		goto err;
266 	}
267 
268 	spin_lock_init(&port->lock);
269 	for (i = 0; i < ARRAY_SIZE(port->active); i++)
270 		INIT_LIST_HEAD(&port->active[i]);
271 
272 	port->last_ctx_id = 1;
273 	port->running_exl = NULL;
274 
275 	hwe->irq_handler = xe_execlist_port_irq_handler;
276 
277 	/* TODO: Fix the interrupt code so it doesn't race like mad */
278 	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
279 	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
280 	add_timer(&port->irq_fail);
281 
282 	return port;
283 
284 err:
285 	return ERR_PTR(err);
286 }
287 
288 void xe_execlist_port_destroy(struct xe_execlist_port *port)
289 {
290 	timer_delete(&port->irq_fail);
291 
292 	/* Prevent an interrupt while we're destroying */
293 	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
294 	port->hwe->irq_handler = NULL;
295 	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
296 
297 	xe_lrc_put(port->lrc);
298 }
299 
300 static struct dma_fence *
301 execlist_run_job(struct drm_sched_job *drm_job)
302 {
303 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
304 	struct xe_exec_queue *q = job->q;
305 	struct xe_execlist_exec_queue *exl = job->q->execlist;
306 
307 	q->ring_ops->emit_job(job);
308 	xe_execlist_make_active(exl);
309 
310 	return job->fence;
311 }
312 
313 static void execlist_job_free(struct drm_sched_job *drm_job)
314 {
315 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
316 
317 	xe_exec_queue_update_run_ticks(job->q);
318 	xe_sched_job_put(job);
319 }
320 
321 static const struct drm_sched_backend_ops drm_sched_ops = {
322 	.run_job = execlist_run_job,
323 	.free_job = execlist_job_free,
324 };
325 
326 static int execlist_exec_queue_init(struct xe_exec_queue *q)
327 {
328 	struct drm_gpu_scheduler *sched;
329 	const struct drm_sched_init_args args = {
330 		.ops = &drm_sched_ops,
331 		.credit_limit = xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES,
332 		.hang_limit = XE_SCHED_HANG_LIMIT,
333 		.timeout = XE_SCHED_JOB_TIMEOUT,
334 		.name = q->hwe->name,
335 		.dev = gt_to_xe(q->gt)->drm.dev,
336 	};
337 	struct xe_execlist_exec_queue *exl;
338 	struct xe_device *xe = gt_to_xe(q->gt);
339 	int err;
340 
341 	xe_assert(xe, !xe_device_uc_enabled(xe));
342 
343 	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
344 
345 	exl = kzalloc_obj(*exl);
346 	if (!exl)
347 		return -ENOMEM;
348 
349 	exl->q = q;
350 
351 	err = drm_sched_init(&exl->sched, &args);
352 	if (err)
353 		goto err_free;
354 
355 	sched = &exl->sched;
356 	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
357 	if (err)
358 		goto err_sched;
359 
360 	exl->port = q->hwe->exl_port;
361 	exl->has_run = false;
362 	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
363 	q->execlist = exl;
364 	q->entity = &exl->entity;
365 
366 	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);
367 
368 	return 0;
369 
370 err_sched:
371 	drm_sched_fini(&exl->sched);
372 err_free:
373 	kfree(exl);
374 	return err;
375 }
376 
377 static void execlist_exec_queue_fini(struct xe_exec_queue *q)
378 {
379 	struct xe_execlist_exec_queue *exl = q->execlist;
380 
381 	drm_sched_entity_fini(&exl->entity);
382 	drm_sched_fini(&exl->sched);
383 
384 	kfree(exl);
385 }
386 
387 static void execlist_exec_queue_destroy_async(struct work_struct *w)
388 {
389 	struct xe_execlist_exec_queue *ee =
390 		container_of(w, struct xe_execlist_exec_queue, destroy_async);
391 	struct xe_exec_queue *q = ee->q;
392 	struct xe_execlist_exec_queue *exl = q->execlist;
393 	struct xe_device *xe = gt_to_xe(q->gt);
394 	unsigned long flags;
395 
396 	xe_assert(xe, !xe_device_uc_enabled(xe));
397 
398 	spin_lock_irqsave(&exl->port->lock, flags);
399 	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
400 		list_del(&exl->active_link);
401 	spin_unlock_irqrestore(&exl->port->lock, flags);
402 
403 	xe_exec_queue_fini(q);
404 }
405 
/* Backend kill hook; intentionally empty for execlists for now. */
static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
	/* Not implemented yet */
}
410 
411 static void execlist_exec_queue_destroy(struct xe_exec_queue *q)
412 {
413 	INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async);
414 	queue_work(system_dfl_wq, &q->execlist->destroy_async);
415 }
416 
417 static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
418 					    enum xe_exec_queue_priority priority)
419 {
420 	/* NIY */
421 	return 0;
422 }
423 
424 static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
425 {
426 	/* NIY */
427 	return 0;
428 }
429 
430 static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
431 						   u32 preempt_timeout_us)
432 {
433 	/* NIY */
434 	return 0;
435 }
436 
/* Backend suspend hook. Not implemented yet: always reports success (0). */
static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	return 0;
}
442 
/*
 * Backend suspend_wait hook. Not implemented yet: returns 0 immediately
 * without waiting for anything.
 */
static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	return 0;
}
449 
/* Backend resume hook; intentionally empty for execlists for now. */
static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
	/* Not implemented yet */
}
454 
/* Backend reset_status hook. Not implemented yet: always returns false. */
static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
{
	return false;
}
460 
/* Backend active hook. Not implemented yet: always returns false. */
static bool execlist_exec_queue_active(struct xe_exec_queue *q)
{
	return false;
}
466 
467 static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
468 	.init = execlist_exec_queue_init,
469 	.kill = execlist_exec_queue_kill,
470 	.fini = execlist_exec_queue_fini,
471 	.destroy = execlist_exec_queue_destroy,
472 	.set_priority = execlist_exec_queue_set_priority,
473 	.set_timeslice = execlist_exec_queue_set_timeslice,
474 	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
475 	.suspend = execlist_exec_queue_suspend,
476 	.suspend_wait = execlist_exec_queue_suspend_wait,
477 	.resume = execlist_exec_queue_resume,
478 	.reset_status = execlist_exec_queue_reset_status,
479 	.active = execlist_exec_queue_active,
480 };
481 
482 int xe_execlist_init(struct xe_gt *gt)
483 {
484 	/* GuC submission enabled, nothing to do */
485 	if (xe_device_uc_enabled(gt_to_xe(gt)))
486 		return 0;
487 
488 	gt->exec_queue_ops = &execlist_exec_queue_ops;
489 
490 	return 0;
491 }
492