xref: /linux/drivers/gpu/drm/xe/xe_execlist.c (revision add452d09a38c7a7c44aea55c1015392cebf9fa7)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_execlist.h"
7 
8 #include <drm/drm_managed.h>
9 
10 #include "instructions/xe_mi_commands.h"
11 #include "regs/xe_engine_regs.h"
12 #include "regs/xe_gt_regs.h"
13 #include "regs/xe_lrc_layout.h"
14 #include "xe_assert.h"
15 #include "xe_bo.h"
16 #include "xe_device.h"
17 #include "xe_exec_queue.h"
18 #include "xe_gt.h"
19 #include "xe_hw_fence.h"
20 #include "xe_lrc.h"
21 #include "xe_macros.h"
22 #include "xe_mmio.h"
23 #include "xe_mocs.h"
24 #include "xe_ring_ops_types.h"
25 #include "xe_sched_job.h"
26 
/*
 * NOTE(review): presumably the scheduler hang limit for execlist submission;
 * nothing in the visible part of this file references it - confirm whether it
 * is still needed.
 */
#define XE_EXECLIST_HANG_LIMIT 1

/*
 * Placement of the software context ID inside the 64-bit LRC descriptor that
 * __start_lrc() submits. The XEHP_* layout is selected when
 * GRAPHICS_VERx100() >= 1250, the plain layout otherwise.
 */
#define SW_CTX_ID_SHIFT		37
#define SW_CTX_ID_WIDTH		11
#define XEHP_SW_CTX_ID_SHIFT	39
#define XEHP_SW_CTX_ID_WIDTH	16

/* Descriptor bits 47:37 */
#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_SHIFT + SW_CTX_ID_WIDTH - 1, SW_CTX_ID_SHIFT)

/* Descriptor bits 54:39 */
#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_SHIFT + XEHP_SW_CTX_ID_WIDTH - 1, \
		    XEHP_SW_CTX_ID_SHIFT)
41 
42 
43 static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
44 			u32 ctx_id)
45 {
46 	struct xe_gt *gt = hwe->gt;
47 	struct xe_device *xe = gt_to_xe(gt);
48 	u64 lrc_desc;
49 
50 	lrc_desc = xe_lrc_descriptor(lrc);
51 
52 	if (GRAPHICS_VERx100(xe) >= 1250) {
53 		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
54 		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
55 	} else {
56 		xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
57 		lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
58 	}
59 
60 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
61 		xe_mmio_write32(hwe->gt, RCU_MODE,
62 				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
63 
64 	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
65 	lrc->ring.old_tail = lrc->ring.tail;
66 
67 	/*
68 	 * Make sure the context image is complete before we submit it to HW.
69 	 *
70 	 * Ostensibly, writes (including the WCB) should be flushed prior to
71 	 * an uncached write such as our mmio register access, the empirical
72 	 * evidence (esp. on Braswell) suggests that the WC write into memory
73 	 * may not be visible to the HW prior to the completion of the UC
74 	 * register write and that we may begin execution from the context
75 	 * before its image is complete leading to invalid PD chasing.
76 	 */
77 	wmb();
78 
79 	xe_mmio_write32(gt, RING_HWS_PGA(hwe->mmio_base),
80 			xe_bo_ggtt_addr(hwe->hwsp));
81 	xe_mmio_read32(gt, RING_HWS_PGA(hwe->mmio_base));
82 	xe_mmio_write32(gt, RING_MODE(hwe->mmio_base),
83 			_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
84 
85 	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
86 			lower_32_bits(lrc_desc));
87 	xe_mmio_write32(gt, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
88 			upper_32_bits(lrc_desc));
89 	xe_mmio_write32(gt, RING_EXECLIST_CONTROL(hwe->mmio_base),
90 			EL_CTRL_LOAD);
91 }
92 
93 static void __xe_execlist_port_start(struct xe_execlist_port *port,
94 				     struct xe_execlist_exec_queue *exl)
95 {
96 	struct xe_device *xe = gt_to_xe(port->hwe->gt);
97 	int max_ctx = FIELD_MAX(SW_CTX_ID);
98 
99 	if (GRAPHICS_VERx100(xe) >= 1250)
100 		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
101 
102 	xe_execlist_port_assert_held(port);
103 
104 	if (port->running_exl != exl || !exl->has_run) {
105 		port->last_ctx_id++;
106 
107 		/* 0 is reserved for the kernel context */
108 		if (port->last_ctx_id > max_ctx)
109 			port->last_ctx_id = 1;
110 	}
111 
112 	__start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
113 	port->running_exl = exl;
114 	exl->has_run = true;
115 }
116 
117 static void __xe_execlist_port_idle(struct xe_execlist_port *port)
118 {
119 	u32 noop[2] = { MI_NOOP, MI_NOOP };
120 
121 	xe_execlist_port_assert_held(port);
122 
123 	if (!port->running_exl)
124 		return;
125 
126 	xe_lrc_write_ring(port->lrc, noop, sizeof(noop));
127 	__start_lrc(port->hwe, port->lrc, 0);
128 	port->running_exl = NULL;
129 }
130 
131 static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
132 {
133 	struct xe_lrc *lrc = exl->q->lrc[0];
134 
135 	return lrc->ring.tail == lrc->ring.old_tail;
136 }
137 
138 static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
139 {
140 	struct xe_execlist_exec_queue *exl = NULL;
141 	int i;
142 
143 	xe_execlist_port_assert_held(port);
144 
145 	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
146 		while (!list_empty(&port->active[i])) {
147 			exl = list_first_entry(&port->active[i],
148 					       struct xe_execlist_exec_queue,
149 					       active_link);
150 			list_del(&exl->active_link);
151 
152 			if (xe_execlist_is_idle(exl)) {
153 				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
154 				continue;
155 			}
156 
157 			list_add_tail(&exl->active_link, &port->active[i]);
158 			__xe_execlist_port_start(port, exl);
159 			return;
160 		}
161 	}
162 
163 	__xe_execlist_port_idle(port);
164 }
165 
166 static u64 read_execlist_status(struct xe_hw_engine *hwe)
167 {
168 	struct xe_gt *gt = hwe->gt;
169 	u32 hi, lo;
170 
171 	lo = xe_mmio_read32(gt, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
172 	hi = xe_mmio_read32(gt, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
173 
174 	return lo | (u64)hi << 32;
175 }
176 
177 static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
178 {
179 	u64 status;
180 
181 	xe_execlist_port_assert_held(port);
182 
183 	status = read_execlist_status(port->hwe);
184 	if (status & BIT(7))
185 		return;
186 
187 	__xe_execlist_port_start_next_active(port);
188 }
189 
190 static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
191 					 u16 intr_vec)
192 {
193 	struct xe_execlist_port *port = hwe->exl_port;
194 
195 	spin_lock(&port->lock);
196 	xe_execlist_port_irq_handler_locked(port);
197 	spin_unlock(&port->lock);
198 }
199 
200 static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
201 					 enum xe_exec_queue_priority priority)
202 {
203 	xe_execlist_port_assert_held(port);
204 
205 	if (port->running_exl && port->running_exl->active_priority >= priority)
206 		return;
207 
208 	__xe_execlist_port_start_next_active(port);
209 }
210 
211 static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
212 {
213 	struct xe_execlist_port *port = exl->port;
214 	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;
215 
216 	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
217 	XE_WARN_ON(priority < 0);
218 	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));
219 
220 	spin_lock_irq(&port->lock);
221 
222 	if (exl->active_priority != priority &&
223 	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
224 		/* Priority changed, move it to the right list */
225 		list_del(&exl->active_link);
226 		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
227 	}
228 
229 	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
230 		exl->active_priority = priority;
231 		list_add_tail(&exl->active_link, &port->active[priority]);
232 	}
233 
234 	xe_execlist_port_wake_locked(exl->port, priority);
235 
236 	spin_unlock_irq(&port->lock);
237 }
238 
239 static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
240 {
241 	struct xe_execlist_port *port =
242 		container_of(timer, struct xe_execlist_port, irq_fail);
243 
244 	spin_lock_irq(&port->lock);
245 	xe_execlist_port_irq_handler_locked(port);
246 	spin_unlock_irq(&port->lock);
247 
248 	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
249 	add_timer(&port->irq_fail);
250 }
251 
252 struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
253 						 struct xe_hw_engine *hwe)
254 {
255 	struct drm_device *drm = &xe->drm;
256 	struct xe_execlist_port *port;
257 	int i, err;
258 
259 	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
260 	if (!port) {
261 		err = -ENOMEM;
262 		goto err;
263 	}
264 
265 	port->hwe = hwe;
266 
267 	port->lrc = xe_lrc_create(hwe, NULL, SZ_16K);
268 	if (IS_ERR(port->lrc)) {
269 		err = PTR_ERR(port->lrc);
270 		goto err;
271 	}
272 
273 	spin_lock_init(&port->lock);
274 	for (i = 0; i < ARRAY_SIZE(port->active); i++)
275 		INIT_LIST_HEAD(&port->active[i]);
276 
277 	port->last_ctx_id = 1;
278 	port->running_exl = NULL;
279 
280 	hwe->irq_handler = xe_execlist_port_irq_handler;
281 
282 	/* TODO: Fix the interrupt code so it doesn't race like mad */
283 	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
284 	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
285 	add_timer(&port->irq_fail);
286 
287 	return port;
288 
289 err:
290 	return ERR_PTR(err);
291 }
292 
293 void xe_execlist_port_destroy(struct xe_execlist_port *port)
294 {
295 	del_timer(&port->irq_fail);
296 
297 	/* Prevent an interrupt while we're destroying */
298 	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
299 	port->hwe->irq_handler = NULL;
300 	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
301 
302 	xe_lrc_put(port->lrc);
303 }
304 
305 static struct dma_fence *
306 execlist_run_job(struct drm_sched_job *drm_job)
307 {
308 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
309 	struct xe_exec_queue *q = job->q;
310 	struct xe_execlist_exec_queue *exl = job->q->execlist;
311 
312 	q->ring_ops->emit_job(job);
313 	xe_execlist_make_active(exl);
314 
315 	return dma_fence_get(job->fence);
316 }
317 
318 static void execlist_job_free(struct drm_sched_job *drm_job)
319 {
320 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
321 
322 	xe_exec_queue_update_run_ticks(job->q);
323 	xe_sched_job_put(job);
324 }
325 
326 static const struct drm_sched_backend_ops drm_sched_ops = {
327 	.run_job = execlist_run_job,
328 	.free_job = execlist_job_free,
329 };
330 
331 static int execlist_exec_queue_init(struct xe_exec_queue *q)
332 {
333 	struct drm_gpu_scheduler *sched;
334 	struct xe_execlist_exec_queue *exl;
335 	struct xe_device *xe = gt_to_xe(q->gt);
336 	int err;
337 
338 	xe_assert(xe, !xe_device_uc_enabled(xe));
339 
340 	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
341 
342 	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
343 	if (!exl)
344 		return -ENOMEM;
345 
346 	exl->q = q;
347 
348 	err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
349 			     q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES,
350 			     XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
351 			     NULL, NULL, q->hwe->name,
352 			     gt_to_xe(q->gt)->drm.dev);
353 	if (err)
354 		goto err_free;
355 
356 	sched = &exl->sched;
357 	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
358 	if (err)
359 		goto err_sched;
360 
361 	exl->port = q->hwe->exl_port;
362 	exl->has_run = false;
363 	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
364 	q->execlist = exl;
365 	q->entity = &exl->entity;
366 
367 	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);
368 
369 	return 0;
370 
371 err_sched:
372 	drm_sched_fini(&exl->sched);
373 err_free:
374 	kfree(exl);
375 	return err;
376 }
377 
378 static void execlist_exec_queue_fini_async(struct work_struct *w)
379 {
380 	struct xe_execlist_exec_queue *ee =
381 		container_of(w, struct xe_execlist_exec_queue, fini_async);
382 	struct xe_exec_queue *q = ee->q;
383 	struct xe_execlist_exec_queue *exl = q->execlist;
384 	struct xe_device *xe = gt_to_xe(q->gt);
385 	unsigned long flags;
386 
387 	xe_assert(xe, !xe_device_uc_enabled(xe));
388 
389 	spin_lock_irqsave(&exl->port->lock, flags);
390 	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
391 		list_del(&exl->active_link);
392 	spin_unlock_irqrestore(&exl->port->lock, flags);
393 
394 	drm_sched_entity_fini(&exl->entity);
395 	drm_sched_fini(&exl->sched);
396 	kfree(exl);
397 
398 	xe_exec_queue_fini(q);
399 }
400 
/* exec_queue_ops .kill hook: currently a no-op for execlist submission. */
static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
	/* NIY (not implemented yet) */
}
405 
406 static void execlist_exec_queue_fini(struct xe_exec_queue *q)
407 {
408 	INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
409 	queue_work(system_unbound_wq, &q->execlist->fini_async);
410 }
411 
412 static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
413 					    enum xe_exec_queue_priority priority)
414 {
415 	/* NIY */
416 	return 0;
417 }
418 
419 static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
420 {
421 	/* NIY */
422 	return 0;
423 }
424 
425 static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
426 						   u32 preempt_timeout_us)
427 {
428 	/* NIY */
429 	return 0;
430 }
431 
/* exec_queue_ops .suspend hook: currently does nothing and returns 0. */
static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	/* NIY (not implemented yet) */
	return 0;
}
437 
/* exec_queue_ops .suspend_wait hook: currently does nothing and returns 0. */
static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	/* NIY (not implemented yet) */
	return 0;
}
444 
/* exec_queue_ops .resume hook: currently a no-op for execlist submission. */
static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
	/* NIY (not implemented yet) */
}
449 
450 static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
451 {
452 	/* NIY */
453 	return false;
454 }
455 
456 static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
457 	.init = execlist_exec_queue_init,
458 	.kill = execlist_exec_queue_kill,
459 	.fini = execlist_exec_queue_fini,
460 	.set_priority = execlist_exec_queue_set_priority,
461 	.set_timeslice = execlist_exec_queue_set_timeslice,
462 	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
463 	.suspend = execlist_exec_queue_suspend,
464 	.suspend_wait = execlist_exec_queue_suspend_wait,
465 	.resume = execlist_exec_queue_resume,
466 	.reset_status = execlist_exec_queue_reset_status,
467 };
468 
469 int xe_execlist_init(struct xe_gt *gt)
470 {
471 	/* GuC submission enabled, nothing to do */
472 	if (xe_device_uc_enabled(gt_to_xe(gt)))
473 		return 0;
474 
475 	gt->exec_queue_ops = &execlist_exec_queue_ops;
476 
477 	return 0;
478 }
479