xref: /linux/drivers/gpu/drm/xe/xe_execlist.c (revision 7f4f3b14e8079ecde096bd734af10e30d40c27b7)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "xe_execlist.h"
7 
8 #include <drm/drm_managed.h>
9 
10 #include "instructions/xe_mi_commands.h"
11 #include "regs/xe_engine_regs.h"
12 #include "regs/xe_gt_regs.h"
13 #include "regs/xe_lrc_layout.h"
14 #include "xe_assert.h"
15 #include "xe_bo.h"
16 #include "xe_device.h"
17 #include "xe_exec_queue.h"
18 #include "xe_gt.h"
19 #include "xe_hw_fence.h"
20 #include "xe_lrc.h"
21 #include "xe_macros.h"
22 #include "xe_mmio.h"
23 #include "xe_mocs.h"
24 #include "xe_ring_ops_types.h"
25 #include "xe_sched_job.h"
26 
/* Named as a hang limit; not referenced by the code visible in this file. */
#define XE_EXECLIST_HANG_LIMIT 1

/*
 * Location of the software context ID field inside the 64-bit LRC
 * descriptor. Two layouts exist: the XEHP_* one is selected where the code
 * checks GRAPHICS_VERx100() >= 1250, the plain one otherwise.
 */
#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define XEHP_SW_CTX_ID_SHIFT  39
#define XEHP_SW_CTX_ID_WIDTH  16

/* Descriptor bits 47:37 */
#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_SHIFT + SW_CTX_ID_WIDTH - 1, \
		    SW_CTX_ID_SHIFT)

/* Descriptor bits 54:39 */
#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_SHIFT + XEHP_SW_CTX_ID_WIDTH - 1, \
		    XEHP_SW_CTX_ID_SHIFT)
41 
42 
43 static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
44 			u32 ctx_id)
45 {
46 	struct xe_gt *gt = hwe->gt;
47 	struct xe_mmio *mmio = &gt->mmio;
48 	struct xe_device *xe = gt_to_xe(gt);
49 	u64 lrc_desc;
50 
51 	lrc_desc = xe_lrc_descriptor(lrc);
52 
53 	if (GRAPHICS_VERx100(xe) >= 1250) {
54 		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
55 		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
56 	} else {
57 		xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
58 		lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
59 	}
60 
61 	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
62 		xe_mmio_write32(mmio, RCU_MODE,
63 				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
64 
65 	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
66 	lrc->ring.old_tail = lrc->ring.tail;
67 
68 	/*
69 	 * Make sure the context image is complete before we submit it to HW.
70 	 *
71 	 * Ostensibly, writes (including the WCB) should be flushed prior to
72 	 * an uncached write such as our mmio register access, the empirical
73 	 * evidence (esp. on Braswell) suggests that the WC write into memory
74 	 * may not be visible to the HW prior to the completion of the UC
75 	 * register write and that we may begin execution from the context
76 	 * before its image is complete leading to invalid PD chasing.
77 	 */
78 	wmb();
79 
80 	xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
81 			xe_bo_ggtt_addr(hwe->hwsp));
82 	xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));
83 	xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base),
84 			_MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
85 
86 	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
87 			lower_32_bits(lrc_desc));
88 	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
89 			upper_32_bits(lrc_desc));
90 	xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
91 			EL_CTRL_LOAD);
92 }
93 
94 static void __xe_execlist_port_start(struct xe_execlist_port *port,
95 				     struct xe_execlist_exec_queue *exl)
96 {
97 	struct xe_device *xe = gt_to_xe(port->hwe->gt);
98 	int max_ctx = FIELD_MAX(SW_CTX_ID);
99 
100 	if (GRAPHICS_VERx100(xe) >= 1250)
101 		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
102 
103 	xe_execlist_port_assert_held(port);
104 
105 	if (port->running_exl != exl || !exl->has_run) {
106 		port->last_ctx_id++;
107 
108 		/* 0 is reserved for the kernel context */
109 		if (port->last_ctx_id > max_ctx)
110 			port->last_ctx_id = 1;
111 	}
112 
113 	__start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
114 	port->running_exl = exl;
115 	exl->has_run = true;
116 }
117 
118 static void __xe_execlist_port_idle(struct xe_execlist_port *port)
119 {
120 	u32 noop[2] = { MI_NOOP, MI_NOOP };
121 
122 	xe_execlist_port_assert_held(port);
123 
124 	if (!port->running_exl)
125 		return;
126 
127 	xe_lrc_write_ring(port->lrc, noop, sizeof(noop));
128 	__start_lrc(port->hwe, port->lrc, 0);
129 	port->running_exl = NULL;
130 }
131 
132 static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
133 {
134 	struct xe_lrc *lrc = exl->q->lrc[0];
135 
136 	return lrc->ring.tail == lrc->ring.old_tail;
137 }
138 
139 static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
140 {
141 	struct xe_execlist_exec_queue *exl = NULL;
142 	int i;
143 
144 	xe_execlist_port_assert_held(port);
145 
146 	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
147 		while (!list_empty(&port->active[i])) {
148 			exl = list_first_entry(&port->active[i],
149 					       struct xe_execlist_exec_queue,
150 					       active_link);
151 			list_del(&exl->active_link);
152 
153 			if (xe_execlist_is_idle(exl)) {
154 				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
155 				continue;
156 			}
157 
158 			list_add_tail(&exl->active_link, &port->active[i]);
159 			__xe_execlist_port_start(port, exl);
160 			return;
161 		}
162 	}
163 
164 	__xe_execlist_port_idle(port);
165 }
166 
167 static u64 read_execlist_status(struct xe_hw_engine *hwe)
168 {
169 	struct xe_gt *gt = hwe->gt;
170 	u32 hi, lo;
171 
172 	lo = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
173 	hi = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
174 
175 	return lo | (u64)hi << 32;
176 }
177 
178 static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
179 {
180 	u64 status;
181 
182 	xe_execlist_port_assert_held(port);
183 
184 	status = read_execlist_status(port->hwe);
185 	if (status & BIT(7))
186 		return;
187 
188 	__xe_execlist_port_start_next_active(port);
189 }
190 
191 static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
192 					 u16 intr_vec)
193 {
194 	struct xe_execlist_port *port = hwe->exl_port;
195 
196 	spin_lock(&port->lock);
197 	xe_execlist_port_irq_handler_locked(port);
198 	spin_unlock(&port->lock);
199 }
200 
201 static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
202 					 enum xe_exec_queue_priority priority)
203 {
204 	xe_execlist_port_assert_held(port);
205 
206 	if (port->running_exl && port->running_exl->active_priority >= priority)
207 		return;
208 
209 	__xe_execlist_port_start_next_active(port);
210 }
211 
212 static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
213 {
214 	struct xe_execlist_port *port = exl->port;
215 	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;
216 
217 	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
218 	XE_WARN_ON(priority < 0);
219 	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));
220 
221 	spin_lock_irq(&port->lock);
222 
223 	if (exl->active_priority != priority &&
224 	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
225 		/* Priority changed, move it to the right list */
226 		list_del(&exl->active_link);
227 		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
228 	}
229 
230 	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
231 		exl->active_priority = priority;
232 		list_add_tail(&exl->active_link, &port->active[priority]);
233 	}
234 
235 	xe_execlist_port_wake_locked(exl->port, priority);
236 
237 	spin_unlock_irq(&port->lock);
238 }
239 
240 static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
241 {
242 	struct xe_execlist_port *port =
243 		container_of(timer, struct xe_execlist_port, irq_fail);
244 
245 	spin_lock_irq(&port->lock);
246 	xe_execlist_port_irq_handler_locked(port);
247 	spin_unlock_irq(&port->lock);
248 
249 	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
250 	add_timer(&port->irq_fail);
251 }
252 
253 struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
254 						 struct xe_hw_engine *hwe)
255 {
256 	struct drm_device *drm = &xe->drm;
257 	struct xe_execlist_port *port;
258 	int i, err;
259 
260 	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
261 	if (!port) {
262 		err = -ENOMEM;
263 		goto err;
264 	}
265 
266 	port->hwe = hwe;
267 
268 	port->lrc = xe_lrc_create(hwe, NULL, SZ_16K);
269 	if (IS_ERR(port->lrc)) {
270 		err = PTR_ERR(port->lrc);
271 		goto err;
272 	}
273 
274 	spin_lock_init(&port->lock);
275 	for (i = 0; i < ARRAY_SIZE(port->active); i++)
276 		INIT_LIST_HEAD(&port->active[i]);
277 
278 	port->last_ctx_id = 1;
279 	port->running_exl = NULL;
280 
281 	hwe->irq_handler = xe_execlist_port_irq_handler;
282 
283 	/* TODO: Fix the interrupt code so it doesn't race like mad */
284 	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
285 	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
286 	add_timer(&port->irq_fail);
287 
288 	return port;
289 
290 err:
291 	return ERR_PTR(err);
292 }
293 
294 void xe_execlist_port_destroy(struct xe_execlist_port *port)
295 {
296 	del_timer(&port->irq_fail);
297 
298 	/* Prevent an interrupt while we're destroying */
299 	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
300 	port->hwe->irq_handler = NULL;
301 	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
302 
303 	xe_lrc_put(port->lrc);
304 }
305 
306 static struct dma_fence *
307 execlist_run_job(struct drm_sched_job *drm_job)
308 {
309 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
310 	struct xe_exec_queue *q = job->q;
311 	struct xe_execlist_exec_queue *exl = job->q->execlist;
312 
313 	q->ring_ops->emit_job(job);
314 	xe_execlist_make_active(exl);
315 
316 	return job->fence;
317 }
318 
319 static void execlist_job_free(struct drm_sched_job *drm_job)
320 {
321 	struct xe_sched_job *job = to_xe_sched_job(drm_job);
322 
323 	xe_exec_queue_update_run_ticks(job->q);
324 	xe_sched_job_put(job);
325 }
326 
327 static const struct drm_sched_backend_ops drm_sched_ops = {
328 	.run_job = execlist_run_job,
329 	.free_job = execlist_job_free,
330 };
331 
332 static int execlist_exec_queue_init(struct xe_exec_queue *q)
333 {
334 	struct drm_gpu_scheduler *sched;
335 	struct xe_execlist_exec_queue *exl;
336 	struct xe_device *xe = gt_to_xe(q->gt);
337 	int err;
338 
339 	xe_assert(xe, !xe_device_uc_enabled(xe));
340 
341 	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
342 
343 	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
344 	if (!exl)
345 		return -ENOMEM;
346 
347 	exl->q = q;
348 
349 	err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
350 			     q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES,
351 			     XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
352 			     NULL, NULL, q->hwe->name,
353 			     gt_to_xe(q->gt)->drm.dev);
354 	if (err)
355 		goto err_free;
356 
357 	sched = &exl->sched;
358 	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
359 	if (err)
360 		goto err_sched;
361 
362 	exl->port = q->hwe->exl_port;
363 	exl->has_run = false;
364 	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
365 	q->execlist = exl;
366 	q->entity = &exl->entity;
367 
368 	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);
369 
370 	return 0;
371 
372 err_sched:
373 	drm_sched_fini(&exl->sched);
374 err_free:
375 	kfree(exl);
376 	return err;
377 }
378 
379 static void execlist_exec_queue_fini_async(struct work_struct *w)
380 {
381 	struct xe_execlist_exec_queue *ee =
382 		container_of(w, struct xe_execlist_exec_queue, fini_async);
383 	struct xe_exec_queue *q = ee->q;
384 	struct xe_execlist_exec_queue *exl = q->execlist;
385 	struct xe_device *xe = gt_to_xe(q->gt);
386 	unsigned long flags;
387 
388 	xe_assert(xe, !xe_device_uc_enabled(xe));
389 
390 	spin_lock_irqsave(&exl->port->lock, flags);
391 	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
392 		list_del(&exl->active_link);
393 	spin_unlock_irqrestore(&exl->port->lock, flags);
394 
395 	drm_sched_entity_fini(&exl->entity);
396 	drm_sched_fini(&exl->sched);
397 	kfree(exl);
398 
399 	xe_exec_queue_fini(q);
400 }
401 
/*
 * Exec queue "kill" hook.
 * Stub: NIY (presumably "not implemented yet"); does nothing.
 */
static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
}
406 
407 static void execlist_exec_queue_fini(struct xe_exec_queue *q)
408 {
409 	INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
410 	queue_work(system_unbound_wq, &q->execlist->fini_async);
411 }
412 
413 static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
414 					    enum xe_exec_queue_priority priority)
415 {
416 	/* NIY */
417 	return 0;
418 }
419 
420 static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
421 {
422 	/* NIY */
423 	return 0;
424 }
425 
426 static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
427 						   u32 preempt_timeout_us)
428 {
429 	/* NIY */
430 	return 0;
431 }
432 
/*
 * Exec queue "suspend" hook.
 * Stub: NIY (presumably "not implemented yet"); always returns 0.
 */
static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	return 0;
}
438 
/*
 * Exec queue "suspend_wait" hook.
 * Stub: NIY (presumably "not implemented yet"); always returns 0.
 */
static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	return 0;
}
445 
/*
 * Exec queue "resume" hook.
 * Stub: NIY (presumably "not implemented yet"); does nothing.
 */
static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
}
450 
451 static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
452 {
453 	/* NIY */
454 	return false;
455 }
456 
457 static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
458 	.init = execlist_exec_queue_init,
459 	.kill = execlist_exec_queue_kill,
460 	.fini = execlist_exec_queue_fini,
461 	.set_priority = execlist_exec_queue_set_priority,
462 	.set_timeslice = execlist_exec_queue_set_timeslice,
463 	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
464 	.suspend = execlist_exec_queue_suspend,
465 	.suspend_wait = execlist_exec_queue_suspend_wait,
466 	.resume = execlist_exec_queue_resume,
467 	.reset_status = execlist_exec_queue_reset_status,
468 };
469 
470 int xe_execlist_init(struct xe_gt *gt)
471 {
472 	/* GuC submission enabled, nothing to do */
473 	if (xe_device_uc_enabled(gt_to_xe(gt)))
474 		return 0;
475 
476 	gt->exec_queue_ops = &execlist_exec_queue_ops;
477 
478 	return 0;
479 }
480