xref: /linux/drivers/gpu/drm/i915/gt/intel_engine_cs.c (revision b8d312aa075f33282565467662c4628dae0a2aff)
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <drm/drm_print.h>
26 
27 #include "gem/i915_gem_context.h"
28 
29 #include "i915_drv.h"
30 
31 #include "gt/intel_gt.h"
32 
33 #include "intel_engine.h"
34 #include "intel_engine_pm.h"
35 #include "intel_engine_pool.h"
36 #include "intel_engine_user.h"
37 #include "intel_context.h"
38 #include "intel_lrc.h"
39 #include "intel_reset.h"
40 
/* Haswell does have the CXT_SIZE register however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)

/*
 * Fixed render-class context image sizes returned by
 * intel_engine_context_size() for gen8 and later. The DEFAULT_ value is the
 * fallback returned (together with a MISSING_CASE warning) for a gen that
 * has no explicit case there.
 */
#define DEFAULT_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN10_LR_CONTEXT_RENDER_SIZE	(18 * PAGE_SIZE)
#define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)

/* Context image size returned for every non-render class on gen8+. */
#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)
57 
/* Number of (gen, mmio base) slots available per engine in engine_info. */
#define MAX_MMIO_BASES 3
/*
 * Static description of a single engine: its hardware id, its class/instance
 * pair and the mmio base to use depending on the device gen (resolved by
 * __engine_mmio_base()).
 */
struct engine_info {
	unsigned int hw_id;
	u8 class;
	u8 instance;
	/* mmio bases table *must* be sorted in reverse gen order */
	struct engine_mmio_base {
		u32 gen : 8;	/* lowest gen for which @base applies */
		u32 base : 24;	/* mmio base used from @gen onwards */
	} mmio_bases[MAX_MMIO_BASES];
};
69 
/*
 * Table of the engines known to the driver, indexed by engine id.
 * intel_engines_init_mmio() walks this table and sets up every entry the
 * device reports through HAS_ENGINE(). Unused mmio_bases slots are left
 * zero-initialised.
 */
static const struct engine_info intel_engines[] = {
	[RCS0] = {
		.hw_id = RCS0_HW,
		.class = RENDER_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 1, .base = RENDER_RING_BASE }
		},
	},
	[BCS0] = {
		.hw_id = BCS0_HW,
		.class = COPY_ENGINE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 6, .base = BLT_RING_BASE }
		},
	},
	[VCS0] = {
		.hw_id = VCS0_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD_RING_BASE },
			{ .gen = 6, .base = GEN6_BSD_RING_BASE },
			{ .gen = 4, .base = BSD_RING_BASE }
		},
	},
	[VCS1] = {
		.hw_id = VCS1_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD2_RING_BASE },
			{ .gen = 8, .base = GEN8_BSD2_RING_BASE }
		},
	},
	[VCS2] = {
		.hw_id = VCS2_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD3_RING_BASE }
		},
	},
	[VCS3] = {
		.hw_id = VCS3_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD4_RING_BASE }
		},
	},
	[VECS0] = {
		.hw_id = VECS0_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX_RING_BASE },
			{ .gen = 7, .base = VEBOX_RING_BASE }
		},
	},
	[VECS1] = {
		.hw_id = VECS1_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
		},
	},
};
140 
141 /**
142  * intel_engine_context_size() - return the size of the context for an engine
143  * @dev_priv: i915 device private
144  * @class: engine class
145  *
146  * Each engine class may require a different amount of space for a context
147  * image.
148  *
149  * Return: size (in bytes) of an engine class specific context image
150  *
151  * Note: this size includes the HWSP, which is part of the context image
152  * in LRC mode, but does not include the "shared data page" used with
153  * GuC submission. The caller should account for this if using the GuC.
154  */
155 u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
156 {
157 	u32 cxt_size;
158 
159 	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
160 
161 	switch (class) {
162 	case RENDER_CLASS:
163 		switch (INTEL_GEN(dev_priv)) {
164 		default:
165 			MISSING_CASE(INTEL_GEN(dev_priv));
166 			return DEFAULT_LR_CONTEXT_RENDER_SIZE;
167 		case 12:
168 		case 11:
169 			return GEN11_LR_CONTEXT_RENDER_SIZE;
170 		case 10:
171 			return GEN10_LR_CONTEXT_RENDER_SIZE;
172 		case 9:
173 			return GEN9_LR_CONTEXT_RENDER_SIZE;
174 		case 8:
175 			return GEN8_LR_CONTEXT_RENDER_SIZE;
176 		case 7:
177 			if (IS_HASWELL(dev_priv))
178 				return HSW_CXT_TOTAL_SIZE;
179 
180 			cxt_size = I915_READ(GEN7_CXT_SIZE);
181 			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
182 					PAGE_SIZE);
183 		case 6:
184 			cxt_size = I915_READ(CXT_SIZE);
185 			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
186 					PAGE_SIZE);
187 		case 5:
188 		case 4:
189 			/*
190 			 * There is a discrepancy here between the size reported
191 			 * by the register and the size of the context layout
192 			 * in the docs. Both are described as authorative!
193 			 *
194 			 * The discrepancy is on the order of a few cachelines,
195 			 * but the total is under one page (4k), which is our
196 			 * minimum allocation anyway so it should all come
197 			 * out in the wash.
198 			 */
199 			cxt_size = I915_READ(CXT_SIZE) + 1;
200 			DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
201 					 INTEL_GEN(dev_priv),
202 					 cxt_size * 64,
203 					 cxt_size - 1);
204 			return round_up(cxt_size * 64, PAGE_SIZE);
205 		case 3:
206 		case 2:
207 		/* For the special day when i810 gets merged. */
208 		case 1:
209 			return 0;
210 		}
211 		break;
212 	default:
213 		MISSING_CASE(class);
214 		/* fall through */
215 	case VIDEO_DECODE_CLASS:
216 	case VIDEO_ENHANCEMENT_CLASS:
217 	case COPY_ENGINE_CLASS:
218 		if (INTEL_GEN(dev_priv) < 8)
219 			return 0;
220 		return GEN8_LR_CONTEXT_OTHER_SIZE;
221 	}
222 }
223 
224 static u32 __engine_mmio_base(struct drm_i915_private *i915,
225 			      const struct engine_mmio_base *bases)
226 {
227 	int i;
228 
229 	for (i = 0; i < MAX_MMIO_BASES; i++)
230 		if (INTEL_GEN(i915) >= bases[i].gen)
231 			break;
232 
233 	GEM_BUG_ON(i == MAX_MMIO_BASES);
234 	GEM_BUG_ON(!bases[i].base);
235 
236 	return bases[i].base;
237 }
238 
/*
 * Format the provisional name "<class repr>'<instance>" into engine->name,
 * warning if the result did not fit in the buffer.
 */
static void __sprint_engine_name(struct intel_engine_cs *engine)
{
	/*
	 * Before we know what the uABI name for this engine will be,
	 * we still would like to keep track of this engine in the debug logs.
	 * We throw in a ' here as a reminder that this isn't its final name.
	 */
	GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u",
			     intel_engine_class_repr(engine->class),
			     engine->instance) >= sizeof(engine->name));
}
250 
251 void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
252 {
253 	/*
254 	 * Though they added more rings on g4x/ilk, they did not add
255 	 * per-engine HWSTAM until gen6.
256 	 */
257 	if (INTEL_GEN(engine->i915) < 6 && engine->class != RENDER_CLASS)
258 		return;
259 
260 	if (INTEL_GEN(engine->i915) >= 3)
261 		ENGINE_WRITE(engine, RING_HWSTAM, mask);
262 	else
263 		ENGINE_WRITE16(engine, RING_HWSTAM, mask);
264 }
265 
/*
 * Put the engine's mmio state into a known condition; called from
 * intel_engine_setup() when the driver takes over the engine.
 */
static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
{
	/* Mask off all writes into the unknown HWSP */
	intel_engine_set_hwsp_writemask(engine, ~0u);
}
271 
272 static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
273 {
274 	const struct engine_info *info = &intel_engines[id];
275 	struct intel_engine_cs *engine;
276 
277 	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
278 	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
279 
280 	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
281 		return -EINVAL;
282 
283 	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
284 		return -EINVAL;
285 
286 	if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
287 		return -EINVAL;
288 
289 	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
290 	if (!engine)
291 		return -ENOMEM;
292 
293 	BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
294 
295 	engine->id = id;
296 	engine->mask = BIT(id);
297 	engine->i915 = gt->i915;
298 	engine->gt = gt;
299 	engine->uncore = gt->uncore;
300 	engine->hw_id = engine->guc_id = info->hw_id;
301 	engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases);
302 
303 	engine->class = info->class;
304 	engine->instance = info->instance;
305 	__sprint_engine_name(engine);
306 
307 	/*
308 	 * To be overridden by the backend on setup. However to facilitate
309 	 * cleanup on error during setup, we always provide the destroy vfunc.
310 	 */
311 	engine->destroy = (typeof(engine->destroy))kfree;
312 
313 	engine->context_size = intel_engine_context_size(gt->i915,
314 							 engine->class);
315 	if (WARN_ON(engine->context_size > BIT(20)))
316 		engine->context_size = 0;
317 	if (engine->context_size)
318 		DRIVER_CAPS(gt->i915)->has_logical_contexts = true;
319 
320 	/* Nothing to do here, execute in order of dependencies */
321 	engine->schedule = NULL;
322 
323 	seqlock_init(&engine->stats.lock);
324 
325 	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
326 
327 	/* Scrub mmio state on takeover */
328 	intel_engine_sanitize_mmio(engine);
329 
330 	gt->engine_class[info->class][info->instance] = engine;
331 
332 	intel_engine_add_user(engine);
333 	gt->i915->engine[id] = engine;
334 
335 	return 0;
336 }
337 
338 static void __setup_engine_capabilities(struct intel_engine_cs *engine)
339 {
340 	struct drm_i915_private *i915 = engine->i915;
341 
342 	if (engine->class == VIDEO_DECODE_CLASS) {
343 		/*
344 		 * HEVC support is present on first engine instance
345 		 * before Gen11 and on all instances afterwards.
346 		 */
347 		if (INTEL_GEN(i915) >= 11 ||
348 		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
349 			engine->uabi_capabilities |=
350 				I915_VIDEO_CLASS_CAPABILITY_HEVC;
351 
352 		/*
353 		 * SFC block is present only on even logical engine
354 		 * instances.
355 		 */
356 		if ((INTEL_GEN(i915) >= 11 &&
357 		     RUNTIME_INFO(i915)->vdbox_sfc_access & engine->mask) ||
358 		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
359 			engine->uabi_capabilities |=
360 				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
361 	} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
362 		if (INTEL_GEN(i915) >= 9)
363 			engine->uabi_capabilities |=
364 				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
365 	}
366 }
367 
/* Fill in the uABI capability bits of every engine present on @i915. */
static void intel_setup_engine_capabilities(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		__setup_engine_capabilities(engine);
}
376 
/**
 * intel_engines_cleanup() - free the resources allocated for Command Streamers
 * @i915: the i915 device
 *
 * Calls the destroy vfunc of every engine and clears its slot in
 * @i915->engine.
 */
void intel_engines_cleanup(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id) {
		engine->destroy(engine);
		/* Drop the now stale pointer from the device's engine array. */
		i915->engine[id] = NULL;
	}
}
391 
392 /**
393  * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
394  * @i915: the i915 device
395  *
396  * Return: non-zero if the initialization failed.
397  */
398 int intel_engines_init_mmio(struct drm_i915_private *i915)
399 {
400 	struct intel_device_info *device_info = mkwrite_device_info(i915);
401 	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
402 	unsigned int mask = 0;
403 	unsigned int i;
404 	int err;
405 
406 	WARN_ON(engine_mask == 0);
407 	WARN_ON(engine_mask &
408 		GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
409 
410 	if (i915_inject_probe_failure(i915))
411 		return -ENODEV;
412 
413 	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
414 		if (!HAS_ENGINE(i915, i))
415 			continue;
416 
417 		err = intel_engine_setup(&i915->gt, i);
418 		if (err)
419 			goto cleanup;
420 
421 		mask |= BIT(i);
422 	}
423 
424 	/*
425 	 * Catch failures to update intel_engines table when the new engines
426 	 * are added to the driver by a warning and disabling the forgotten
427 	 * engines.
428 	 */
429 	if (WARN_ON(mask != engine_mask))
430 		device_info->engine_mask = mask;
431 
432 	RUNTIME_INFO(i915)->num_engines = hweight32(mask);
433 
434 	intel_gt_check_and_clear_faults(&i915->gt);
435 
436 	intel_setup_engine_capabilities(i915);
437 
438 	return 0;
439 
440 cleanup:
441 	intel_engines_cleanup(i915);
442 	return err;
443 }
444 
445 /**
446  * intel_engines_init() - init the Engine Command Streamers
447  * @i915: i915 device private
448  *
449  * Return: non-zero if the initialization failed.
450  */
451 int intel_engines_init(struct drm_i915_private *i915)
452 {
453 	int (*init)(struct intel_engine_cs *engine);
454 	struct intel_engine_cs *engine;
455 	enum intel_engine_id id;
456 	int err;
457 
458 	if (HAS_EXECLISTS(i915))
459 		init = intel_execlists_submission_init;
460 	else
461 		init = intel_ring_submission_init;
462 
463 	for_each_engine(engine, i915, id) {
464 		err = init(engine);
465 		if (err)
466 			goto cleanup;
467 	}
468 
469 	return 0;
470 
471 cleanup:
472 	intel_engines_cleanup(i915);
473 	return err;
474 }
475 
476 void intel_engine_init_execlists(struct intel_engine_cs *engine)
477 {
478 	struct intel_engine_execlists * const execlists = &engine->execlists;
479 
480 	execlists->port_mask = 1;
481 	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
482 	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
483 
484 	memset(execlists->pending, 0, sizeof(execlists->pending));
485 	execlists->active =
486 		memset(execlists->inflight, 0, sizeof(execlists->inflight));
487 
488 	execlists->queue_priority_hint = INT_MIN;
489 	execlists->queue = RB_ROOT_CACHED;
490 }
491 
492 static void cleanup_status_page(struct intel_engine_cs *engine)
493 {
494 	struct i915_vma *vma;
495 
496 	/* Prevent writes into HWSP after returning the page to the system */
497 	intel_engine_set_hwsp_writemask(engine, ~0u);
498 
499 	vma = fetch_and_zero(&engine->status_page.vma);
500 	if (!vma)
501 		return;
502 
503 	if (!HWS_NEEDS_PHYSICAL(engine->i915))
504 		i915_vma_unpin(vma);
505 
506 	i915_gem_object_unpin_map(vma->obj);
507 	i915_gem_object_put(vma->obj);
508 }
509 
510 static int pin_ggtt_status_page(struct intel_engine_cs *engine,
511 				struct i915_vma *vma)
512 {
513 	unsigned int flags;
514 
515 	flags = PIN_GLOBAL;
516 	if (!HAS_LLC(engine->i915))
517 		/*
518 		 * On g33, we cannot place HWS above 256MiB, so
519 		 * restrict its pinning to the low mappable arena.
520 		 * Though this restriction is not documented for
521 		 * gen4, gen5, or byt, they also behave similarly
522 		 * and hang if the HWS is placed at the top of the
523 		 * GTT. To generalise, it appears that all !llc
524 		 * platforms have issues with us placing the HWS
525 		 * above the mappable region (even though we never
526 		 * actually map it).
527 		 */
528 		flags |= PIN_MAPPABLE;
529 	else
530 		flags |= PIN_HIGH;
531 
532 	return i915_vma_pin(vma, 0, 0, flags);
533 }
534 
535 static int init_status_page(struct intel_engine_cs *engine)
536 {
537 	struct drm_i915_gem_object *obj;
538 	struct i915_vma *vma;
539 	void *vaddr;
540 	int ret;
541 
542 	/*
543 	 * Though the HWS register does support 36bit addresses, historically
544 	 * we have had hangs and corruption reported due to wild writes if
545 	 * the HWS is placed above 4G. We only allow objects to be allocated
546 	 * in GFP_DMA32 for i965, and no earlier physical address users had
547 	 * access to more than 4G.
548 	 */
549 	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
550 	if (IS_ERR(obj)) {
551 		DRM_ERROR("Failed to allocate status page\n");
552 		return PTR_ERR(obj);
553 	}
554 
555 	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
556 
557 	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
558 	if (IS_ERR(vma)) {
559 		ret = PTR_ERR(vma);
560 		goto err;
561 	}
562 
563 	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
564 	if (IS_ERR(vaddr)) {
565 		ret = PTR_ERR(vaddr);
566 		goto err;
567 	}
568 
569 	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
570 	engine->status_page.vma = vma;
571 
572 	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
573 		ret = pin_ggtt_status_page(engine, vma);
574 		if (ret)
575 			goto err_unpin;
576 	}
577 
578 	return 0;
579 
580 err_unpin:
581 	i915_gem_object_unpin_map(obj);
582 err:
583 	i915_gem_object_put(obj);
584 	return ret;
585 }
586 
587 static int intel_engine_setup_common(struct intel_engine_cs *engine)
588 {
589 	int err;
590 
591 	init_llist_head(&engine->barrier_tasks);
592 
593 	err = init_status_page(engine);
594 	if (err)
595 		return err;
596 
597 	intel_engine_init_active(engine, ENGINE_PHYSICAL);
598 	intel_engine_init_breadcrumbs(engine);
599 	intel_engine_init_execlists(engine);
600 	intel_engine_init_hangcheck(engine);
601 	intel_engine_init_cmd_parser(engine);
602 	intel_engine_init__pm(engine);
603 
604 	intel_engine_pool_init(&engine->pool);
605 
606 	/* Use the whole device by default */
607 	engine->sseu =
608 		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
609 
610 	intel_engine_init_workarounds(engine);
611 	intel_engine_init_whitelist(engine);
612 	intel_engine_init_ctx_wa(engine);
613 
614 	return 0;
615 }
616 
617 /**
618  * intel_engines_setup- setup engine state not requiring hw access
619  * @i915: Device to setup.
620  *
621  * Initializes engine structure members shared between legacy and execlists
622  * submission modes which do not require hardware access.
623  *
624  * Typically done early in the submission mode specific engine setup stage.
625  */
626 int intel_engines_setup(struct drm_i915_private *i915)
627 {
628 	int (*setup)(struct intel_engine_cs *engine);
629 	struct intel_engine_cs *engine;
630 	enum intel_engine_id id;
631 	int err;
632 
633 	if (HAS_EXECLISTS(i915))
634 		setup = intel_execlists_submission_setup;
635 	else
636 		setup = intel_ring_submission_setup;
637 
638 	for_each_engine(engine, i915, id) {
639 		err = intel_engine_setup_common(engine);
640 		if (err)
641 			goto cleanup;
642 
643 		err = setup(engine);
644 		if (err)
645 			goto cleanup;
646 
647 		/* We expect the backend to take control over its state */
648 		GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree);
649 
650 		GEM_BUG_ON(!engine->cops);
651 	}
652 
653 	return 0;
654 
655 cleanup:
656 	intel_engines_cleanup(i915);
657 	return err;
658 }
659 
/*
 * Scratch frame used by measure_breadcrumb_dw(): a mock request, timeline
 * and ring, with @cs as the ring's backing storage into which the final
 * breadcrumb is emitted so its length can be measured.
 */
struct measure_breadcrumb {
	struct i915_request rq;
	struct intel_timeline timeline;
	struct intel_ring ring;
	u32 cs[1024];
};
666 
667 static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
668 {
669 	struct measure_breadcrumb *frame;
670 	int dw = -ENOMEM;
671 
672 	GEM_BUG_ON(!engine->gt->scratch);
673 
674 	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
675 	if (!frame)
676 		return -ENOMEM;
677 
678 	if (intel_timeline_init(&frame->timeline,
679 				engine->gt,
680 				engine->status_page.vma))
681 		goto out_frame;
682 
683 	frame->ring.vaddr = frame->cs;
684 	frame->ring.size = sizeof(frame->cs);
685 	frame->ring.effective_size = frame->ring.size;
686 	intel_ring_update_space(&frame->ring);
687 
688 	frame->rq.i915 = engine->i915;
689 	frame->rq.engine = engine;
690 	frame->rq.ring = &frame->ring;
691 	frame->rq.timeline = &frame->timeline;
692 
693 	dw = intel_timeline_pin(&frame->timeline);
694 	if (dw < 0)
695 		goto out_timeline;
696 
697 	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
698 	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
699 
700 	intel_timeline_unpin(&frame->timeline);
701 
702 out_timeline:
703 	intel_timeline_fini(&frame->timeline);
704 out_frame:
705 	kfree(frame);
706 	return dw;
707 }
708 
709 void
710 intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
711 {
712 	INIT_LIST_HEAD(&engine->active.requests);
713 
714 	spin_lock_init(&engine->active.lock);
715 	lockdep_set_subclass(&engine->active.lock, subclass);
716 
717 	/*
718 	 * Due to an interesting quirk in lockdep's internal debug tracking,
719 	 * after setting a subclass we must ensure the lock is used. Otherwise,
720 	 * nr_unused_locks is incremented once too often.
721 	 */
722 #ifdef CONFIG_DEBUG_LOCK_ALLOC
723 	local_irq_disable();
724 	lock_map_acquire(&engine->active.lock.dep_map);
725 	lock_map_release(&engine->active.lock.dep_map);
726 	local_irq_enable();
727 #endif
728 }
729 
730 static struct intel_context *
731 create_kernel_context(struct intel_engine_cs *engine)
732 {
733 	struct intel_context *ce;
734 	int err;
735 
736 	ce = intel_context_create(engine->i915->kernel_context, engine);
737 	if (IS_ERR(ce))
738 		return ce;
739 
740 	ce->ring = __intel_context_ring_size(SZ_4K);
741 
742 	err = intel_context_pin(ce);
743 	if (err) {
744 		intel_context_put(ce);
745 		return ERR_PTR(err);
746 	}
747 
748 	return ce;
749 }
750 
751 /**
752  * intel_engines_init_common - initialize cengine state which might require hw access
753  * @engine: Engine to initialize.
754  *
755  * Initializes @engine@ structure members shared between legacy and execlists
756  * submission modes which do require hardware access.
757  *
758  * Typcally done at later stages of submission mode specific engine setup.
759  *
760  * Returns zero on success or an error code on failure.
761  */
762 int intel_engine_init_common(struct intel_engine_cs *engine)
763 {
764 	struct intel_context *ce;
765 	int ret;
766 
767 	engine->set_default_submission(engine);
768 
769 	/*
770 	 * We may need to do things with the shrinker which
771 	 * require us to immediately switch back to the default
772 	 * context. This can cause a problem as pinning the
773 	 * default context also requires GTT space which may not
774 	 * be available. To avoid this we always pin the default
775 	 * context.
776 	 */
777 	ce = create_kernel_context(engine);
778 	if (IS_ERR(ce))
779 		return PTR_ERR(ce);
780 
781 	engine->kernel_context = ce;
782 
783 	ret = measure_breadcrumb_dw(engine);
784 	if (ret < 0)
785 		goto err_unpin;
786 
787 	engine->emit_fini_breadcrumb_dw = ret;
788 
789 	return 0;
790 
791 err_unpin:
792 	intel_context_unpin(ce);
793 	intel_context_put(ce);
794 	return ret;
795 }
796 
797 /**
798  * intel_engines_cleanup_common - cleans up the engine state created by
799  *                                the common initiailizers.
800  * @engine: Engine to cleanup.
801  *
802  * This cleans up everything created by the common helpers.
803  */
804 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
805 {
806 	GEM_BUG_ON(!list_empty(&engine->active.requests));
807 
808 	cleanup_status_page(engine);
809 
810 	intel_engine_pool_fini(&engine->pool);
811 	intel_engine_fini_breadcrumbs(engine);
812 	intel_engine_cleanup_cmd_parser(engine);
813 
814 	if (engine->default_state)
815 		i915_gem_object_put(engine->default_state);
816 
817 	intel_context_unpin(engine->kernel_context);
818 	intel_context_put(engine->kernel_context);
819 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
820 
821 	intel_wa_list_free(&engine->ctx_wa_list);
822 	intel_wa_list_free(&engine->wa_list);
823 	intel_wa_list_free(&engine->whitelist);
824 }
825 
826 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
827 {
828 	struct drm_i915_private *i915 = engine->i915;
829 
830 	u64 acthd;
831 
832 	if (INTEL_GEN(i915) >= 8)
833 		acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
834 	else if (INTEL_GEN(i915) >= 4)
835 		acthd = ENGINE_READ(engine, RING_ACTHD);
836 	else
837 		acthd = ENGINE_READ(engine, ACTHD);
838 
839 	return acthd;
840 }
841 
842 u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
843 {
844 	u64 bbaddr;
845 
846 	if (INTEL_GEN(engine->i915) >= 8)
847 		bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
848 	else
849 		bbaddr = ENGINE_READ(engine, RING_BBADDR);
850 
851 	return bbaddr;
852 }
853 
854 int intel_engine_stop_cs(struct intel_engine_cs *engine)
855 {
856 	struct intel_uncore *uncore = engine->uncore;
857 	const u32 base = engine->mmio_base;
858 	const i915_reg_t mode = RING_MI_MODE(base);
859 	int err;
860 
861 	if (INTEL_GEN(engine->i915) < 3)
862 		return -ENODEV;
863 
864 	GEM_TRACE("%s\n", engine->name);
865 
866 	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
867 
868 	err = 0;
869 	if (__intel_wait_for_register_fw(uncore,
870 					 mode, MODE_IDLE, MODE_IDLE,
871 					 1000, 0,
872 					 NULL)) {
873 		GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
874 		err = -ETIMEDOUT;
875 	}
876 
877 	/* A final mmio read to let GPU writes be hopefully flushed to memory */
878 	intel_uncore_posting_read_fw(uncore, mode);
879 
880 	return err;
881 }
882 
/*
 * Counterpart of intel_engine_stop_cs(): clear the STOP_RING bit in the
 * engine's MI_MODE register.
 */
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
{
	GEM_TRACE("%s\n", engine->name);

	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}
889 
890 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
891 {
892 	switch (type) {
893 	case I915_CACHE_NONE: return " uncached";
894 	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
895 	case I915_CACHE_L3_LLC: return " L3+LLC";
896 	case I915_CACHE_WT: return " WT";
897 	default: return "";
898 	}
899 }
900 
901 static u32
902 read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
903 		  i915_reg_t reg)
904 {
905 	struct drm_i915_private *i915 = engine->i915;
906 	struct intel_uncore *uncore = engine->uncore;
907 	u32 mcr_mask, mcr_ss, mcr, old_mcr, val;
908 	enum forcewake_domains fw_domains;
909 
910 	if (INTEL_GEN(i915) >= 11) {
911 		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
912 		mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
913 	} else {
914 		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
915 		mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
916 	}
917 
918 	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
919 						    FW_REG_READ);
920 	fw_domains |= intel_uncore_forcewake_for_reg(uncore,
921 						     GEN8_MCR_SELECTOR,
922 						     FW_REG_READ | FW_REG_WRITE);
923 
924 	spin_lock_irq(&uncore->lock);
925 	intel_uncore_forcewake_get__locked(uncore, fw_domains);
926 
927 	old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
928 
929 	mcr &= ~mcr_mask;
930 	mcr |= mcr_ss;
931 	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
932 
933 	val = intel_uncore_read_fw(uncore, reg);
934 
935 	mcr &= ~mcr_mask;
936 	mcr |= old_mcr & mcr_mask;
937 
938 	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
939 
940 	intel_uncore_forcewake_put__locked(uncore, fw_domains);
941 	spin_unlock_irq(&uncore->lock);
942 
943 	return val;
944 }
945 
946 /* NB: please notice the memset */
947 void intel_engine_get_instdone(struct intel_engine_cs *engine,
948 			       struct intel_instdone *instdone)
949 {
950 	struct drm_i915_private *i915 = engine->i915;
951 	struct intel_uncore *uncore = engine->uncore;
952 	u32 mmio_base = engine->mmio_base;
953 	int slice;
954 	int subslice;
955 
956 	memset(instdone, 0, sizeof(*instdone));
957 
958 	switch (INTEL_GEN(i915)) {
959 	default:
960 		instdone->instdone =
961 			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
962 
963 		if (engine->id != RCS0)
964 			break;
965 
966 		instdone->slice_common =
967 			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
968 		for_each_instdone_slice_subslice(i915, slice, subslice) {
969 			instdone->sampler[slice][subslice] =
970 				read_subslice_reg(engine, slice, subslice,
971 						  GEN7_SAMPLER_INSTDONE);
972 			instdone->row[slice][subslice] =
973 				read_subslice_reg(engine, slice, subslice,
974 						  GEN7_ROW_INSTDONE);
975 		}
976 		break;
977 	case 7:
978 		instdone->instdone =
979 			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
980 
981 		if (engine->id != RCS0)
982 			break;
983 
984 		instdone->slice_common =
985 			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
986 		instdone->sampler[0][0] =
987 			intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
988 		instdone->row[0][0] =
989 			intel_uncore_read(uncore, GEN7_ROW_INSTDONE);
990 
991 		break;
992 	case 6:
993 	case 5:
994 	case 4:
995 		instdone->instdone =
996 			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
997 		if (engine->id == RCS0)
998 			/* HACK: Using the wrong struct member */
999 			instdone->slice_common =
1000 				intel_uncore_read(uncore, GEN4_INSTDONE1);
1001 		break;
1002 	case 3:
1003 	case 2:
1004 		instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
1005 		break;
1006 	}
1007 }
1008 
1009 static bool ring_is_idle(struct intel_engine_cs *engine)
1010 {
1011 	bool idle = true;
1012 
1013 	if (I915_SELFTEST_ONLY(!engine->mmio_base))
1014 		return true;
1015 
1016 	if (!intel_engine_pm_get_if_awake(engine))
1017 		return true;
1018 
1019 	/* First check that no commands are left in the ring */
1020 	if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
1021 	    (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
1022 		idle = false;
1023 
1024 	/* No bit for gen2, so assume the CS parser is idle */
1025 	if (INTEL_GEN(engine->i915) > 2 &&
1026 	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
1027 		idle = false;
1028 
1029 	intel_engine_pm_put(engine);
1030 
1031 	return idle;
1032 }
1033 
1034 /**
1035  * intel_engine_is_idle() - Report if the engine has finished process all work
1036  * @engine: the intel_engine_cs
1037  *
1038  * Return true if there are no requests pending, nothing left to be submitted
1039  * to hardware, and that the engine is idle.
1040  */
1041 bool intel_engine_is_idle(struct intel_engine_cs *engine)
1042 {
1043 	/* More white lies, if wedged, hw state is inconsistent */
1044 	if (intel_gt_is_wedged(engine->gt))
1045 		return true;
1046 
1047 	if (!intel_engine_pm_is_awake(engine))
1048 		return true;
1049 
1050 	/* Waiting to drain ELSP? */
1051 	if (execlists_active(&engine->execlists)) {
1052 		struct tasklet_struct *t = &engine->execlists.tasklet;
1053 
1054 		synchronize_hardirq(engine->i915->drm.pdev->irq);
1055 
1056 		local_bh_disable();
1057 		if (tasklet_trylock(t)) {
1058 			/* Must wait for any GPU reset in progress. */
1059 			if (__tasklet_is_enabled(t))
1060 				t->func(t->data);
1061 			tasklet_unlock(t);
1062 		}
1063 		local_bh_enable();
1064 
1065 		/* Otherwise flush the tasklet if it was on another cpu */
1066 		tasklet_unlock_wait(t);
1067 
1068 		if (execlists_active(&engine->execlists))
1069 			return false;
1070 	}
1071 
1072 	/* ELSP is empty, but there are ready requests? E.g. after reset */
1073 	if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
1074 		return false;
1075 
1076 	/* Ring stopped? */
1077 	return ring_is_idle(engine);
1078 }
1079 
1080 bool intel_engines_are_idle(struct intel_gt *gt)
1081 {
1082 	struct intel_engine_cs *engine;
1083 	enum intel_engine_id id;
1084 
1085 	/*
1086 	 * If the driver is wedged, HW state may be very inconsistent and
1087 	 * report that it is still busy, even though we have stopped using it.
1088 	 */
1089 	if (intel_gt_is_wedged(gt))
1090 		return true;
1091 
1092 	/* Already parked (and passed an idleness test); must still be idle */
1093 	if (!READ_ONCE(gt->awake))
1094 		return true;
1095 
1096 	for_each_engine(engine, gt->i915, id) {
1097 		if (!intel_engine_is_idle(engine))
1098 			return false;
1099 	}
1100 
1101 	return true;
1102 }
1103 
1104 void intel_engines_reset_default_submission(struct intel_gt *gt)
1105 {
1106 	struct intel_engine_cs *engine;
1107 	enum intel_engine_id id;
1108 
1109 	for_each_engine(engine, gt->i915, id)
1110 		engine->set_default_submission(engine);
1111 }
1112 
1113 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
1114 {
1115 	switch (INTEL_GEN(engine->i915)) {
1116 	case 2:
1117 		return false; /* uses physical not virtual addresses */
1118 	case 3:
1119 		/* maybe only uses physical not virtual addresses */
1120 		return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
1121 	case 6:
1122 		return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
1123 	default:
1124 		return true;
1125 	}
1126 }
1127 
1128 static int print_sched_attr(struct drm_i915_private *i915,
1129 			    const struct i915_sched_attr *attr,
1130 			    char *buf, int x, int len)
1131 {
1132 	if (attr->priority == I915_PRIORITY_INVALID)
1133 		return x;
1134 
1135 	x += snprintf(buf + x, len - x,
1136 		      " prio=%d", attr->priority);
1137 
1138 	return x;
1139 }
1140 
1141 static void print_request(struct drm_printer *m,
1142 			  struct i915_request *rq,
1143 			  const char *prefix)
1144 {
1145 	const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
1146 	char buf[80] = "";
1147 	int x = 0;
1148 
1149 	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));
1150 
1151 	drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
1152 		   prefix,
1153 		   rq->fence.context, rq->fence.seqno,
1154 		   i915_request_completed(rq) ? "!" :
1155 		   i915_request_started(rq) ? "*" :
1156 		   "",
1157 		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
1158 			    &rq->fence.flags) ? "+" :
1159 		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
1160 			    &rq->fence.flags) ? "-" :
1161 		   "",
1162 		   buf,
1163 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
1164 		   name);
1165 }
1166 
1167 static void hexdump(struct drm_printer *m, const void *buf, size_t len)
1168 {
1169 	const size_t rowsize = 8 * sizeof(u32);
1170 	const void *prev = NULL;
1171 	bool skip = false;
1172 	size_t pos;
1173 
1174 	for (pos = 0; pos < len; pos += rowsize) {
1175 		char line[128];
1176 
1177 		if (prev && !memcmp(prev, buf + pos, rowsize)) {
1178 			if (!skip) {
1179 				drm_printf(m, "*\n");
1180 				skip = true;
1181 			}
1182 			continue;
1183 		}
1184 
1185 		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
1186 						rowsize, sizeof(u32),
1187 						line, sizeof(line),
1188 						false) >= sizeof(line));
1189 		drm_printf(m, "[%04zx] %s\n", pos, line);
1190 
1191 		prev = buf + pos;
1192 		skip = false;
1193 	}
1194 }
1195 
1196 static void intel_engine_print_registers(struct intel_engine_cs *engine,
1197 					 struct drm_printer *m)
1198 {
1199 	struct drm_i915_private *dev_priv = engine->i915;
1200 	const struct intel_engine_execlists * const execlists =
1201 		&engine->execlists;
1202 	unsigned long flags;
1203 	u64 addr;
1204 
1205 	if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
1206 		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
1207 	drm_printf(m, "\tRING_START: 0x%08x\n",
1208 		   ENGINE_READ(engine, RING_START));
1209 	drm_printf(m, "\tRING_HEAD:  0x%08x\n",
1210 		   ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
1211 	drm_printf(m, "\tRING_TAIL:  0x%08x\n",
1212 		   ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
1213 	drm_printf(m, "\tRING_CTL:   0x%08x%s\n",
1214 		   ENGINE_READ(engine, RING_CTL),
1215 		   ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
1216 	if (INTEL_GEN(engine->i915) > 2) {
1217 		drm_printf(m, "\tRING_MODE:  0x%08x%s\n",
1218 			   ENGINE_READ(engine, RING_MI_MODE),
1219 			   ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
1220 	}
1221 
1222 	if (INTEL_GEN(dev_priv) >= 6) {
1223 		drm_printf(m, "\tRING_IMR: %08x\n",
1224 			   ENGINE_READ(engine, RING_IMR));
1225 	}
1226 
1227 	addr = intel_engine_get_active_head(engine);
1228 	drm_printf(m, "\tACTHD:  0x%08x_%08x\n",
1229 		   upper_32_bits(addr), lower_32_bits(addr));
1230 	addr = intel_engine_get_last_batch_head(engine);
1231 	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
1232 		   upper_32_bits(addr), lower_32_bits(addr));
1233 	if (INTEL_GEN(dev_priv) >= 8)
1234 		addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
1235 	else if (INTEL_GEN(dev_priv) >= 4)
1236 		addr = ENGINE_READ(engine, RING_DMA_FADD);
1237 	else
1238 		addr = ENGINE_READ(engine, DMA_FADD_I8XX);
1239 	drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
1240 		   upper_32_bits(addr), lower_32_bits(addr));
1241 	if (INTEL_GEN(dev_priv) >= 4) {
1242 		drm_printf(m, "\tIPEIR: 0x%08x\n",
1243 			   ENGINE_READ(engine, RING_IPEIR));
1244 		drm_printf(m, "\tIPEHR: 0x%08x\n",
1245 			   ENGINE_READ(engine, RING_IPEHR));
1246 	} else {
1247 		drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
1248 		drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
1249 	}
1250 
1251 	if (HAS_EXECLISTS(dev_priv)) {
1252 		struct i915_request * const *port, *rq;
1253 		const u32 *hws =
1254 			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
1255 		const u8 num_entries = execlists->csb_size;
1256 		unsigned int idx;
1257 		u8 read, write;
1258 
1259 		drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n",
1260 			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
1261 			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
1262 			   num_entries);
1263 
1264 		read = execlists->csb_head;
1265 		write = READ_ONCE(*execlists->csb_write);
1266 
1267 		drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? %s (%s)\n",
1268 			   read, write,
1269 			   yesno(test_bit(TASKLET_STATE_SCHED,
1270 					  &engine->execlists.tasklet.state)),
1271 			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
1272 		if (read >= num_entries)
1273 			read = 0;
1274 		if (write >= num_entries)
1275 			write = 0;
1276 		if (read > write)
1277 			write += num_entries;
1278 		while (read < write) {
1279 			idx = ++read % num_entries;
1280 			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
1281 				   idx, hws[idx * 2], hws[idx * 2 + 1]);
1282 		}
1283 
1284 		spin_lock_irqsave(&engine->active.lock, flags);
1285 		for (port = execlists->active; (rq = *port); port++) {
1286 			char hdr[80];
1287 			int len;
1288 
1289 			len = snprintf(hdr, sizeof(hdr),
1290 				       "\t\tActive[%d: ",
1291 				       (int)(port - execlists->active));
1292 			if (!i915_request_signaled(rq))
1293 				len += snprintf(hdr + len, sizeof(hdr) - len,
1294 						"ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
1295 						i915_ggtt_offset(rq->ring->vma),
1296 						rq->timeline->hwsp_offset,
1297 						hwsp_seqno(rq));
1298 			snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
1299 			print_request(m, rq, hdr);
1300 		}
1301 		for (port = execlists->pending; (rq = *port); port++) {
1302 			char hdr[80];
1303 
1304 			snprintf(hdr, sizeof(hdr),
1305 				 "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
1306 				 (int)(port - execlists->pending),
1307 				 i915_ggtt_offset(rq->ring->vma),
1308 				 rq->timeline->hwsp_offset,
1309 				 hwsp_seqno(rq));
1310 			print_request(m, rq, hdr);
1311 		}
1312 		spin_unlock_irqrestore(&engine->active.lock, flags);
1313 	} else if (INTEL_GEN(dev_priv) > 6) {
1314 		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
1315 			   ENGINE_READ(engine, RING_PP_DIR_BASE));
1316 		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
1317 			   ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
1318 		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
1319 			   ENGINE_READ(engine, RING_PP_DIR_DCLV));
1320 	}
1321 }
1322 
1323 static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
1324 {
1325 	void *ring;
1326 	int size;
1327 
1328 	drm_printf(m,
1329 		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
1330 		   rq->head, rq->postfix, rq->tail,
1331 		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
1332 		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
1333 
1334 	size = rq->tail - rq->head;
1335 	if (rq->tail < rq->head)
1336 		size += rq->ring->size;
1337 
1338 	ring = kmalloc(size, GFP_ATOMIC);
1339 	if (ring) {
1340 		const void *vaddr = rq->ring->vaddr;
1341 		unsigned int head = rq->head;
1342 		unsigned int len = 0;
1343 
1344 		if (rq->tail < head) {
1345 			len = rq->ring->size - head;
1346 			memcpy(ring, vaddr + head, len);
1347 			head = 0;
1348 		}
1349 		memcpy(ring + len, vaddr + head, size - len);
1350 
1351 		hexdump(m, ring, size);
1352 		kfree(ring);
1353 	}
1354 }
1355 
1356 void intel_engine_dump(struct intel_engine_cs *engine,
1357 		       struct drm_printer *m,
1358 		       const char *header, ...)
1359 {
1360 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
1361 	struct i915_request *rq;
1362 	intel_wakeref_t wakeref;
1363 	unsigned long flags;
1364 
1365 	if (header) {
1366 		va_list ap;
1367 
1368 		va_start(ap, header);
1369 		drm_vprintf(m, header, &ap);
1370 		va_end(ap);
1371 	}
1372 
1373 	if (intel_gt_is_wedged(engine->gt))
1374 		drm_printf(m, "*** WEDGED ***\n");
1375 
1376 	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
1377 	drm_printf(m, "\tHangcheck: %d ms ago\n",
1378 		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
1379 	drm_printf(m, "\tReset count: %d (global %d)\n",
1380 		   i915_reset_engine_count(error, engine),
1381 		   i915_reset_count(error));
1382 
1383 	drm_printf(m, "\tRequests:\n");
1384 
1385 	spin_lock_irqsave(&engine->active.lock, flags);
1386 	rq = intel_engine_find_active_request(engine);
1387 	if (rq) {
1388 		print_request(m, rq, "\t\tactive ");
1389 
1390 		drm_printf(m, "\t\tring->start:  0x%08x\n",
1391 			   i915_ggtt_offset(rq->ring->vma));
1392 		drm_printf(m, "\t\tring->head:   0x%08x\n",
1393 			   rq->ring->head);
1394 		drm_printf(m, "\t\tring->tail:   0x%08x\n",
1395 			   rq->ring->tail);
1396 		drm_printf(m, "\t\tring->emit:   0x%08x\n",
1397 			   rq->ring->emit);
1398 		drm_printf(m, "\t\tring->space:  0x%08x\n",
1399 			   rq->ring->space);
1400 		drm_printf(m, "\t\tring->hwsp:   0x%08x\n",
1401 			   rq->timeline->hwsp_offset);
1402 
1403 		print_request_ring(m, rq);
1404 	}
1405 	spin_unlock_irqrestore(&engine->active.lock, flags);
1406 
1407 	drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
1408 	wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm);
1409 	if (wakeref) {
1410 		intel_engine_print_registers(engine, m);
1411 		intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref);
1412 	} else {
1413 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
1414 	}
1415 
1416 	intel_execlists_show_requests(engine, m, print_request, 8);
1417 
1418 	drm_printf(m, "HWSP:\n");
1419 	hexdump(m, engine->status_page.addr, PAGE_SIZE);
1420 
1421 	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
1422 
1423 	intel_engine_print_breadcrumbs(engine, m);
1424 }
1425 
1426 /**
1427  * intel_enable_engine_stats() - Enable engine busy tracking on engine
1428  * @engine: engine to enable stats collection
1429  *
1430  * Start collecting the engine busyness data for @engine.
1431  *
1432  * Returns 0 on success or a negative error code.
1433  */
1434 int intel_enable_engine_stats(struct intel_engine_cs *engine)
1435 {
1436 	struct intel_engine_execlists *execlists = &engine->execlists;
1437 	unsigned long flags;
1438 	int err = 0;
1439 
1440 	if (!intel_engine_supports_stats(engine))
1441 		return -ENODEV;
1442 
1443 	spin_lock_irqsave(&engine->active.lock, flags);
1444 	write_seqlock(&engine->stats.lock);
1445 
1446 	if (unlikely(engine->stats.enabled == ~0)) {
1447 		err = -EBUSY;
1448 		goto unlock;
1449 	}
1450 
1451 	if (engine->stats.enabled++ == 0) {
1452 		struct i915_request * const *port;
1453 		struct i915_request *rq;
1454 
1455 		engine->stats.enabled_at = ktime_get();
1456 
1457 		/* XXX submission method oblivious? */
1458 		for (port = execlists->active; (rq = *port); port++)
1459 			engine->stats.active++;
1460 
1461 		for (port = execlists->pending; (rq = *port); port++) {
1462 			/* Exclude any contexts already counted in active */
1463 			if (!intel_context_inflight_count(rq->hw_context))
1464 				engine->stats.active++;
1465 		}
1466 
1467 		if (engine->stats.active)
1468 			engine->stats.start = engine->stats.enabled_at;
1469 	}
1470 
1471 unlock:
1472 	write_sequnlock(&engine->stats.lock);
1473 	spin_unlock_irqrestore(&engine->active.lock, flags);
1474 
1475 	return err;
1476 }
1477 
1478 static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
1479 {
1480 	ktime_t total = engine->stats.total;
1481 
1482 	/*
1483 	 * If the engine is executing something at the moment
1484 	 * add it to the total.
1485 	 */
1486 	if (engine->stats.active)
1487 		total = ktime_add(total,
1488 				  ktime_sub(ktime_get(), engine->stats.start));
1489 
1490 	return total;
1491 }
1492 
1493 /**
1494  * intel_engine_get_busy_time() - Return current accumulated engine busyness
1495  * @engine: engine to report on
1496  *
1497  * Returns accumulated time @engine was busy since engine stats were enabled.
1498  */
1499 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
1500 {
1501 	unsigned int seq;
1502 	ktime_t total;
1503 
1504 	do {
1505 		seq = read_seqbegin(&engine->stats.lock);
1506 		total = __intel_engine_get_busy_time(engine);
1507 	} while (read_seqretry(&engine->stats.lock, seq));
1508 
1509 	return total;
1510 }
1511 
1512 /**
1513  * intel_disable_engine_stats() - Disable engine busy tracking on engine
1514  * @engine: engine to disable stats collection
1515  *
1516  * Stops collecting the engine busyness data for @engine.
1517  */
1518 void intel_disable_engine_stats(struct intel_engine_cs *engine)
1519 {
1520 	unsigned long flags;
1521 
1522 	if (!intel_engine_supports_stats(engine))
1523 		return;
1524 
1525 	write_seqlock_irqsave(&engine->stats.lock, flags);
1526 	WARN_ON_ONCE(engine->stats.enabled == 0);
1527 	if (--engine->stats.enabled == 0) {
1528 		engine->stats.total = __intel_engine_get_busy_time(engine);
1529 		engine->stats.active = 0;
1530 	}
1531 	write_sequnlock_irqrestore(&engine->stats.lock, flags);
1532 }
1533 
1534 static bool match_ring(struct i915_request *rq)
1535 {
1536 	u32 ring = ENGINE_READ(rq->engine, RING_START);
1537 
1538 	return ring == i915_ggtt_offset(rq->ring->vma);
1539 }
1540 
1541 struct i915_request *
1542 intel_engine_find_active_request(struct intel_engine_cs *engine)
1543 {
1544 	struct i915_request *request, *active = NULL;
1545 
1546 	/*
1547 	 * We are called by the error capture, reset and to dump engine
1548 	 * state at random points in time. In particular, note that neither is
1549 	 * crucially ordered with an interrupt. After a hang, the GPU is dead
1550 	 * and we assume that no more writes can happen (we waited long enough
1551 	 * for all writes that were in transaction to be flushed) - adding an
1552 	 * extra delay for a recent interrupt is pointless. Hence, we do
1553 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
1554 	 * At all other times, we must assume the GPU is still running, but
1555 	 * we only care about the snapshot of this moment.
1556 	 */
1557 	lockdep_assert_held(&engine->active.lock);
1558 	list_for_each_entry(request, &engine->active.requests, sched.link) {
1559 		if (i915_request_completed(request))
1560 			continue;
1561 
1562 		if (!i915_request_started(request))
1563 			continue;
1564 
1565 		/* More than one preemptible request may match! */
1566 		if (!match_ring(request))
1567 			continue;
1568 
1569 		active = request;
1570 		break;
1571 	}
1572 
1573 	return active;
1574 }
1575 
1576 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1577 #include "mock_engine.c"
1578 #include "selftest_engine.c"
1579 #include "selftest_engine_cs.c"
1580 #endif
1581