xref: /linux/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c (revision a1ff5a7d78a036d6c2178ee5acd6ba4946243800)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */
5b5e8e954SChris Wilson 
6058179e7SChris Wilson #include <linux/sort.h>
7058179e7SChris Wilson 
8b5e8e954SChris Wilson #include "i915_drv.h"
9b5e8e954SChris Wilson 
10b5e8e954SChris Wilson #include "intel_gt_requests.h"
11b5e8e954SChris Wilson #include "i915_selftest.h"
121b90e4a4SChris Wilson #include "selftest_engine_heartbeat.h"
13b5e8e954SChris Wilson 
reset_heartbeat(struct intel_engine_cs * engine)1410c5585bSChris Wilson static void reset_heartbeat(struct intel_engine_cs *engine)
1510c5585bSChris Wilson {
1610c5585bSChris Wilson 	intel_engine_set_heartbeat(engine,
1710c5585bSChris Wilson 				   engine->defaults.heartbeat_interval_ms);
1810c5585bSChris Wilson }
1910c5585bSChris Wilson 
timeline_sync(struct intel_timeline * tl)200cb7da10SChris Wilson static int timeline_sync(struct intel_timeline *tl)
210cb7da10SChris Wilson {
220cb7da10SChris Wilson 	struct dma_fence *fence;
230cb7da10SChris Wilson 	long timeout;
240cb7da10SChris Wilson 
250cb7da10SChris Wilson 	fence = i915_active_fence_get(&tl->last_request);
260cb7da10SChris Wilson 	if (!fence)
270cb7da10SChris Wilson 		return 0;
280cb7da10SChris Wilson 
290cb7da10SChris Wilson 	timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
300cb7da10SChris Wilson 	dma_fence_put(fence);
310cb7da10SChris Wilson 	if (timeout < 0)
320cb7da10SChris Wilson 		return timeout;
330cb7da10SChris Wilson 
340cb7da10SChris Wilson 	return 0;
350cb7da10SChris Wilson }
360cb7da10SChris Wilson 
engine_sync_barrier(struct intel_engine_cs * engine)370cb7da10SChris Wilson static int engine_sync_barrier(struct intel_engine_cs *engine)
380cb7da10SChris Wilson {
390cb7da10SChris Wilson 	return timeline_sync(engine->kernel_context->timeline);
400cb7da10SChris Wilson }
410cb7da10SChris Wilson 
42b5e8e954SChris Wilson struct pulse {
43b5e8e954SChris Wilson 	struct i915_active active;
44b5e8e954SChris Wilson 	struct kref kref;
45b5e8e954SChris Wilson };
46b5e8e954SChris Wilson 
pulse_active(struct i915_active * active)47b5e8e954SChris Wilson static int pulse_active(struct i915_active *active)
48b5e8e954SChris Wilson {
49b5e8e954SChris Wilson 	kref_get(&container_of(active, struct pulse, active)->kref);
50b5e8e954SChris Wilson 	return 0;
51b5e8e954SChris Wilson }
52b5e8e954SChris Wilson 
pulse_free(struct kref * kref)53b5e8e954SChris Wilson static void pulse_free(struct kref *kref)
54b5e8e954SChris Wilson {
559a0a3bebSChris Wilson 	struct pulse *p = container_of(kref, typeof(*p), kref);
569a0a3bebSChris Wilson 
579a0a3bebSChris Wilson 	i915_active_fini(&p->active);
589a0a3bebSChris Wilson 	kfree(p);
59b5e8e954SChris Wilson }
60b5e8e954SChris Wilson 
pulse_put(struct pulse * p)61b5e8e954SChris Wilson static void pulse_put(struct pulse *p)
62b5e8e954SChris Wilson {
63b5e8e954SChris Wilson 	kref_put(&p->kref, pulse_free);
64b5e8e954SChris Wilson }
65b5e8e954SChris Wilson 
pulse_retire(struct i915_active * active)66c3b14760SMatthew Auld static void pulse_retire(struct i915_active *active)
67b5e8e954SChris Wilson {
68b5e8e954SChris Wilson 	pulse_put(container_of(active, struct pulse, active));
69b5e8e954SChris Wilson }
70b5e8e954SChris Wilson 
pulse_create(void)71b5e8e954SChris Wilson static struct pulse *pulse_create(void)
72b5e8e954SChris Wilson {
73b5e8e954SChris Wilson 	struct pulse *p;
74b5e8e954SChris Wilson 
75b5e8e954SChris Wilson 	p = kmalloc(sizeof(*p), GFP_KERNEL);
76b5e8e954SChris Wilson 	if (!p)
77b5e8e954SChris Wilson 		return p;
78b5e8e954SChris Wilson 
79b5e8e954SChris Wilson 	kref_init(&p->kref);
80c3b14760SMatthew Auld 	i915_active_init(&p->active, pulse_active, pulse_retire, 0);
81b5e8e954SChris Wilson 
82b5e8e954SChris Wilson 	return p;
83b5e8e954SChris Wilson }
84b5e8e954SChris Wilson 
pulse_unlock_wait(struct pulse * p)85f79520bbSChris Wilson static void pulse_unlock_wait(struct pulse *p)
86f79520bbSChris Wilson {
8734880b18SAndrzej Hajda 	wait_var_event_timeout(&p->active, i915_active_is_idle(&p->active), HZ);
88f79520bbSChris Wilson }
89f79520bbSChris Wilson 
__live_idle_pulse(struct intel_engine_cs * engine,int (* fn)(struct intel_engine_cs * cs))90b5e8e954SChris Wilson static int __live_idle_pulse(struct intel_engine_cs *engine,
91b5e8e954SChris Wilson 			     int (*fn)(struct intel_engine_cs *cs))
92b5e8e954SChris Wilson {
93b5e8e954SChris Wilson 	struct pulse *p;
94b5e8e954SChris Wilson 	int err;
95b5e8e954SChris Wilson 
96f79520bbSChris Wilson 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
97f79520bbSChris Wilson 
98b5e8e954SChris Wilson 	p = pulse_create();
99b5e8e954SChris Wilson 	if (!p)
100b5e8e954SChris Wilson 		return -ENOMEM;
101b5e8e954SChris Wilson 
102b5e8e954SChris Wilson 	err = i915_active_acquire(&p->active);
103b5e8e954SChris Wilson 	if (err)
104b5e8e954SChris Wilson 		goto out;
105b5e8e954SChris Wilson 
106b5e8e954SChris Wilson 	err = i915_active_acquire_preallocate_barrier(&p->active, engine);
107b5e8e954SChris Wilson 	if (err) {
108b5e8e954SChris Wilson 		i915_active_release(&p->active);
109b5e8e954SChris Wilson 		goto out;
110b5e8e954SChris Wilson 	}
111b5e8e954SChris Wilson 
112b5e8e954SChris Wilson 	i915_active_acquire_barrier(&p->active);
113b5e8e954SChris Wilson 	i915_active_release(&p->active);
114b5e8e954SChris Wilson 
115b5e8e954SChris Wilson 	GEM_BUG_ON(i915_active_is_idle(&p->active));
116f79520bbSChris Wilson 	GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
117b5e8e954SChris Wilson 
118b5e8e954SChris Wilson 	err = fn(engine);
119b5e8e954SChris Wilson 	if (err)
120b5e8e954SChris Wilson 		goto out;
121b5e8e954SChris Wilson 
122f79520bbSChris Wilson 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
123f79520bbSChris Wilson 
1240cb7da10SChris Wilson 	if (engine_sync_barrier(engine)) {
1255e0c04c8SJani Nikula 		struct drm_printer m = drm_err_printer(&engine->i915->drm, "pulse");
1267983990cSChris Wilson 
127*d2dda3bfSJani Nikula 		drm_printf(&m, "%s: no heartbeat pulse?\n", engine->name);
1287983990cSChris Wilson 		intel_engine_dump(engine, &m, "%s", engine->name);
1297983990cSChris Wilson 
130b5e8e954SChris Wilson 		err = -ETIME;
131b5e8e954SChris Wilson 		goto out;
132b5e8e954SChris Wilson 	}
133b5e8e954SChris Wilson 
1341db257c5SChris Wilson 	GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
1351db257c5SChris Wilson 
136f79520bbSChris Wilson 	pulse_unlock_wait(p); /* synchronize with the retirement callback */
137f79520bbSChris Wilson 
138b5e8e954SChris Wilson 	if (!i915_active_is_idle(&p->active)) {
1395e0c04c8SJani Nikula 		struct drm_printer m = drm_err_printer(&engine->i915->drm, "pulse");
140164a4128SChris Wilson 
141*d2dda3bfSJani Nikula 		drm_printf(&m, "%s: heartbeat pulse did not flush idle tasks\n",
142b5e8e954SChris Wilson 			   engine->name);
143164a4128SChris Wilson 		i915_active_print(&p->active, &m);
144164a4128SChris Wilson 
145b5e8e954SChris Wilson 		err = -EINVAL;
146b5e8e954SChris Wilson 		goto out;
147b5e8e954SChris Wilson 	}
148b5e8e954SChris Wilson 
149b5e8e954SChris Wilson out:
150b5e8e954SChris Wilson 	pulse_put(p);
151b5e8e954SChris Wilson 	return err;
152b5e8e954SChris Wilson }
153b5e8e954SChris Wilson 
live_idle_flush(void * arg)154b5e8e954SChris Wilson static int live_idle_flush(void *arg)
155b5e8e954SChris Wilson {
156b5e8e954SChris Wilson 	struct intel_gt *gt = arg;
157b5e8e954SChris Wilson 	struct intel_engine_cs *engine;
158b5e8e954SChris Wilson 	enum intel_engine_id id;
159b5e8e954SChris Wilson 	int err = 0;
160b5e8e954SChris Wilson 
161b5e8e954SChris Wilson 	/* Check that we can flush the idle barriers */
162b5e8e954SChris Wilson 
163b5e8e954SChris Wilson 	for_each_engine(engine, gt, id) {
1641b90e4a4SChris Wilson 		st_engine_heartbeat_disable(engine);
165b5e8e954SChris Wilson 		err = __live_idle_pulse(engine, intel_engine_flush_barriers);
1661b90e4a4SChris Wilson 		st_engine_heartbeat_enable(engine);
167b5e8e954SChris Wilson 		if (err)
168b5e8e954SChris Wilson 			break;
169b5e8e954SChris Wilson 	}
170b5e8e954SChris Wilson 
171b5e8e954SChris Wilson 	return err;
172b5e8e954SChris Wilson }
173b5e8e954SChris Wilson 
live_idle_pulse(void * arg)174b5e8e954SChris Wilson static int live_idle_pulse(void *arg)
175b5e8e954SChris Wilson {
176b5e8e954SChris Wilson 	struct intel_gt *gt = arg;
177b5e8e954SChris Wilson 	struct intel_engine_cs *engine;
178b5e8e954SChris Wilson 	enum intel_engine_id id;
179b5e8e954SChris Wilson 	int err = 0;
180b5e8e954SChris Wilson 
181b5e8e954SChris Wilson 	/* Check that heartbeat pulses flush the idle barriers */
182b5e8e954SChris Wilson 
183b5e8e954SChris Wilson 	for_each_engine(engine, gt, id) {
1841b90e4a4SChris Wilson 		st_engine_heartbeat_disable(engine);
185b5e8e954SChris Wilson 		err = __live_idle_pulse(engine, intel_engine_pulse);
1861b90e4a4SChris Wilson 		st_engine_heartbeat_enable(engine);
187b5e8e954SChris Wilson 		if (err && err != -ENODEV)
188b5e8e954SChris Wilson 			break;
189b5e8e954SChris Wilson 
190b5e8e954SChris Wilson 		err = 0;
191b5e8e954SChris Wilson 	}
192b5e8e954SChris Wilson 
193b5e8e954SChris Wilson 	return err;
194b5e8e954SChris Wilson }
195b5e8e954SChris Wilson 
__live_heartbeat_off(struct intel_engine_cs * engine)196058179e7SChris Wilson static int __live_heartbeat_off(struct intel_engine_cs *engine)
197058179e7SChris Wilson {
198058179e7SChris Wilson 	int err;
199058179e7SChris Wilson 
200058179e7SChris Wilson 	intel_engine_pm_get(engine);
201058179e7SChris Wilson 
202058179e7SChris Wilson 	engine->serial++;
203058179e7SChris Wilson 	flush_delayed_work(&engine->heartbeat.work);
204058179e7SChris Wilson 	if (!delayed_work_pending(&engine->heartbeat.work)) {
205058179e7SChris Wilson 		pr_err("%s: heartbeat not running\n",
206058179e7SChris Wilson 		       engine->name);
207058179e7SChris Wilson 		err = -EINVAL;
208058179e7SChris Wilson 		goto err_pm;
209058179e7SChris Wilson 	}
210058179e7SChris Wilson 
211058179e7SChris Wilson 	err = intel_engine_set_heartbeat(engine, 0);
212058179e7SChris Wilson 	if (err)
213058179e7SChris Wilson 		goto err_pm;
214058179e7SChris Wilson 
215058179e7SChris Wilson 	engine->serial++;
216058179e7SChris Wilson 	flush_delayed_work(&engine->heartbeat.work);
217058179e7SChris Wilson 	if (delayed_work_pending(&engine->heartbeat.work)) {
218058179e7SChris Wilson 		pr_err("%s: heartbeat still running\n",
219058179e7SChris Wilson 		       engine->name);
220058179e7SChris Wilson 		err = -EINVAL;
221058179e7SChris Wilson 		goto err_beat;
222058179e7SChris Wilson 	}
223058179e7SChris Wilson 
224058179e7SChris Wilson 	if (READ_ONCE(engine->heartbeat.systole)) {
225058179e7SChris Wilson 		pr_err("%s: heartbeat still allocated\n",
226058179e7SChris Wilson 		       engine->name);
227058179e7SChris Wilson 		err = -EINVAL;
228058179e7SChris Wilson 		goto err_beat;
229058179e7SChris Wilson 	}
230058179e7SChris Wilson 
231058179e7SChris Wilson err_beat:
23210c5585bSChris Wilson 	reset_heartbeat(engine);
233058179e7SChris Wilson err_pm:
234058179e7SChris Wilson 	intel_engine_pm_put(engine);
235058179e7SChris Wilson 	return err;
236058179e7SChris Wilson }
237058179e7SChris Wilson 
live_heartbeat_off(void * arg)238058179e7SChris Wilson static int live_heartbeat_off(void *arg)
239058179e7SChris Wilson {
240058179e7SChris Wilson 	struct intel_gt *gt = arg;
241058179e7SChris Wilson 	struct intel_engine_cs *engine;
242058179e7SChris Wilson 	enum intel_engine_id id;
243370831fcSChris Wilson 	int err = 0;
244058179e7SChris Wilson 
245058179e7SChris Wilson 	/* Check that we can turn off heartbeat and not interrupt VIP */
2461a839e01SLucas De Marchi 	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
247058179e7SChris Wilson 		return 0;
248058179e7SChris Wilson 
249058179e7SChris Wilson 	for_each_engine(engine, gt, id) {
250058179e7SChris Wilson 		if (!intel_engine_has_preemption(engine))
251058179e7SChris Wilson 			continue;
252058179e7SChris Wilson 
253058179e7SChris Wilson 		err = __live_heartbeat_off(engine);
254058179e7SChris Wilson 		if (err)
255058179e7SChris Wilson 			break;
256058179e7SChris Wilson 	}
257058179e7SChris Wilson 
258058179e7SChris Wilson 	return err;
259058179e7SChris Wilson }
260058179e7SChris Wilson 
intel_heartbeat_live_selftests(struct drm_i915_private * i915)261b5e8e954SChris Wilson int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
262b5e8e954SChris Wilson {
263b5e8e954SChris Wilson 	static const struct i915_subtest tests[] = {
264b5e8e954SChris Wilson 		SUBTEST(live_idle_flush),
265b5e8e954SChris Wilson 		SUBTEST(live_idle_pulse),
266058179e7SChris Wilson 		SUBTEST(live_heartbeat_off),
267b5e8e954SChris Wilson 	};
268b5e8e954SChris Wilson 	int saved_hangcheck;
269b5e8e954SChris Wilson 	int err;
270b5e8e954SChris Wilson 
271c14adcbdSMichał Winiarski 	if (intel_gt_is_wedged(to_gt(i915)))
272b5e8e954SChris Wilson 		return 0;
273b5e8e954SChris Wilson 
2748a25c4beSJani Nikula 	saved_hangcheck = i915->params.enable_hangcheck;
2758a25c4beSJani Nikula 	i915->params.enable_hangcheck = INT_MAX;
276b5e8e954SChris Wilson 
277c14adcbdSMichał Winiarski 	err = intel_gt_live_subtests(tests, to_gt(i915));
278b5e8e954SChris Wilson 
2798a25c4beSJani Nikula 	i915->params.enable_hangcheck = saved_hangcheck;
280b5e8e954SChris Wilson 	return err;
281b5e8e954SChris Wilson }
2821b90e4a4SChris Wilson 
st_engine_heartbeat_disable(struct intel_engine_cs * engine)2831b90e4a4SChris Wilson void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
2841b90e4a4SChris Wilson {
2851b90e4a4SChris Wilson 	engine->props.heartbeat_interval_ms = 0;
2861b90e4a4SChris Wilson 
2871b90e4a4SChris Wilson 	intel_engine_pm_get(engine);
2881b90e4a4SChris Wilson 	intel_engine_park_heartbeat(engine);
2891b90e4a4SChris Wilson }
2901b90e4a4SChris Wilson 
st_engine_heartbeat_enable(struct intel_engine_cs * engine)2911b90e4a4SChris Wilson void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
2921b90e4a4SChris Wilson {
2931b90e4a4SChris Wilson 	intel_engine_pm_put(engine);
2941b90e4a4SChris Wilson 
2951b90e4a4SChris Wilson 	engine->props.heartbeat_interval_ms =
2961b90e4a4SChris Wilson 		engine->defaults.heartbeat_interval_ms;
2971b90e4a4SChris Wilson }
298617e87c0SJohn Harrison 
st_engine_heartbeat_disable_no_pm(struct intel_engine_cs * engine)299617e87c0SJohn Harrison void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine)
300617e87c0SJohn Harrison {
301617e87c0SJohn Harrison 	engine->props.heartbeat_interval_ms = 0;
302617e87c0SJohn Harrison 
303617e87c0SJohn Harrison 	/*
304617e87c0SJohn Harrison 	 * Park the heartbeat but without holding the PM lock as that
305617e87c0SJohn Harrison 	 * makes the engines appear not-idle. Note that if/when unpark
306617e87c0SJohn Harrison 	 * is called due to the PM lock being acquired later the
307617e87c0SJohn Harrison 	 * heartbeat still won't be enabled because of the above = 0.
308617e87c0SJohn Harrison 	 */
309617e87c0SJohn Harrison 	if (intel_engine_pm_get_if_awake(engine)) {
310617e87c0SJohn Harrison 		intel_engine_park_heartbeat(engine);
311617e87c0SJohn Harrison 		intel_engine_pm_put(engine);
312617e87c0SJohn Harrison 	}
313617e87c0SJohn Harrison }
314617e87c0SJohn Harrison 
st_engine_heartbeat_enable_no_pm(struct intel_engine_cs * engine)315617e87c0SJohn Harrison void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine)
316617e87c0SJohn Harrison {
317617e87c0SJohn Harrison 	engine->props.heartbeat_interval_ms =
318617e87c0SJohn Harrison 		engine->defaults.heartbeat_interval_ms;
319617e87c0SJohn Harrison }
320