xref: /linux/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c (revision add452d09a38c7a7c44aea55c1015392cebf9fa7)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2018 Intel Corporation
4  */
5 
6 #include <linux/sort.h>
7 
8 #include "i915_drv.h"
9 
10 #include "intel_gt_requests.h"
11 #include "i915_selftest.h"
12 #include "selftest_engine_heartbeat.h"
13 
14 static void reset_heartbeat(struct intel_engine_cs *engine)
15 {
16 	intel_engine_set_heartbeat(engine,
17 				   engine->defaults.heartbeat_interval_ms);
18 }
19 
20 static int timeline_sync(struct intel_timeline *tl)
21 {
22 	struct dma_fence *fence;
23 	long timeout;
24 
25 	fence = i915_active_fence_get(&tl->last_request);
26 	if (!fence)
27 		return 0;
28 
29 	timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
30 	dma_fence_put(fence);
31 	if (timeout < 0)
32 		return timeout;
33 
34 	return 0;
35 }
36 
37 static int engine_sync_barrier(struct intel_engine_cs *engine)
38 {
39 	return timeline_sync(engine->kernel_context->timeline);
40 }
41 
42 struct pulse {
43 	struct i915_active active;
44 	struct kref kref;
45 };
46 
47 static int pulse_active(struct i915_active *active)
48 {
49 	kref_get(&container_of(active, struct pulse, active)->kref);
50 	return 0;
51 }
52 
53 static void pulse_free(struct kref *kref)
54 {
55 	struct pulse *p = container_of(kref, typeof(*p), kref);
56 
57 	i915_active_fini(&p->active);
58 	kfree(p);
59 }
60 
61 static void pulse_put(struct pulse *p)
62 {
63 	kref_put(&p->kref, pulse_free);
64 }
65 
66 static void pulse_retire(struct i915_active *active)
67 {
68 	pulse_put(container_of(active, struct pulse, active));
69 }
70 
71 static struct pulse *pulse_create(void)
72 {
73 	struct pulse *p;
74 
75 	p = kmalloc(sizeof(*p), GFP_KERNEL);
76 	if (!p)
77 		return p;
78 
79 	kref_init(&p->kref);
80 	i915_active_init(&p->active, pulse_active, pulse_retire, 0);
81 
82 	return p;
83 }
84 
85 static void pulse_unlock_wait(struct pulse *p)
86 {
87 	wait_var_event_timeout(&p->active, i915_active_is_idle(&p->active), HZ);
88 }
89 
90 static int __live_idle_pulse(struct intel_engine_cs *engine,
91 			     int (*fn)(struct intel_engine_cs *cs))
92 {
93 	struct pulse *p;
94 	int err;
95 
96 	GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
97 
98 	p = pulse_create();
99 	if (!p)
100 		return -ENOMEM;
101 
102 	err = i915_active_acquire(&p->active);
103 	if (err)
104 		goto out;
105 
106 	err = i915_active_acquire_preallocate_barrier(&p->active, engine);
107 	if (err) {
108 		i915_active_release(&p->active);
109 		goto out;
110 	}
111 
112 	i915_active_acquire_barrier(&p->active);
113 	i915_active_release(&p->active);
114 
115 	GEM_BUG_ON(i915_active_is_idle(&p->active));
116 	GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
117 
118 	err = fn(engine);
119 	if (err)
120 		goto out;
121 
122 	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
123 
124 	if (engine_sync_barrier(engine)) {
125 		struct drm_printer m = drm_err_printer(&engine->i915->drm, "pulse");
126 
127 		drm_printf(&m, "%s: no heartbeat pulse?\n", engine->name);
128 		intel_engine_dump(engine, &m, "%s", engine->name);
129 
130 		err = -ETIME;
131 		goto out;
132 	}
133 
134 	GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
135 
136 	pulse_unlock_wait(p); /* synchronize with the retirement callback */
137 
138 	if (!i915_active_is_idle(&p->active)) {
139 		struct drm_printer m = drm_err_printer(&engine->i915->drm, "pulse");
140 
141 		drm_printf(&m, "%s: heartbeat pulse did not flush idle tasks\n",
142 			   engine->name);
143 		i915_active_print(&p->active, &m);
144 
145 		err = -EINVAL;
146 		goto out;
147 	}
148 
149 out:
150 	pulse_put(p);
151 	return err;
152 }
153 
154 static int live_idle_flush(void *arg)
155 {
156 	struct intel_gt *gt = arg;
157 	struct intel_engine_cs *engine;
158 	enum intel_engine_id id;
159 	int err = 0;
160 
161 	/* Check that we can flush the idle barriers */
162 
163 	for_each_engine(engine, gt, id) {
164 		st_engine_heartbeat_disable(engine);
165 		err = __live_idle_pulse(engine, intel_engine_flush_barriers);
166 		st_engine_heartbeat_enable(engine);
167 		if (err)
168 			break;
169 	}
170 
171 	return err;
172 }
173 
174 static int live_idle_pulse(void *arg)
175 {
176 	struct intel_gt *gt = arg;
177 	struct intel_engine_cs *engine;
178 	enum intel_engine_id id;
179 	int err = 0;
180 
181 	/* Check that heartbeat pulses flush the idle barriers */
182 
183 	for_each_engine(engine, gt, id) {
184 		st_engine_heartbeat_disable(engine);
185 		err = __live_idle_pulse(engine, intel_engine_pulse);
186 		st_engine_heartbeat_enable(engine);
187 		if (err && err != -ENODEV)
188 			break;
189 
190 		err = 0;
191 	}
192 
193 	return err;
194 }
195 
196 static int __live_heartbeat_off(struct intel_engine_cs *engine)
197 {
198 	int err;
199 
200 	intel_engine_pm_get(engine);
201 
202 	engine->serial++;
203 	flush_delayed_work(&engine->heartbeat.work);
204 	if (!delayed_work_pending(&engine->heartbeat.work)) {
205 		pr_err("%s: heartbeat not running\n",
206 		       engine->name);
207 		err = -EINVAL;
208 		goto err_pm;
209 	}
210 
211 	err = intel_engine_set_heartbeat(engine, 0);
212 	if (err)
213 		goto err_pm;
214 
215 	engine->serial++;
216 	flush_delayed_work(&engine->heartbeat.work);
217 	if (delayed_work_pending(&engine->heartbeat.work)) {
218 		pr_err("%s: heartbeat still running\n",
219 		       engine->name);
220 		err = -EINVAL;
221 		goto err_beat;
222 	}
223 
224 	if (READ_ONCE(engine->heartbeat.systole)) {
225 		pr_err("%s: heartbeat still allocated\n",
226 		       engine->name);
227 		err = -EINVAL;
228 		goto err_beat;
229 	}
230 
231 err_beat:
232 	reset_heartbeat(engine);
233 err_pm:
234 	intel_engine_pm_put(engine);
235 	return err;
236 }
237 
238 static int live_heartbeat_off(void *arg)
239 {
240 	struct intel_gt *gt = arg;
241 	struct intel_engine_cs *engine;
242 	enum intel_engine_id id;
243 	int err = 0;
244 
245 	/* Check that we can turn off heartbeat and not interrupt VIP */
246 	if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
247 		return 0;
248 
249 	for_each_engine(engine, gt, id) {
250 		if (!intel_engine_has_preemption(engine))
251 			continue;
252 
253 		err = __live_heartbeat_off(engine);
254 		if (err)
255 			break;
256 	}
257 
258 	return err;
259 }
260 
261 int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
262 {
263 	static const struct i915_subtest tests[] = {
264 		SUBTEST(live_idle_flush),
265 		SUBTEST(live_idle_pulse),
266 		SUBTEST(live_heartbeat_off),
267 	};
268 	int saved_hangcheck;
269 	int err;
270 
271 	if (intel_gt_is_wedged(to_gt(i915)))
272 		return 0;
273 
274 	saved_hangcheck = i915->params.enable_hangcheck;
275 	i915->params.enable_hangcheck = INT_MAX;
276 
277 	err = intel_gt_live_subtests(tests, to_gt(i915));
278 
279 	i915->params.enable_hangcheck = saved_hangcheck;
280 	return err;
281 }
282 
283 void st_engine_heartbeat_disable(struct intel_engine_cs *engine)
284 {
285 	engine->props.heartbeat_interval_ms = 0;
286 
287 	intel_engine_pm_get(engine);
288 	intel_engine_park_heartbeat(engine);
289 }
290 
291 void st_engine_heartbeat_enable(struct intel_engine_cs *engine)
292 {
293 	intel_engine_pm_put(engine);
294 
295 	engine->props.heartbeat_interval_ms =
296 		engine->defaults.heartbeat_interval_ms;
297 }
298 
299 void st_engine_heartbeat_disable_no_pm(struct intel_engine_cs *engine)
300 {
301 	engine->props.heartbeat_interval_ms = 0;
302 
303 	/*
304 	 * Park the heartbeat but without holding the PM lock as that
305 	 * makes the engines appear not-idle. Note that if/when unpark
306 	 * is called due to the PM lock being acquired later the
307 	 * heartbeat still won't be enabled because of the above = 0.
308 	 */
309 	if (intel_engine_pm_get_if_awake(engine)) {
310 		intel_engine_park_heartbeat(engine);
311 		intel_engine_pm_put(engine);
312 	}
313 }
314 
315 void st_engine_heartbeat_enable_no_pm(struct intel_engine_cs *engine)
316 {
317 	engine->props.heartbeat_interval_ms =
318 		engine->defaults.heartbeat_interval_ms;
319 }
320