/*
 * SPDX-License-Identifier: GPL-2.0
 *
 * Copyright © 2019 Intel Corporation
 */

#include "i915_selftest.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"

#include "gem/selftests/mock_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"

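/*
 * Submit a request and synchronously wait for it to complete, retiring it
 * (and any preceding requests) on success. The request arrives with its
 * timeline mutex held (as returned by intel_context_create_request() and
 * friends), so we open-code i915_request_add() and only release the
 * timeline lock after the wait and retirement are done.
 */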
static int request_sync(struct i915_request *rq)
{
	struct intel_timeline *tl = i915_request_timeline(rq);
	long timeout;
	int err = 0;

	intel_timeline_get(tl);
	i915_request_get(rq);

	/* Opencode i915_request_add() so we can keep the timeline locked. */
	__i915_request_commit(rq);
	__i915_request_queue(rq, NULL);

	timeout = i915_request_wait(rq, 0, HZ / 10);
	if (timeout < 0)
		err = timeout;
	else
		i915_request_retire_upto(rq);

	lockdep_unpin_lock(&tl->mutex, rq->cookie);
	mutex_unlock(&tl->mutex);

	i915_request_put(rq);
	intel_timeline_put(tl);

	return err;
}

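/*
 * Wait for and retire every request outstanding on the context's timeline,
 * looping on the most recent request until the timeline is empty or a wait
 * times out.
 */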
static int context_sync(struct intel_context *ce)
{
	struct intel_timeline *tl = ce->timeline;
	int err = 0;

	mutex_lock(&tl->mutex);
	do {
		struct i915_request *rq;
		long timeout;

		if (list_empty(&tl->requests))
			break;

		rq = list_last_entry(&tl->requests, typeof(*rq), link);
		i915_request_get(rq);

		timeout = i915_request_wait(rq, 0, HZ / 10);
		if (timeout < 0)
			err = timeout;
		else
			i915_request_retire_upto(rq);

		i915_request_put(rq);
	} while (!err);
	mutex_unlock(&tl->mutex);

	return err;
}

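/*
 * The caller has grown engine->context_size by an extra page; poison that
 * trailing page, run a request on the context followed by a kernel request
 * to force a context save, and then verify the poison is untouched, i.e.
 * that the HW never wrote beyond the advertised context size.
 */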
static int __live_context_size(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	void *vaddr;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err)
		goto err;

	vaddr = i915_gem_object_pin_map(ce->state->obj,
					i915_coherent_map_type(engine->i915));
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		intel_context_unpin(ce);
		goto err;
	}

	/*
	 * Note that execlists also applies a redzone which it checks on
	 * context unpin when debugging. We are using the same location
	 * and same poison value so that our checks overlap. Despite the
	 * redundancy, we want to keep this little selftest so that we
	 * get coverage of any and all submission backends, and we can
	 * always extend this test to ensure we trick the HW into a
	 * compromising position wrt the various sections that need
	 * to be written into the context state.
	 *
	 * TLDR; this overlaps with the execlists redzone.
	 */
	vaddr += engine->context_size - I915_GTT_PAGE_SIZE;
	memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);

	rq = intel_context_create_request(ce);
	intel_context_unpin(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = request_sync(rq);
	if (err)
		goto err_unpin;

	/* Force the context switch */
	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}
	err = request_sync(rq);
	if (err)
		goto err_unpin;

	if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) {
		pr_err("%s context overwrote trailing red-zone!\n", engine->name);
		err = -EINVAL;
	}

err_unpin:
	i915_gem_object_unpin_map(ce->state->obj);
err:
	intel_context_put(ce);
	return err;
}

static int live_context_size(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that our context sizes are correct by seeing if the
	 * HW tries to write past the end of one.
	 */

	for_each_engine(engine, gt, id) {
		struct {
			struct drm_i915_gem_object *state;
			void *pinned;
		} saved;

		if (!engine->context_size)
			continue;

		intel_engine_pm_get(engine);

		/*
		 * Hide the old default state -- we lie about the context size
		 * and get confused when the default state is smaller than
		 * expected. For our do nothing request, inheriting the
		 * active state is sufficient, we are only checking that we
		 * don't use more than we planned.
		 */
		saved.state = fetch_and_zero(&engine->default_state);
		saved.pinned = fetch_and_zero(&engine->pinned_default_state);

		/* Overlaps with the execlists redzone */
		engine->context_size += I915_GTT_PAGE_SIZE;

		err = __live_context_size(engine);

		engine->context_size -= I915_GTT_PAGE_SIZE;

		engine->pinned_default_state = saved.pinned;
		engine->default_state = saved.state;

		intel_engine_pm_put(engine);

		if (err)
			break;
	}

	return err;
}

static int __live_active_context(struct intel_engine_cs *engine)
{
	unsigned long saved_heartbeat;
	struct intel_context *ce;
	int pass;
	int err;

	/*
	 * We keep active contexts alive until after a subsequent context
	 * switch as the final write from the context-save will be after
	 * we retire the final request. We track when we unpin the context,
	 * under the presumption that the final pin is from the last request,
	 * and instead of immediately unpinning the context, we add a task
	 * to unpin the context from the next idle-barrier.
	 *
	 * This test makes sure that the context is kept alive until a
	 * subsequent idle-barrier (emitted when the engine wakeref hits 0
	 * with no more outstanding requests).
	 */

	if (intel_engine_pm_is_awake(engine)) {
		pr_err("%s is awake before starting %s!\n",
		       engine->name, __func__);
		return -EINVAL;
	}

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	saved_heartbeat = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;

	for (pass = 0; pass <= 2; pass++) {
		struct i915_request *rq;

		intel_engine_pm_get(engine);

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_engine;
		}

		err = request_sync(rq);
		if (err)
			goto out_engine;

		/* Context will be kept active until after an idle-barrier. */
		if (i915_active_is_idle(&ce->active)) {
			pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			goto out_engine;
		}

		if (!intel_engine_pm_is_awake(engine)) {
			pr_err("%s is asleep before idle-barrier\n",
			       engine->name);
			err = -EINVAL;
			goto out_engine;
		}

out_engine:
		intel_engine_pm_put(engine);
		if (err)
			goto err;
	}

	/* Now make sure our idle-barriers are flushed */
	err = intel_engine_flush_barriers(engine);
	if (err)
		goto err;

	/* Wait for the barrier and in the process wait for engine to park */
	err = context_sync(engine->kernel_context);
	if (err)
		goto err;

	if (!i915_active_is_idle(&ce->active)) {
		pr_err("context is still active!\n");
		err = -EINVAL;
	}

	intel_engine_pm_flush(engine);

	if (intel_engine_pm_is_awake(engine)) {
		struct drm_printer p = drm_debug_printer(__func__);

		intel_engine_dump(engine, &p,
				  "%s is still awake:%d after idle-barriers\n",
				  engine->name,
				  atomic_read(&engine->wakeref.count));
		GEM_TRACE_DUMP();

		err = -EINVAL;
		goto err;
	}

err:
	engine->props.heartbeat_interval_ms = saved_heartbeat;
	intel_context_put(ce);
	return err;
}

static int live_active_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		err = __live_active_context(engine);
		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

	return err;
}

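/*
 * Build a request on @ce that also operates on @remote's context image
 * (via intel_context_prepare_remote_request()), then submit it and wait
 * for completion. @remote is pinned around the operation.
 */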
static int __remote_sync(struct intel_context *ce, struct intel_context *remote)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(remote);
	if (err)
		return err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin;
	}

	err = intel_context_prepare_remote_request(remote, rq);
	if (err) {
		i915_request_add(rq);
		goto unpin;
	}

	err = request_sync(rq);

unpin:
	intel_context_unpin(remote);
	return err;
}

static int __live_remote_context(struct intel_engine_cs *engine)
{
	struct intel_context *local, *remote;
	unsigned long saved_heartbeat;
	int pass;
	int err;

	/*
	 * Check that our idle barriers do not interfere with normal
	 * activity tracking. In particular, check that operating
	 * on the context image remotely (intel_context_prepare_remote_request),
	 * which inserts foreign fences into intel_context.active, does not
	 * clobber the idle-barrier.
	 */

	if (intel_engine_pm_is_awake(engine)) {
		pr_err("%s is awake before starting %s!\n",
		       engine->name, __func__);
		return -EINVAL;
	}

	remote = intel_context_create(engine);
	if (IS_ERR(remote))
		return PTR_ERR(remote);

	local = intel_context_create(engine);
	if (IS_ERR(local)) {
		err = PTR_ERR(local);
		goto err_remote;
	}

	saved_heartbeat = engine->props.heartbeat_interval_ms;
	engine->props.heartbeat_interval_ms = 0;
	intel_engine_pm_get(engine);

	for (pass = 0; pass <= 2; pass++) {
		err = __remote_sync(local, remote);
		if (err)
			break;

		err = __remote_sync(engine->kernel_context, remote);
		if (err)
			break;

		if (i915_active_is_idle(&remote->active)) {
			pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			break;
		}
	}

	intel_engine_pm_put(engine);
	engine->props.heartbeat_interval_ms = saved_heartbeat;

	intel_context_put(local);
err_remote:
	intel_context_put(remote);
	return err;
}

static int live_remote_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	for_each_engine(engine, gt, id) {
		err = __live_remote_context(engine);
		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

	return err;
}

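/*
 * Top-level entry point: run the live intel_context selftests against the
 * GT, skipping them entirely if the GT is already wedged.
 */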
int intel_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_context_size),
		SUBTEST(live_active_context),
		SUBTEST(live_remote_context),
	};
	struct intel_gt *gt = &i915->gt;

	if (intel_gt_is_wedged(gt))
		return 0;

	return intel_gt_live_subtests(tests, gt);
}