xref: /linux/drivers/gpu/drm/i915/gt/selftest_context.c (revision 6fdcba32711044c35c0e1b094cbd8f3f0b4472c9)
/*
 * SPDX-License-Identifier: GPL-2.0
 *
 * Copyright © 2019 Intel Corporation
 */

#include "i915_selftest.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"

#include "gem/selftests/mock_context.h"
#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"

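/*
 * Submit a request whose timeline mutex is already held (taken when the
 * request was created), wait briefly for it to complete and retire it on
 * success. The timeline lock is dropped before returning.
 */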
static int request_sync(struct i915_request *rq)
{
	struct intel_timeline *tl = i915_request_timeline(rq);
	long timeout;
	int err = 0;

	intel_timeline_get(tl);
	i915_request_get(rq);

	/* Opencode i915_request_add() so we can keep the timeline locked. */
	__i915_request_commit(rq);
	__i915_request_queue(rq, NULL);

	timeout = i915_request_wait(rq, 0, HZ / 10);
	if (timeout < 0)
		err = timeout;
	else
		i915_request_retire_upto(rq);

	lockdep_unpin_lock(&tl->mutex, rq->cookie);
	mutex_unlock(&tl->mutex);

	i915_request_put(rq);
	intel_timeline_put(tl);

	return err;
}

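/*
 * Wait for every request on the context's timeline to complete, retiring
 * each one in turn, until the timeline is idle or a wait fails.
 */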
static int context_sync(struct intel_context *ce)
{
	struct intel_timeline *tl = ce->timeline;
	int err = 0;

	mutex_lock(&tl->mutex);
	do {
		struct dma_fence *fence;
		long timeout;

		fence = i915_active_fence_get(&tl->last_request);
		if (!fence)
			break;

		timeout = dma_fence_wait_timeout(fence, false, HZ / 10);
		if (timeout < 0)
			err = timeout;
		else
			i915_request_retire_upto(to_request(fence));

		dma_fence_put(fence);
	} while (!err);
	mutex_unlock(&tl->mutex);

	return err;
}

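/*
 * Poison the trailing page of the context image, run a request on the
 * context followed by one on the kernel context to force a context save,
 * and then check that the poison was left untouched by the HW.
 */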
static int __live_context_size(struct intel_engine_cs *engine,
			       struct i915_gem_context *fixme)
{
	struct intel_context *ce;
	struct i915_request *rq;
	void *vaddr;
	int err;

	ce = intel_context_create(fixme, engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err)
		goto err;

	vaddr = i915_gem_object_pin_map(ce->state->obj,
					i915_coherent_map_type(engine->i915));
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		intel_context_unpin(ce);
		goto err;
	}

	/*
	 * Note that execlists also applies a redzone which it checks on
	 * context unpin when debugging. We are using the same location
	 * and same poison value so that our checks overlap. Despite the
	 * redundancy, we want to keep this little selftest so that we
	 * get coverage of any and all submission backends, and we can
	 * always extend this test to ensure we trick the HW into a
	 * compromising position wrt the various sections that need
	 * to be written into the context state.
	 *
	 * TL;DR: this overlaps with the execlists redzone.
	 */
	vaddr += engine->context_size - I915_GTT_PAGE_SIZE;
	memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);

	rq = intel_context_create_request(ce);
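	/*
	 * The request holds its own pin on the context until it is retired,
	 * and the object map above keeps vaddr valid for the redzone check,
	 * so our explicit pin is no longer needed.
	 */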
	intel_context_unpin(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = request_sync(rq);
	if (err)
		goto err_unpin;

	/* Force the context switch */
	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}
	err = request_sync(rq);
	if (err)
		goto err_unpin;

	if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) {
		pr_err("%s context overwrote trailing red-zone!\n", engine->name);
		err = -EINVAL;
	}

err_unpin:
	i915_gem_object_unpin_map(ce->state->obj);
err:
	intel_context_put(ce);
	return err;
}

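/*
 * For each engine, temporarily grow the reported context size by one page
 * (the redzone) and hide the default state, then use __live_context_size()
 * to check that the HW does not write past the real context image.
 */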
static int live_context_size(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context *fixme;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that our context sizes are correct by seeing if the
	 * HW tries to write past the end of one.
	 */

	fixme = kernel_context(gt->i915);
	if (IS_ERR(fixme))
		return PTR_ERR(fixme);

	for_each_engine(engine, gt, id) {
		struct {
			struct drm_i915_gem_object *state;
			void *pinned;
		} saved;

		if (!engine->context_size)
			continue;

		intel_engine_pm_get(engine);

		/*
		 * Hide the old default state -- we lie about the context size
		 * and get confused when the default state is smaller than
		 * expected. For our do-nothing request, inheriting the
		 * active state is sufficient; we are only checking that we
		 * don't use more than we planned.
		 */
		saved.state = fetch_and_zero(&engine->default_state);
		saved.pinned = fetch_and_zero(&engine->pinned_default_state);

		/* Overlaps with the execlists redzone */
		engine->context_size += I915_GTT_PAGE_SIZE;

		err = __live_context_size(engine, fixme);

		engine->context_size -= I915_GTT_PAGE_SIZE;

		engine->pinned_default_state = saved.pinned;
		engine->default_state = saved.state;

		intel_engine_pm_put(engine);

		if (err)
			break;
	}

	kernel_context_close(fixme);
	return err;
}

static int __live_active_context(struct intel_engine_cs *engine,
				 struct i915_gem_context *fixme)
{
	struct intel_context *ce;
	int pass;
	int err;

	/*
	 * We keep active contexts alive until after a subsequent context
	 * switch as the final write from the context-save will be after
	 * we retire the final request. We track when we unpin the context,
	 * under the presumption that the final pin is from the last request,
	 * and instead of immediately unpinning the context, we add a task
	 * to unpin the context from the next idle-barrier.
	 *
	 * This test makes sure that the context is kept alive until a
	 * subsequent idle-barrier (emitted when the engine wakeref hits 0
	 * with no more outstanding requests).
	 */

	if (intel_engine_pm_is_awake(engine)) {
		pr_err("%s is awake before starting %s!\n",
		       engine->name, __func__);
		return -EINVAL;
	}

	ce = intel_context_create(fixme, engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	for (pass = 0; pass <= 2; pass++) {
		struct i915_request *rq;

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		err = request_sync(rq);
		if (err)
			goto err;

		/* Context will be kept active until after an idle-barrier. */
		if (i915_active_is_idle(&ce->active)) {
			pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			goto err;
		}

		if (!intel_engine_pm_is_awake(engine)) {
			pr_err("%s is asleep before idle-barrier\n",
			       engine->name);
			err = -EINVAL;
			goto err;
		}
	}

	/* Now make sure our idle-barriers are flushed */
	err = context_sync(engine->kernel_context);
	if (err)
		goto err;

	if (!i915_active_is_idle(&ce->active)) {
		pr_err("context is still active!\n");
		err = -EINVAL;
	}

	if (intel_engine_pm_is_awake(engine)) {
		struct drm_printer p = drm_debug_printer(__func__);

		intel_engine_dump(engine, &p,
				  "%s is still awake after idle-barriers\n",
				  engine->name);
		GEM_TRACE_DUMP();

		err = -EINVAL;
		goto err;
	}

err:
	intel_context_put(ce);
	return err;
}

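/*
 * Run __live_active_context() against every engine, using a context created
 * on a mock client file and flushing outstanding work between engines.
 */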
static int live_active_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context *fixme;
	enum intel_engine_id id;
	struct drm_file *file;
	int err = 0;

	file = mock_file(gt->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	fixme = live_context(gt->i915, file);
	if (IS_ERR(fixme)) {
		err = PTR_ERR(fixme);
		goto out_file;
	}

	for_each_engine(engine, gt, id) {
		err = __live_active_context(engine, fixme);
		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

out_file:
	mock_file_free(gt->i915, file);
	return err;
}

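/*
 * Submit a request on @ce that operates on @remote's context image (via
 * intel_context_prepare_remote_request()) and wait for it to complete.
 */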
static int __remote_sync(struct intel_context *ce, struct intel_context *remote)
{
	struct i915_request *rq;
	int err;

	err = intel_context_pin(remote);
	if (err)
		return err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto unpin;
	}

	err = intel_context_prepare_remote_request(remote, rq);
	if (err) {
		i915_request_add(rq);
		goto unpin;
	}

	err = request_sync(rq);

unpin:
	intel_context_unpin(remote);
	return err;
}

static int __live_remote_context(struct intel_engine_cs *engine,
				 struct i915_gem_context *fixme)
{
	struct intel_context *local, *remote;
	int pass;
	int err;

	/*
	 * Check that our idle barriers do not interfere with normal
	 * activity tracking. In particular, check that operating
	 * on the context image remotely (intel_context_prepare_remote_request),
	 * which inserts foreign fences into intel_context.active, does not
	 * clobber the idle-barrier.
	 */

	remote = intel_context_create(fixme, engine);
	if (IS_ERR(remote))
		return PTR_ERR(remote);

	local = intel_context_create(fixme, engine);
	if (IS_ERR(local)) {
		err = PTR_ERR(local);
		goto err_remote;
	}

	for (pass = 0; pass <= 2; pass++) {
		err = __remote_sync(local, remote);
		if (err)
			break;

		err = __remote_sync(engine->kernel_context, remote);
		if (err)
			break;

		if (i915_active_is_idle(&remote->active)) {
			pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n",
			       engine->name, pass);
			err = -EINVAL;
			break;
		}
	}

	intel_context_put(local);
err_remote:
	intel_context_put(remote);
	return err;
}

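/*
 * Run __live_remote_context() against every engine, using contexts created
 * on a mock client file and flushing outstanding work between engines.
 */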
static int live_remote_context(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context *fixme;
	enum intel_engine_id id;
	struct drm_file *file;
	int err = 0;

	file = mock_file(gt->i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	fixme = live_context(gt->i915, file);
	if (IS_ERR(fixme)) {
		err = PTR_ERR(fixme);
		goto out_file;
	}

	for_each_engine(engine, gt, id) {
		err = __live_remote_context(engine, fixme);
		if (err)
			break;

		err = igt_flush_test(gt->i915);
		if (err)
			break;
	}

out_file:
	mock_file_free(gt->i915, file);
	return err;
}

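/* Entry point for the live intel_context selftests; skipped if the GT is wedged. */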
int intel_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_context_size),
		SUBTEST(live_active_context),
		SUBTEST(live_remote_context),
	};
	struct intel_gt *gt = &i915->gt;

	if (intel_gt_is_wedged(gt))
		return 0;

	return intel_gt_live_subtests(tests, gt);
}