xref: /linux/drivers/gpu/drm/i915/gt/uc/selftest_guc.c (revision db5d28c0bfe566908719bec8e25443aabecbb802)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "gt/intel_gt_print.h"
7 #include "intel_guc_print.h"
8 #include "selftests/igt_spinner.h"
9 #include "selftests/intel_scheduler_helpers.h"
10 
request_add_spin(struct i915_request * rq,struct igt_spinner * spin)11 static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
12 {
13 	int err = 0;
14 
15 	i915_request_get(rq);
16 	i915_request_add(rq);
17 	if (spin && !igt_wait_for_spinner(spin, rq))
18 		err = -ETIMEDOUT;
19 
20 	return err;
21 }
22 
nop_user_request(struct intel_context * ce,struct i915_request * from)23 static struct i915_request *nop_user_request(struct intel_context *ce,
24 					     struct i915_request *from)
25 {
26 	struct i915_request *rq;
27 	int ret;
28 
29 	rq = intel_context_create_request(ce);
30 	if (IS_ERR(rq))
31 		return rq;
32 
33 	if (from) {
34 		ret = i915_sw_fence_await_dma_fence(&rq->submit,
35 						    &from->fence, 0,
36 						    I915_FENCE_GFP);
37 		if (ret < 0) {
38 			i915_request_put(rq);
39 			return ERR_PTR(ret);
40 		}
41 	}
42 
43 	i915_request_get(rq);
44 	i915_request_add(rq);
45 
46 	return rq;
47 }
48 
intel_guc_scrub_ctbs(void * arg)49 static int intel_guc_scrub_ctbs(void *arg)
50 {
51 	struct intel_gt *gt = arg;
52 	int ret = 0;
53 	int i;
54 	struct i915_request *last[3] = {NULL, NULL, NULL}, *rq;
55 	intel_wakeref_t wakeref;
56 	struct intel_engine_cs *engine;
57 	struct intel_context *ce;
58 
59 	if (!intel_has_gpu_reset(gt))
60 		return 0;
61 
62 	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
63 	engine = intel_selftest_find_any_engine(gt);
64 
65 	/* Submit requests and inject errors forcing G2H to be dropped */
66 	for (i = 0; i < 3; ++i) {
67 		ce = intel_context_create(engine);
68 		if (IS_ERR(ce)) {
69 			ret = PTR_ERR(ce);
70 			gt_err(gt, "Failed to create context %d: %pe\n", i, ce);
71 			goto err;
72 		}
73 
74 		switch (i) {
75 		case 0:
76 			ce->drop_schedule_enable = true;
77 			break;
78 		case 1:
79 			ce->drop_schedule_disable = true;
80 			break;
81 		case 2:
82 			ce->drop_deregister = true;
83 			break;
84 		}
85 
86 		rq = nop_user_request(ce, NULL);
87 		intel_context_put(ce);
88 
89 		if (IS_ERR(rq)) {
90 			ret = PTR_ERR(rq);
91 			gt_err(gt, "Failed to create request %d: %pe\n", i, rq);
92 			goto err;
93 		}
94 
95 		last[i] = rq;
96 	}
97 
98 	for (i = 0; i < 3; ++i) {
99 		ret = i915_request_wait(last[i], 0, HZ);
100 		if (ret < 0) {
101 			gt_err(gt, "Last request failed to complete: %pe\n", ERR_PTR(ret));
102 			goto err;
103 		}
104 		i915_request_put(last[i]);
105 		last[i] = NULL;
106 	}
107 
108 	/* Force all H2G / G2H to be submitted / processed */
109 	intel_gt_retire_requests(gt);
110 	msleep(500);
111 
112 	/* Scrub missing G2H */
113 	intel_gt_handle_error(engine->gt, -1, 0, "selftest reset");
114 
115 	/* GT will not idle if G2H are lost */
116 	ret = intel_gt_wait_for_idle(gt, HZ);
117 	if (ret < 0) {
118 		gt_err(gt, "GT failed to idle: %pe\n", ERR_PTR(ret));
119 		goto err;
120 	}
121 
122 err:
123 	for (i = 0; i < 3; ++i)
124 		if (last[i])
125 			i915_request_put(last[i]);
126 	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
127 
128 	return ret;
129 }
130 
131 /*
132  * intel_guc_steal_guc_ids - Test to exhaust all guc_ids and then steal one
133  *
134  * This test creates a spinner which is used to block all subsequent submissions
135  * until it completes. Next, a loop creates a context and a NOP request each
136  * iteration until the guc_ids are exhausted (request creation returns -EAGAIN).
137  * The spinner is ended, unblocking all requests created in the loop. At this
138  * point all guc_ids are exhausted but are available to steal. Try to create
139  * another request which should successfully steal a guc_id. Wait on last
140  * request to complete, idle GPU, verify a guc_id was stolen via a counter, and
141  * exit the test. Test also artificially reduces the number of guc_ids so the
142  * test runs in a timely manner.
143  */
intel_guc_steal_guc_ids(void * arg)144 static int intel_guc_steal_guc_ids(void *arg)
145 {
146 	struct intel_gt *gt = arg;
147 	struct intel_guc *guc = gt_to_guc(gt);
148 	int ret, sv, context_index = 0;
149 	intel_wakeref_t wakeref;
150 	struct intel_engine_cs *engine;
151 	struct intel_context **ce;
152 	struct igt_spinner spin;
153 	struct i915_request *spin_rq = NULL, *rq, *last = NULL;
154 	int number_guc_id_stolen = guc->number_guc_id_stolen;
155 
156 	ce = kcalloc(GUC_MAX_CONTEXT_ID, sizeof(*ce), GFP_KERNEL);
157 	if (!ce) {
158 		guc_err(guc, "Context array allocation failed\n");
159 		return -ENOMEM;
160 	}
161 
162 	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
163 	engine = intel_selftest_find_any_engine(gt);
164 	sv = guc->submission_state.num_guc_ids;
165 	guc->submission_state.num_guc_ids = 512;
166 
167 	/* Create spinner to block requests in below loop */
168 	ce[context_index] = intel_context_create(engine);
169 	if (IS_ERR(ce[context_index])) {
170 		ret = PTR_ERR(ce[context_index]);
171 		guc_err(guc, "Failed to create context: %pe\n", ce[context_index]);
172 		ce[context_index] = NULL;
173 		goto err_wakeref;
174 	}
175 	ret = igt_spinner_init(&spin, engine->gt);
176 	if (ret) {
177 		guc_err(guc, "Failed to create spinner: %pe\n", ERR_PTR(ret));
178 		goto err_contexts;
179 	}
180 	spin_rq = igt_spinner_create_request(&spin, ce[context_index],
181 					     MI_ARB_CHECK);
182 	if (IS_ERR(spin_rq)) {
183 		ret = PTR_ERR(spin_rq);
184 		guc_err(guc, "Failed to create spinner request: %pe\n", spin_rq);
185 		goto err_contexts;
186 	}
187 	ret = request_add_spin(spin_rq, &spin);
188 	if (ret) {
189 		guc_err(guc, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
190 		goto err_spin_rq;
191 	}
192 
193 	/* Use all guc_ids */
194 	while (ret != -EAGAIN) {
195 		ce[++context_index] = intel_context_create(engine);
196 		if (IS_ERR(ce[context_index])) {
197 			ret = PTR_ERR(ce[context_index]);
198 			guc_err(guc, "Failed to create context: %pe\n", ce[context_index]);
199 			ce[context_index--] = NULL;
200 			goto err_spin_rq;
201 		}
202 
203 		rq = nop_user_request(ce[context_index], spin_rq);
204 		if (IS_ERR(rq)) {
205 			ret = PTR_ERR(rq);
206 			rq = NULL;
207 			if ((ret != -EAGAIN) || !last) {
208 				guc_err(guc, "Failed to create %srequest %d: %pe\n",
209 					last ? "" : "first ", context_index, ERR_PTR(ret));
210 				goto err_spin_rq;
211 			}
212 		} else {
213 			if (last)
214 				i915_request_put(last);
215 			last = rq;
216 		}
217 	}
218 
219 	/* Release blocked requests */
220 	igt_spinner_end(&spin);
221 	ret = intel_selftest_wait_for_rq(spin_rq);
222 	if (ret) {
223 		guc_err(guc, "Spin request failed to complete: %pe\n", ERR_PTR(ret));
224 		i915_request_put(last);
225 		goto err_spin_rq;
226 	}
227 	i915_request_put(spin_rq);
228 	igt_spinner_fini(&spin);
229 	spin_rq = NULL;
230 
231 	/* Wait for last request */
232 	ret = i915_request_wait(last, 0, HZ * 30);
233 	i915_request_put(last);
234 	if (ret < 0) {
235 		guc_err(guc, "Last request failed to complete: %pe\n", ERR_PTR(ret));
236 		goto err_spin_rq;
237 	}
238 
239 	/* Try to steal guc_id */
240 	rq = nop_user_request(ce[context_index], NULL);
241 	if (IS_ERR(rq)) {
242 		ret = PTR_ERR(rq);
243 		guc_err(guc, "Failed to steal guc_id %d: %pe\n", context_index, rq);
244 		goto err_spin_rq;
245 	}
246 
247 	/* Wait for request with stolen guc_id */
248 	ret = i915_request_wait(rq, 0, HZ);
249 	i915_request_put(rq);
250 	if (ret < 0) {
251 		guc_err(guc, "Request with stolen guc_id failed to complete: %pe\n", ERR_PTR(ret));
252 		goto err_spin_rq;
253 	}
254 
255 	/* Wait for idle */
256 	ret = intel_gt_wait_for_idle(gt, HZ * 30);
257 	if (ret < 0) {
258 		guc_err(guc, "GT failed to idle: %pe\n", ERR_PTR(ret));
259 		goto err_spin_rq;
260 	}
261 
262 	/* Verify a guc_id was stolen */
263 	if (guc->number_guc_id_stolen == number_guc_id_stolen) {
264 		guc_err(guc, "No guc_id was stolen");
265 		ret = -EINVAL;
266 	} else {
267 		ret = 0;
268 	}
269 
270 err_spin_rq:
271 	if (spin_rq) {
272 		igt_spinner_end(&spin);
273 		intel_selftest_wait_for_rq(spin_rq);
274 		i915_request_put(spin_rq);
275 		igt_spinner_fini(&spin);
276 		intel_gt_wait_for_idle(gt, HZ * 30);
277 	}
278 err_contexts:
279 	for (; context_index >= 0 && ce[context_index]; --context_index)
280 		intel_context_put(ce[context_index]);
281 err_wakeref:
282 	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
283 	kfree(ce);
284 	guc->submission_state.num_guc_ids = sv;
285 
286 	return ret;
287 }
288 
289 /*
290  * Send a context schedule H2G message with an invalid context id.
291  * This should generate a GUC_RESULT_INVALID_CONTEXT response.
292  */
bad_h2g(struct intel_guc * guc)293 static int bad_h2g(struct intel_guc *guc)
294 {
295 	u32 action[] = {
296 	   INTEL_GUC_ACTION_SCHED_CONTEXT,
297 	   0x12345678,
298 	};
299 
300 	return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0);
301 }
302 
303 /*
304  * Set a spinner running to make sure the system is alive and active,
305  * then send a bad but asynchronous H2G command and wait to see if an
306  * error response is returned. If no response is received or if the
307  * spinner dies then the test will fail.
308  */
309 #define FAST_RESPONSE_TIMEOUT_MS	1000
intel_guc_fast_request(void * arg)310 static int intel_guc_fast_request(void *arg)
311 {
312 	struct intel_gt *gt = arg;
313 	struct intel_context *ce;
314 	struct igt_spinner spin;
315 	struct i915_request *rq;
316 	intel_wakeref_t wakeref;
317 	struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt);
318 	bool spinning = false;
319 	int ret = 0;
320 
321 	if (!engine)
322 		return 0;
323 
324 	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
325 
326 	ce = intel_context_create(engine);
327 	if (IS_ERR(ce)) {
328 		ret = PTR_ERR(ce);
329 		gt_err(gt, "Failed to create spinner request: %pe\n", ce);
330 		goto err_pm;
331 	}
332 
333 	ret = igt_spinner_init(&spin, engine->gt);
334 	if (ret) {
335 		gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret));
336 		goto err_pm;
337 	}
338 	spinning = true;
339 
340 	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
341 	intel_context_put(ce);
342 	if (IS_ERR(rq)) {
343 		ret = PTR_ERR(rq);
344 		gt_err(gt, "Failed to create spinner request: %pe\n", rq);
345 		goto err_spin;
346 	}
347 
348 	ret = request_add_spin(rq, &spin);
349 	if (ret) {
350 		gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
351 		goto err_rq;
352 	}
353 
354 	gt->uc.guc.fast_response_selftest = 1;
355 
356 	ret = bad_h2g(&gt->uc.guc);
357 	if (ret) {
358 		gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret));
359 		goto err_rq;
360 	}
361 
362 	ret = wait_for(gt->uc.guc.fast_response_selftest != 1 || i915_request_completed(rq),
363 		       FAST_RESPONSE_TIMEOUT_MS);
364 	if (ret) {
365 		gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret));
366 		goto err_rq;
367 	}
368 
369 	if (i915_request_completed(rq)) {
370 		gt_err(gt, "Spinner died waiting for fast request error!\n");
371 		ret = -EIO;
372 		goto err_rq;
373 	}
374 
375 	if (gt->uc.guc.fast_response_selftest != 2) {
376 		gt_err(gt, "Unexpected fast response count: %d\n",
377 		       gt->uc.guc.fast_response_selftest);
378 		goto err_rq;
379 	}
380 
381 	igt_spinner_end(&spin);
382 	spinning = false;
383 
384 	ret = intel_selftest_wait_for_rq(rq);
385 	if (ret) {
386 		gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret));
387 		goto err_rq;
388 	}
389 
390 err_rq:
391 	i915_request_put(rq);
392 
393 err_spin:
394 	if (spinning)
395 		igt_spinner_end(&spin);
396 	igt_spinner_fini(&spin);
397 
398 err_pm:
399 	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
400 	return ret;
401 }
402 
intel_guc_live_selftests(struct drm_i915_private * i915)403 int intel_guc_live_selftests(struct drm_i915_private *i915)
404 {
405 	static const struct i915_subtest tests[] = {
406 		SUBTEST(intel_guc_scrub_ctbs),
407 		SUBTEST(intel_guc_steal_guc_ids),
408 		SUBTEST(intel_guc_fast_request),
409 	};
410 	struct intel_gt *gt = to_gt(i915);
411 
412 	if (intel_gt_is_wedged(gt))
413 		return 0;
414 
415 	if (!intel_uc_uses_guc_submission(&gt->uc))
416 		return 0;
417 
418 	return intel_gt_live_subtests(tests, gt);
419 }
420