xref: /linux/drivers/gpu/drm/i915/gt/uc/selftest_guc.c (revision 90e0d94d369d342e735a75174439482119b6c393)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include "selftests/igt_spinner.h"
7 #include "selftests/intel_scheduler_helpers.h"
8 
/*
 * request_add_spin - submit a request and optionally wait for its spinner
 * @rq: request to submit; i915_request_get() is called on it before it is
 *      added, so the caller is expected to drop that reference later
 * @spin: spinner associated with @rq, or NULL to skip the wait
 *
 * Returns 0 on success, or -ETIMEDOUT if @spin is given and
 * igt_wait_for_spinner() reports that the spinner did not start.
 */
static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
{
	i915_request_get(rq);
	i915_request_add(rq);

	/* Nothing to wait for when no spinner was supplied */
	if (!spin)
		return 0;

	if (igt_wait_for_spinner(spin, rq))
		return 0;

	return -ETIMEDOUT;
}
20 
21 static struct i915_request *nop_user_request(struct intel_context *ce,
22 					     struct i915_request *from)
23 {
24 	struct i915_request *rq;
25 	int ret;
26 
27 	rq = intel_context_create_request(ce);
28 	if (IS_ERR(rq))
29 		return rq;
30 
31 	if (from) {
32 		ret = i915_sw_fence_await_dma_fence(&rq->submit,
33 						    &from->fence, 0,
34 						    I915_FENCE_GFP);
35 		if (ret < 0) {
36 			i915_request_put(rq);
37 			return ERR_PTR(ret);
38 		}
39 	}
40 
41 	i915_request_get(rq);
42 	i915_request_add(rq);
43 
44 	return rq;
45 }
46 
47 static int intel_guc_scrub_ctbs(void *arg)
48 {
49 	struct intel_gt *gt = arg;
50 	int ret = 0;
51 	int i;
52 	struct i915_request *last[3] = {NULL, NULL, NULL}, *rq;
53 	intel_wakeref_t wakeref;
54 	struct intel_engine_cs *engine;
55 	struct intel_context *ce;
56 
57 	if (!intel_has_gpu_reset(gt))
58 		return 0;
59 
60 	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
61 	engine = intel_selftest_find_any_engine(gt);
62 
63 	/* Submit requests and inject errors forcing G2H to be dropped */
64 	for (i = 0; i < 3; ++i) {
65 		ce = intel_context_create(engine);
66 		if (IS_ERR(ce)) {
67 			ret = PTR_ERR(ce);
68 			drm_err(&gt->i915->drm, "Failed to create context, %d: %d\n", i, ret);
69 			goto err;
70 		}
71 
72 		switch (i) {
73 		case 0:
74 			ce->drop_schedule_enable = true;
75 			break;
76 		case 1:
77 			ce->drop_schedule_disable = true;
78 			break;
79 		case 2:
80 			ce->drop_deregister = true;
81 			break;
82 		}
83 
84 		rq = nop_user_request(ce, NULL);
85 		intel_context_put(ce);
86 
87 		if (IS_ERR(rq)) {
88 			ret = PTR_ERR(rq);
89 			drm_err(&gt->i915->drm, "Failed to create request, %d: %d\n", i, ret);
90 			goto err;
91 		}
92 
93 		last[i] = rq;
94 	}
95 
96 	for (i = 0; i < 3; ++i) {
97 		ret = i915_request_wait(last[i], 0, HZ);
98 		if (ret < 0) {
99 			drm_err(&gt->i915->drm, "Last request failed to complete: %d\n", ret);
100 			goto err;
101 		}
102 		i915_request_put(last[i]);
103 		last[i] = NULL;
104 	}
105 
106 	/* Force all H2G / G2H to be submitted / processed */
107 	intel_gt_retire_requests(gt);
108 	msleep(500);
109 
110 	/* Scrub missing G2H */
111 	intel_gt_handle_error(engine->gt, -1, 0, "selftest reset");
112 
113 	/* GT will not idle if G2H are lost */
114 	ret = intel_gt_wait_for_idle(gt, HZ);
115 	if (ret < 0) {
116 		drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret);
117 		goto err;
118 	}
119 
120 err:
121 	for (i = 0; i < 3; ++i)
122 		if (last[i])
123 			i915_request_put(last[i]);
124 	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
125 
126 	return ret;
127 }
128 
129 /*
130  * intel_guc_steal_guc_ids - Test to exhaust all guc_ids and then steal one
131  *
132  * This test creates a spinner which is used to block all subsequent submissions
133  * until it completes. Next, a loop creates a context and a NOP request each
134  * iteration until the guc_ids are exhausted (request creation returns -EAGAIN).
135  * The spinner is ended, unblocking all requests created in the loop. At this
136  * point all guc_ids are exhausted but are available to steal. Try to create
137  * another request which should successfully steal a guc_id. Wait on last
138  * request to complete, idle GPU, verify a guc_id was stolen via a counter, and
139  * exit the test. Test also artificially reduces the number of guc_ids so the
140  * test runs in a timely manner.
141  */
142 static int intel_guc_steal_guc_ids(void *arg)
143 {
144 	struct intel_gt *gt = arg;
145 	struct intel_guc *guc = &gt->uc.guc;
146 	int ret, sv, context_index = 0;
147 	intel_wakeref_t wakeref;
148 	struct intel_engine_cs *engine;
149 	struct intel_context **ce;
150 	struct igt_spinner spin;
151 	struct i915_request *spin_rq = NULL, *rq, *last = NULL;
152 	int number_guc_id_stolen = guc->number_guc_id_stolen;
153 
154 	ce = kcalloc(GUC_MAX_CONTEXT_ID, sizeof(*ce), GFP_KERNEL);
155 	if (!ce) {
156 		drm_err(&gt->i915->drm, "Context array allocation failed\n");
157 		return -ENOMEM;
158 	}
159 
160 	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
161 	engine = intel_selftest_find_any_engine(gt);
162 	sv = guc->submission_state.num_guc_ids;
163 	guc->submission_state.num_guc_ids = 512;
164 
165 	/* Create spinner to block requests in below loop */
166 	ce[context_index] = intel_context_create(engine);
167 	if (IS_ERR(ce[context_index])) {
168 		ret = PTR_ERR(ce[context_index]);
169 		ce[context_index] = NULL;
170 		drm_err(&gt->i915->drm, "Failed to create context: %d\n", ret);
171 		goto err_wakeref;
172 	}
173 	ret = igt_spinner_init(&spin, engine->gt);
174 	if (ret) {
175 		drm_err(&gt->i915->drm, "Failed to create spinner: %d\n", ret);
176 		goto err_contexts;
177 	}
178 	spin_rq = igt_spinner_create_request(&spin, ce[context_index],
179 					     MI_ARB_CHECK);
180 	if (IS_ERR(spin_rq)) {
181 		ret = PTR_ERR(spin_rq);
182 		drm_err(&gt->i915->drm, "Failed to create spinner request: %d\n", ret);
183 		goto err_contexts;
184 	}
185 	ret = request_add_spin(spin_rq, &spin);
186 	if (ret) {
187 		drm_err(&gt->i915->drm, "Failed to add Spinner request: %d\n", ret);
188 		goto err_spin_rq;
189 	}
190 
191 	/* Use all guc_ids */
192 	while (ret != -EAGAIN) {
193 		ce[++context_index] = intel_context_create(engine);
194 		if (IS_ERR(ce[context_index])) {
195 			ret = PTR_ERR(ce[context_index--]);
196 			ce[context_index] = NULL;
197 			drm_err(&gt->i915->drm, "Failed to create context: %d\n", ret);
198 			goto err_spin_rq;
199 		}
200 
201 		rq = nop_user_request(ce[context_index], spin_rq);
202 		if (IS_ERR(rq)) {
203 			ret = PTR_ERR(rq);
204 			rq = NULL;
205 			if (ret != -EAGAIN) {
206 				drm_err(&gt->i915->drm, "Failed to create request, %d: %d\n",
207 					context_index, ret);
208 				goto err_spin_rq;
209 			}
210 		} else {
211 			if (last)
212 				i915_request_put(last);
213 			last = rq;
214 		}
215 	}
216 
217 	/* Release blocked requests */
218 	igt_spinner_end(&spin);
219 	ret = intel_selftest_wait_for_rq(spin_rq);
220 	if (ret) {
221 		drm_err(&gt->i915->drm, "Spin request failed to complete: %d\n", ret);
222 		i915_request_put(last);
223 		goto err_spin_rq;
224 	}
225 	i915_request_put(spin_rq);
226 	igt_spinner_fini(&spin);
227 	spin_rq = NULL;
228 
229 	/* Wait for last request */
230 	ret = i915_request_wait(last, 0, HZ * 30);
231 	i915_request_put(last);
232 	if (ret < 0) {
233 		drm_err(&gt->i915->drm, "Last request failed to complete: %d\n", ret);
234 		goto err_spin_rq;
235 	}
236 
237 	/* Try to steal guc_id */
238 	rq = nop_user_request(ce[context_index], NULL);
239 	if (IS_ERR(rq)) {
240 		ret = PTR_ERR(rq);
241 		drm_err(&gt->i915->drm, "Failed to steal guc_id, %d: %d\n", context_index, ret);
242 		goto err_spin_rq;
243 	}
244 
245 	/* Wait for request with stolen guc_id */
246 	ret = i915_request_wait(rq, 0, HZ);
247 	i915_request_put(rq);
248 	if (ret < 0) {
249 		drm_err(&gt->i915->drm, "Request with stolen guc_id failed to complete: %d\n", ret);
250 		goto err_spin_rq;
251 	}
252 
253 	/* Wait for idle */
254 	ret = intel_gt_wait_for_idle(gt, HZ * 30);
255 	if (ret < 0) {
256 		drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret);
257 		goto err_spin_rq;
258 	}
259 
260 	/* Verify a guc_id was stolen */
261 	if (guc->number_guc_id_stolen == number_guc_id_stolen) {
262 		drm_err(&gt->i915->drm, "No guc_id was stolen");
263 		ret = -EINVAL;
264 	} else {
265 		ret = 0;
266 	}
267 
268 err_spin_rq:
269 	if (spin_rq) {
270 		igt_spinner_end(&spin);
271 		intel_selftest_wait_for_rq(spin_rq);
272 		i915_request_put(spin_rq);
273 		igt_spinner_fini(&spin);
274 		intel_gt_wait_for_idle(gt, HZ * 30);
275 	}
276 err_contexts:
277 	for (; context_index >= 0 && ce[context_index]; --context_index)
278 		intel_context_put(ce[context_index]);
279 err_wakeref:
280 	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
281 	kfree(ce);
282 	guc->submission_state.num_guc_ids = sv;
283 
284 	return ret;
285 }
286 
287 int intel_guc_live_selftests(struct drm_i915_private *i915)
288 {
289 	static const struct i915_subtest tests[] = {
290 		SUBTEST(intel_guc_scrub_ctbs),
291 		SUBTEST(intel_guc_steal_guc_ids),
292 	};
293 	struct intel_gt *gt = to_gt(i915);
294 
295 	if (intel_gt_is_wedged(gt))
296 		return 0;
297 
298 	if (!intel_uc_uses_guc_submission(&gt->uc))
299 		return 0;
300 
301 	return intel_gt_live_subtests(tests, gt);
302 }
303