xref: /linux/drivers/gpu/drm/i915/selftests/i915_perf.c (revision d7bf4786b5250b0e490a937d1f8a16ee3a54adbe)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2019 Intel Corporation
5  */
6 
7 #include <linux/kref.h>
8 
9 #include "gem/i915_gem_pm.h"
10 #include "gt/intel_gt.h"
11 
12 #include "i915_selftest.h"
13 
14 #include "igt_flush_test.h"
15 #include "lib_sw_fence.h"
16 
17 #define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"
18 
19 static int
20 alloc_empty_config(struct i915_perf *perf)
21 {
22 	struct i915_oa_config *oa_config;
23 
24 	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
25 	if (!oa_config)
26 		return -ENOMEM;
27 
28 	oa_config->perf = perf;
29 	kref_init(&oa_config->ref);
30 
31 	strscpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));
32 
33 	mutex_lock(&perf->metrics_lock);
34 
35 	oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
36 	if (oa_config->id < 0)  {
37 		mutex_unlock(&perf->metrics_lock);
38 		i915_oa_config_put(oa_config);
39 		return -ENOMEM;
40 	}
41 
42 	mutex_unlock(&perf->metrics_lock);
43 
44 	return 0;
45 }
46 
47 static void
48 destroy_empty_config(struct i915_perf *perf)
49 {
50 	struct i915_oa_config *oa_config = NULL, *tmp;
51 	int id;
52 
53 	mutex_lock(&perf->metrics_lock);
54 
55 	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
56 		if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
57 			oa_config = tmp;
58 			break;
59 		}
60 	}
61 
62 	if (oa_config)
63 		idr_remove(&perf->metrics_idr, oa_config->id);
64 
65 	mutex_unlock(&perf->metrics_lock);
66 
67 	if (oa_config)
68 		i915_oa_config_put(oa_config);
69 }
70 
71 static struct i915_oa_config *
72 get_empty_config(struct i915_perf *perf)
73 {
74 	struct i915_oa_config *oa_config = NULL, *tmp;
75 	int id;
76 
77 	mutex_lock(&perf->metrics_lock);
78 
79 	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
80 		if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
81 			oa_config = i915_oa_config_get(tmp);
82 			break;
83 		}
84 	}
85 
86 	mutex_unlock(&perf->metrics_lock);
87 
88 	return oa_config;
89 }
90 
91 static struct i915_perf_stream *
92 test_stream(struct i915_perf *perf)
93 {
94 	struct drm_i915_perf_open_param param = {};
95 	struct i915_oa_config *oa_config = get_empty_config(perf);
96 	struct perf_open_properties props = {
97 		.engine = intel_engine_lookup_user(perf->i915,
98 						   I915_ENGINE_CLASS_RENDER,
99 						   0),
100 		.sample_flags = SAMPLE_OA_REPORT,
101 		.oa_format = GRAPHICS_VER(perf->i915) == 12 ?
102 		I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
103 	};
104 	struct i915_perf_stream *stream;
105 	struct intel_gt *gt;
106 
107 	if (!props.engine)
108 		return NULL;
109 
110 	gt = props.engine->gt;
111 
112 	if (!oa_config)
113 		return NULL;
114 
115 	props.metrics_set = oa_config->id;
116 
117 	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
118 	if (!stream) {
119 		i915_oa_config_put(oa_config);
120 		return NULL;
121 	}
122 
123 	stream->perf = perf;
124 
125 	mutex_lock(&gt->perf.lock);
126 	if (i915_oa_stream_init(stream, &param, &props)) {
127 		kfree(stream);
128 		stream =  NULL;
129 	}
130 	mutex_unlock(&gt->perf.lock);
131 
132 	i915_oa_config_put(oa_config);
133 
134 	return stream;
135 }
136 
137 static void stream_destroy(struct i915_perf_stream *stream)
138 {
139 	struct intel_gt *gt = stream->engine->gt;
140 
141 	mutex_lock(&gt->perf.lock);
142 	i915_perf_destroy_locked(stream);
143 	mutex_unlock(&gt->perf.lock);
144 }
145 
146 static int live_sanitycheck(void *arg)
147 {
148 	struct drm_i915_private *i915 = arg;
149 	struct i915_perf_stream *stream;
150 
151 	/* Quick check we can create a perf stream */
152 
153 	stream = test_stream(&i915->perf);
154 	if (!stream)
155 		return -EINVAL;
156 
157 	stream_destroy(stream);
158 	return 0;
159 }
160 
161 static int write_timestamp(struct i915_request *rq, int slot)
162 {
163 	u32 *cs;
164 	int len;
165 
166 	cs = intel_ring_begin(rq, 6);
167 	if (IS_ERR(cs))
168 		return PTR_ERR(cs);
169 
170 	len = 5;
171 	if (GRAPHICS_VER(rq->i915) >= 8)
172 		len++;
173 
174 	*cs++ = GFX_OP_PIPE_CONTROL(len);
175 	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
176 		PIPE_CONTROL_STORE_DATA_INDEX |
177 		PIPE_CONTROL_WRITE_TIMESTAMP;
178 	*cs++ = slot * sizeof(u32);
179 	*cs++ = 0;
180 	*cs++ = 0;
181 	*cs++ = 0;
182 
183 	intel_ring_advance(rq, cs);
184 
185 	return 0;
186 }
187 
188 static ktime_t poll_status(struct i915_request *rq, int slot)
189 {
190 	while (!intel_read_status_page(rq->engine, slot) &&
191 	       !i915_request_completed(rq))
192 		cpu_relax();
193 
194 	return ktime_get();
195 }
196 
197 static int live_noa_delay(void *arg)
198 {
199 	struct drm_i915_private *i915 = arg;
200 	struct i915_perf_stream *stream;
201 	struct i915_request *rq;
202 	ktime_t t0, t1;
203 	u64 expected;
204 	u32 delay;
205 	int err;
206 	int i;
207 
208 	/* Check that the GPU delays matches expectations */
209 
210 	stream = test_stream(&i915->perf);
211 	if (!stream)
212 		return -ENOMEM;
213 
214 	expected = atomic64_read(&stream->perf->noa_programming_delay);
215 
216 	if (stream->engine->class != RENDER_CLASS) {
217 		err = -ENODEV;
218 		goto out;
219 	}
220 
221 	for (i = 0; i < 4; i++)
222 		intel_write_status_page(stream->engine, 0x100 + i, 0);
223 
224 	rq = intel_engine_create_kernel_request(stream->engine);
225 	if (IS_ERR(rq)) {
226 		err = PTR_ERR(rq);
227 		goto out;
228 	}
229 
230 	if (rq->engine->emit_init_breadcrumb) {
231 		err = rq->engine->emit_init_breadcrumb(rq);
232 		if (err) {
233 			i915_request_add(rq);
234 			goto out;
235 		}
236 	}
237 
238 	err = write_timestamp(rq, 0x100);
239 	if (err) {
240 		i915_request_add(rq);
241 		goto out;
242 	}
243 
244 	err = rq->engine->emit_bb_start(rq,
245 					i915_ggtt_offset(stream->noa_wait), 0,
246 					I915_DISPATCH_SECURE);
247 	if (err) {
248 		i915_request_add(rq);
249 		goto out;
250 	}
251 
252 	err = write_timestamp(rq, 0x102);
253 	if (err) {
254 		i915_request_add(rq);
255 		goto out;
256 	}
257 
258 	i915_request_get(rq);
259 	i915_request_add(rq);
260 
261 	preempt_disable();
262 	t0 = poll_status(rq, 0x100);
263 	t1 = poll_status(rq, 0x102);
264 	preempt_enable();
265 
266 	pr_info("CPU delay: %lluns, expected %lluns\n",
267 		ktime_sub(t1, t0), expected);
268 
269 	delay = intel_read_status_page(stream->engine, 0x102);
270 	delay -= intel_read_status_page(stream->engine, 0x100);
271 	delay = intel_gt_clock_interval_to_ns(stream->engine->gt, delay);
272 	pr_info("GPU delay: %uns, expected %lluns\n",
273 		delay, expected);
274 
275 	if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
276 		pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
277 		       delay / 1000,
278 		       div_u64(3 * expected, 4000),
279 		       div_u64(3 * expected, 2000));
280 		err = -EINVAL;
281 	}
282 
283 	i915_request_put(rq);
284 out:
285 	stream_destroy(stream);
286 	return err;
287 }
288 
289 static int live_noa_gpr(void *arg)
290 {
291 	struct drm_i915_private *i915 = arg;
292 	struct i915_perf_stream *stream;
293 	struct intel_context *ce;
294 	struct i915_request *rq;
295 	u32 *cs, *store;
296 	void *scratch;
297 	u32 gpr0;
298 	int err;
299 	int i;
300 
301 	/* Check that the delay does not clobber user context state (GPR) */
302 
303 	stream = test_stream(&i915->perf);
304 	if (!stream)
305 		return -ENOMEM;
306 
307 	gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0));
308 
309 	ce = intel_context_create(stream->engine);
310 	if (IS_ERR(ce)) {
311 		err = PTR_ERR(ce);
312 		goto out;
313 	}
314 
315 	/* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
316 	scratch = __px_vaddr(ce->vm->scratch[0]);
317 	memset(scratch, POISON_FREE, PAGE_SIZE);
318 
319 	rq = intel_context_create_request(ce);
320 	if (IS_ERR(rq)) {
321 		err = PTR_ERR(rq);
322 		goto out_ce;
323 	}
324 	i915_request_get(rq);
325 
326 	if (rq->engine->emit_init_breadcrumb) {
327 		err = rq->engine->emit_init_breadcrumb(rq);
328 		if (err) {
329 			i915_request_add(rq);
330 			goto out_rq;
331 		}
332 	}
333 
334 	/* Fill the 16 qword [32 dword] GPR with a known unlikely value */
335 	cs = intel_ring_begin(rq, 2 * 32 + 2);
336 	if (IS_ERR(cs)) {
337 		err = PTR_ERR(cs);
338 		i915_request_add(rq);
339 		goto out_rq;
340 	}
341 
342 	*cs++ = MI_LOAD_REGISTER_IMM(32);
343 	for (i = 0; i < 32; i++) {
344 		*cs++ = gpr0 + i * sizeof(u32);
345 		*cs++ = STACK_MAGIC;
346 	}
347 	*cs++ = MI_NOOP;
348 	intel_ring_advance(rq, cs);
349 
350 	/* Execute the GPU delay */
351 	err = rq->engine->emit_bb_start(rq,
352 					i915_ggtt_offset(stream->noa_wait), 0,
353 					I915_DISPATCH_SECURE);
354 	if (err) {
355 		i915_request_add(rq);
356 		goto out_rq;
357 	}
358 
359 	/* Read the GPR back, using the pinned global HWSP for convenience */
360 	store = memset32(rq->engine->status_page.addr + 512, 0, 32);
361 	for (i = 0; i < 32; i++) {
362 		u32 cmd;
363 
364 		cs = intel_ring_begin(rq, 4);
365 		if (IS_ERR(cs)) {
366 			err = PTR_ERR(cs);
367 			i915_request_add(rq);
368 			goto out_rq;
369 		}
370 
371 		cmd = MI_STORE_REGISTER_MEM;
372 		if (GRAPHICS_VER(i915) >= 8)
373 			cmd++;
374 		cmd |= MI_USE_GGTT;
375 
376 		*cs++ = cmd;
377 		*cs++ = gpr0 + i * sizeof(u32);
378 		*cs++ = i915_ggtt_offset(rq->engine->status_page.vma) +
379 			offset_in_page(store) +
380 			i * sizeof(u32);
381 		*cs++ = 0;
382 		intel_ring_advance(rq, cs);
383 	}
384 
385 	i915_request_add(rq);
386 
387 	if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) {
388 		pr_err("noa_wait timed out\n");
389 		intel_gt_set_wedged(stream->engine->gt);
390 		err = -EIO;
391 		goto out_rq;
392 	}
393 
394 	/* Verify that the GPR contain our expected values */
395 	for (i = 0; i < 32; i++) {
396 		if (store[i] == STACK_MAGIC)
397 			continue;
398 
399 		pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n",
400 		       i, store[i], STACK_MAGIC);
401 		err = -EINVAL;
402 	}
403 
404 	/* Verify that the user's scratch page was not used for GPR storage */
405 	if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) {
406 		pr_err("Scratch page overwritten!\n");
407 		igt_hexdump(scratch, 4096);
408 		err = -EINVAL;
409 	}
410 
411 out_rq:
412 	i915_request_put(rq);
413 out_ce:
414 	intel_context_put(ce);
415 out:
416 	stream_destroy(stream);
417 	return err;
418 }
419 
420 int i915_perf_live_selftests(struct drm_i915_private *i915)
421 {
422 	static const struct i915_subtest tests[] = {
423 		SUBTEST(live_sanitycheck),
424 		SUBTEST(live_noa_delay),
425 		SUBTEST(live_noa_gpr),
426 	};
427 	struct i915_perf *perf = &i915->perf;
428 	int err;
429 
430 	if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
431 		return 0;
432 
433 	if (intel_gt_is_wedged(to_gt(i915)))
434 		return 0;
435 
436 	err = alloc_empty_config(&i915->perf);
437 	if (err)
438 		return err;
439 
440 	err = i915_live_subtests(tests, i915);
441 
442 	destroy_empty_config(&i915->perf);
443 
444 	return err;
445 }
446