1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2019 Intel Corporation 5 */ 6 7 #include <linux/kref.h> 8 9 #include "gem/i915_gem_pm.h" 10 #include "gt/intel_gt.h" 11 12 #include "i915_selftest.h" 13 14 #include "igt_flush_test.h" 15 #include "lib_sw_fence.h" 16 17 static struct i915_perf_stream * 18 test_stream(struct i915_perf *perf) 19 { 20 struct drm_i915_perf_open_param param = {}; 21 struct perf_open_properties props = { 22 .engine = intel_engine_lookup_user(perf->i915, 23 I915_ENGINE_CLASS_RENDER, 24 0), 25 .sample_flags = SAMPLE_OA_REPORT, 26 .oa_format = I915_OA_FORMAT_C4_B8, 27 .metrics_set = 1, 28 }; 29 struct i915_perf_stream *stream; 30 31 stream = kzalloc(sizeof(*stream), GFP_KERNEL); 32 if (!stream) 33 return NULL; 34 35 stream->perf = perf; 36 37 mutex_lock(&perf->lock); 38 if (i915_oa_stream_init(stream, ¶m, &props)) { 39 kfree(stream); 40 stream = NULL; 41 } 42 mutex_unlock(&perf->lock); 43 44 return stream; 45 } 46 47 static void stream_destroy(struct i915_perf_stream *stream) 48 { 49 struct i915_perf *perf = stream->perf; 50 51 mutex_lock(&perf->lock); 52 i915_perf_destroy_locked(stream); 53 mutex_unlock(&perf->lock); 54 } 55 56 static int live_sanitycheck(void *arg) 57 { 58 struct drm_i915_private *i915 = arg; 59 struct i915_perf_stream *stream; 60 61 /* Quick check we can create a perf stream */ 62 63 stream = test_stream(&i915->perf); 64 if (!stream) 65 return -EINVAL; 66 67 stream_destroy(stream); 68 return 0; 69 } 70 71 static int write_timestamp(struct i915_request *rq, int slot) 72 { 73 u32 *cs; 74 int len; 75 76 cs = intel_ring_begin(rq, 6); 77 if (IS_ERR(cs)) 78 return PTR_ERR(cs); 79 80 len = 5; 81 if (INTEL_GEN(rq->i915) >= 8) 82 len++; 83 84 *cs++ = GFX_OP_PIPE_CONTROL(len); 85 *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | 86 PIPE_CONTROL_STORE_DATA_INDEX | 87 PIPE_CONTROL_WRITE_TIMESTAMP; 88 *cs++ = slot * sizeof(u32); 89 *cs++ = 0; 90 *cs++ = 0; 91 *cs++ = 0; 92 93 intel_ring_advance(rq, cs); 94 95 return 0; 96 } 97 98 static ktime_t poll_status(struct i915_request *rq, int slot) 99 { 100 while (!intel_read_status_page(rq->engine, slot) && 101 !i915_request_completed(rq)) 102 cpu_relax(); 103 104 return ktime_get(); 105 } 106 107 static int live_noa_delay(void *arg) 108 { 109 struct drm_i915_private *i915 = arg; 110 struct i915_perf_stream *stream; 111 struct i915_request *rq; 112 ktime_t t0, t1; 113 u64 expected; 114 u32 delay; 115 int err; 116 int i; 117 118 /* Check that the GPU delays matches expectations */ 119 120 stream = test_stream(&i915->perf); 121 if (!stream) 122 return -ENOMEM; 123 124 expected = atomic64_read(&stream->perf->noa_programming_delay); 125 126 if (stream->engine->class != RENDER_CLASS) { 127 err = -ENODEV; 128 goto out; 129 } 130 131 for (i = 0; i < 4; i++) 132 intel_write_status_page(stream->engine, 0x100 + i, 0); 133 134 rq = i915_request_create(stream->engine->kernel_context); 135 if (IS_ERR(rq)) { 136 err = PTR_ERR(rq); 137 goto out; 138 } 139 140 if (rq->engine->emit_init_breadcrumb && 141 i915_request_timeline(rq)->has_initial_breadcrumb) { 142 err = rq->engine->emit_init_breadcrumb(rq); 143 if (err) { 144 i915_request_add(rq); 145 goto out; 146 } 147 } 148 149 err = write_timestamp(rq, 0x100); 150 if (err) { 151 i915_request_add(rq); 152 goto out; 153 } 154 155 err = rq->engine->emit_bb_start(rq, 156 i915_ggtt_offset(stream->noa_wait), 0, 157 I915_DISPATCH_SECURE); 158 if (err) { 159 i915_request_add(rq); 160 goto out; 161 } 162 163 err = write_timestamp(rq, 0x102); 164 if (err) { 165 i915_request_add(rq); 166 goto out; 167 } 168 169 i915_request_get(rq); 170 i915_request_add(rq); 171 172 preempt_disable(); 173 t0 = poll_status(rq, 0x100); 174 t1 = poll_status(rq, 0x102); 175 preempt_enable(); 176 177 pr_info("CPU delay: %lluns, expected %lluns\n", 178 ktime_sub(t1, t0), expected); 179 180 delay = intel_read_status_page(stream->engine, 0x102); 181 delay -= intel_read_status_page(stream->engine, 0x100); 182 delay = div_u64(mul_u32_u32(delay, 1000 * 1000), 183 RUNTIME_INFO(i915)->cs_timestamp_frequency_khz); 184 pr_info("GPU delay: %uns, expected %lluns\n", 185 delay, expected); 186 187 if (4 * delay < 3 * expected || 2 * delay > 3 * expected) { 188 pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n", 189 delay / 1000, 190 div_u64(3 * expected, 4000), 191 div_u64(3 * expected, 2000)); 192 err = -EINVAL; 193 } 194 195 i915_request_put(rq); 196 out: 197 stream_destroy(stream); 198 return err; 199 } 200 201 int i915_perf_live_selftests(struct drm_i915_private *i915) 202 { 203 static const struct i915_subtest tests[] = { 204 SUBTEST(live_sanitycheck), 205 SUBTEST(live_noa_delay), 206 }; 207 struct i915_perf *perf = &i915->perf; 208 209 if (!perf->metrics_kobj || !perf->ops.enable_metric_set) 210 return 0; 211 212 if (intel_gt_is_wedged(&i915->gt)) 213 return 0; 214 215 return i915_subtests(tests, i915); 216 } 217