// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/sort.h>

#include "intel_gpu_commands.h"
#include "intel_gt_pm.h"
#include "intel_rps.h"

#include "i915_selftest.h"
#include "selftests/igt_flush_test.h"

#define COUNT 5

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	return *a - *b;
}

static intel_wakeref_t perf_begin(struct intel_gt *gt)
{
	intel_wakeref_t wakeref = intel_gt_pm_get(gt);

	/* Boost gpufreq to max [waitboost] and keep it fixed */
	atomic_inc(&gt->rps.num_waiters);
	queue_work(gt->i915->unordered_wq, &gt->rps.work);
	flush_work(&gt->rps.work);

	return wakeref;
}

static int perf_end(struct intel_gt *gt, intel_wakeref_t wakeref)
{
	atomic_dec(&gt->rps.num_waiters);
	intel_gt_pm_put(gt, wakeref);

	return igt_flush_test(gt->i915);
}

static i915_reg_t timestamp_reg(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (GRAPHICS_VER(i915) == 5 || IS_G4X(i915))
		return RING_TIMESTAMP_UDW(engine->mmio_base);
	else
		return RING_TIMESTAMP(engine->mmio_base);
}

/*
 * Store the current CS timestamp into the given dword slot of the request's
 * HWSP, so the elapsed cycles can be read back from the CPU afterwards.
 */
static int write_timestamp(struct i915_request *rq, int slot)
{
	struct intel_timeline *tl =
		rcu_dereference_protected(rq->timeline,
					  !i915_request_signaled(rq));
	u32 cmd;
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
	if (GRAPHICS_VER(rq->i915) >= 8)
		cmd++;
	*cs++ = cmd;
	*cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine));
	*cs++ = tl->hwsp_offset + slot * sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}

static struct i915_vma *create_empty_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_put;
	}

	cs[0] = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_unpin;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unpin;

	i915_gem_object_unpin_map(obj);
	return vma;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

/*
 * Sort the samples and return the average of the middle three, with the
 * median weighted double, to filter out the outliers.
 */
static u32 trifilter(u32 *a)
{
	u64 sum;

	sort(a, COUNT, sizeof(*a), cmp_u32, NULL);

	sum = mul_u32_u32(a[2], 2);
	sum += a[1];
	sum += a[3];

	return sum >> 2;
}

/*
 * Measure the CS timestamp delta across an empty batch, i.e. the fixed cost
 * of launching a batch buffer with MI_BB_START.
 */
static int perf_mi_bb_start(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	int err = 0;

	if (GRAPHICS_VER(gt->i915) < 4)	/* Any CS_TIMESTAMP? */
		return 0;

	wakeref = perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *batch;
		u32 cycles[COUNT];
		int i;

		if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
			continue;

		intel_engine_pm_get(engine);

		batch = create_empty_batch(ce);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(batch);
		if (err) {
			intel_engine_pm_put(engine);
			i915_vma_put(batch);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(batch), 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

out:
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;
			i915_request_put(rq);
			if (err)
				break;

			cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2];
		}
		i915_vma_put(batch);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: MI_BB_START cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt, wakeref))
		err = -EIO;

	return err;
}

static struct i915_vma *create_nop_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_put;
	}

	memset(cs, 0, SZ_64K);
	cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_unpin;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unpin;

	i915_gem_object_unpin_map(obj);
	return vma;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

/*
 * Measure the additional CS timestamp delta of executing a 64KiB batch of
 * MI_NOOPs (16k instructions), over the empty-batch launch cost above.
 */
static int perf_mi_noop(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	int err = 0;

	if (GRAPHICS_VER(gt->i915) < 4)	/* Any CS_TIMESTAMP? */
		return 0;

	wakeref = perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *base, *nop;
		u32 cycles[COUNT];
		int i;

		if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
			continue;

		intel_engine_pm_get(engine);

		base = create_empty_batch(ce);
		if (IS_ERR(base)) {
			err = PTR_ERR(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(base);
		if (err) {
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		nop = create_nop_batch(ce);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(nop);
		if (err) {
			i915_vma_put(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(base), 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(nop),
							i915_vma_size(nop),
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 4);
			if (err)
				goto out;

out:
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;
			i915_request_put(rq);
			if (err)
				break;

			cycles[i] =
				(rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) -
				(rq->hwsp_seqno[3] - rq->hwsp_seqno[2]);
		}
		i915_vma_put(nop);
		i915_vma_put(base);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: 16K MI_NOOP cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt, wakeref))
		err = -EIO;

	return err;
}

int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_mi_bb_start),
		SUBTEST(perf_mi_noop),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return intel_gt_live_subtests(tests, to_gt(i915));
}

/*
 * Verify that each engine's mmio_bases[] table is sorted by descending
 * graphics version and that every populated entry has a non-zero base.
 */
static int intel_mmio_bases_check(void *arg)
{
	int i, j;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		const struct engine_info *info = &intel_engines[i];
		u8 prev = U8_MAX;

		for (j = 0; j < MAX_MMIO_BASES; j++) {
			u8 ver = info->mmio_bases[j].graphics_ver;
			u32 base = info->mmio_bases[j].base;

			if (ver >= prev) {
				pr_err("%s(%s, class:%d, instance:%d): mmio base for graphics ver %u is before the one for ver %u\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       prev, ver);
				return -EINVAL;
			}

			if (ver == 0)
				break;

			if (!base) {
				pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for graphics ver %u at entry %u\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       base, ver, j);
				return -EINVAL;
			}

			prev = ver;
		}

		pr_debug("%s: min graphics version supported for %s%d is %u\n",
			 __func__,
			 intel_engine_class_repr(info->class),
			 info->instance,
			 prev);
	}

	return 0;
}

int intel_engine_cs_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(intel_mmio_bases_check),
	};

	return i915_subtests(tests, NULL);
}
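
/*
 * Usage note: these entry points are run through the i915 selftest harness
 * rather than called directly. As a rough guide (assuming the standard
 * selftest module parameters), the mock check is selected with
 * i915.mock_selftests=-1 and the perf measurements with
 * i915.perf_selftests=-1; see i915_selftest.c for the authoritative
 * parameter handling.
 */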