// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unlock;

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
			*cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	*cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

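	/*
	 * Hand back a pointer to the branch target (the caller stops the
	 * spinner by overwriting it with MI_BATCH_BUFFER_END) and, in SRM
	 * mode, to the dword at the end of the buffer that the batch keeps
	 * updating with its loop count.
	 */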
	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
err_unlock:
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

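		/*
		 * With the up evaluation interval programmed to (almost)
		 * infinity below, GEN6_RP_CUR_UP_EI acts as a free-running
		 * counter of C0 cycles in the PM interval clock domain.
		 * Sampling it against ktime lets us cross-check the
		 * intel_gt_pm_interval_to_ns() conversion and vice versa.
		 */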
		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */
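	/*
	 * The sweep below steps up from min_freq until the PCU stops
	 * granting a higher frequency; that plateau is reported as 'limit'.
	 * If the limit collapses back to min_freq, the PCU is throttling
	 * (e.g. thermal/power limits) and we cannot judge our control path.
	 */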

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (GRAPHICS_VER(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

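		/*
		 * Ask the PCU which IA and ring frequencies it pairs with
		 * this GPU frequency; the table entry packs them into bits
		 * 7:0 and 15:8 respectively, in 100MHz units.
		 */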
		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
			       &ia_freq, NULL);

		pr_info("%5d %5d %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = READ_ONCE(*cntr);
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dt = ktime_get();
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */
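	/*
	 * Here the spinner batch increments a CS general purpose register
	 * each loop and we read that GPR back over mmio (no SRM), so the
	 * observed count rate should track the CS clock, and hence the
	 * requested frequency, to within the 2:3 bounds checked below.
	 */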

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(vma),
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(vma),
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dt = ktime_get();
	dE = librapl_energy_uJ();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);

	*freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	*freq = rps_set_check(rps, *freq);
	return measure_power(rps, freq);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurement supports
	 * that theory.
	 */
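	/*
	 * With a spinner keeping each engine busy, sample the power drawn
	 * (via RAPL) at the maximum and minimum frequencies; unless the two
	 * frequencies could not be separated, require the lower frequency
	 * to draw measurably (>10%) less power.
	 */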
1141 */ 1142 1143 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) 1144 return 0; 1145 1146 if (!librapl_supported(gt->i915)) 1147 return 0; 1148 1149 if (igt_spinner_init(&spin, gt)) 1150 return -ENOMEM; 1151 1152 intel_gt_pm_wait_for_idle(gt); 1153 saved_work = rps->work.func; 1154 rps->work.func = dummy_rps_work; 1155 1156 for_each_engine(engine, gt, id) { 1157 struct i915_request *rq; 1158 struct { 1159 u64 power; 1160 int freq; 1161 } min, max; 1162 1163 if (!intel_engine_can_store_dword(engine)) 1164 continue; 1165 1166 st_engine_heartbeat_disable(engine); 1167 1168 rq = igt_spinner_create_request(&spin, 1169 engine->kernel_context, 1170 MI_NOOP); 1171 if (IS_ERR(rq)) { 1172 st_engine_heartbeat_enable(engine); 1173 err = PTR_ERR(rq); 1174 break; 1175 } 1176 1177 i915_request_add(rq); 1178 1179 if (!igt_wait_for_spinner(&spin, rq)) { 1180 pr_err("%s: RPS spinner did not start\n", 1181 engine->name); 1182 igt_spinner_end(&spin); 1183 st_engine_heartbeat_enable(engine); 1184 intel_gt_set_wedged(engine->gt); 1185 err = -EIO; 1186 break; 1187 } 1188 1189 max.freq = rps->max_freq; 1190 max.power = measure_power_at(rps, &max.freq); 1191 1192 min.freq = rps->min_freq; 1193 min.power = measure_power_at(rps, &min.freq); 1194 1195 igt_spinner_end(&spin); 1196 st_engine_heartbeat_enable(engine); 1197 1198 pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n", 1199 engine->name, 1200 min.power, intel_gpu_freq(rps, min.freq), 1201 max.power, intel_gpu_freq(rps, max.freq)); 1202 1203 if (10 * min.freq >= 9 * max.freq) { 1204 pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n", 1205 min.freq, intel_gpu_freq(rps, min.freq), 1206 max.freq, intel_gpu_freq(rps, max.freq)); 1207 continue; 1208 } 1209 1210 if (11 * min.power > 10 * max.power) { 1211 pr_err("%s: did not conserve power when setting lower frequency!\n", 1212 engine->name); 1213 err = -EINVAL; 1214 break; 1215 } 1216 1217 if (igt_flush_test(gt->i915)) { 1218 err = -EIO; 1219 break; 1220 } 1221 } 1222 1223 igt_spinner_fini(&spin); 1224 1225 intel_gt_pm_wait_for_idle(gt); 1226 rps->work.func = saved_work; 1227 1228 return err; 1229 } 1230 1231 int live_rps_dynamic(void *arg) 1232 { 1233 struct intel_gt *gt = arg; 1234 struct intel_rps *rps = >->rps; 1235 struct intel_engine_cs *engine; 1236 enum intel_engine_id id; 1237 struct igt_spinner spin; 1238 int err = 0; 1239 1240 /* 1241 * We've looked at the bascs, and have established that we 1242 * can change the clock frequency and that the HW will generate 1243 * interrupts based on load. Now we check how we integrate those 1244 * moving parts into dynamic reclocking based on load. 
1245 */ 1246 1247 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) 1248 return 0; 1249 1250 if (igt_spinner_init(&spin, gt)) 1251 return -ENOMEM; 1252 1253 if (intel_rps_has_interrupts(rps)) 1254 pr_info("RPS has interrupt support\n"); 1255 if (intel_rps_uses_timer(rps)) 1256 pr_info("RPS has timer support\n"); 1257 1258 for_each_engine(engine, gt, id) { 1259 struct i915_request *rq; 1260 struct { 1261 ktime_t dt; 1262 u8 freq; 1263 } min, max; 1264 1265 if (!intel_engine_can_store_dword(engine)) 1266 continue; 1267 1268 intel_gt_pm_wait_for_idle(gt); 1269 GEM_BUG_ON(intel_rps_is_active(rps)); 1270 rps->cur_freq = rps->min_freq; 1271 1272 intel_engine_pm_get(engine); 1273 intel_rc6_disable(>->rc6); 1274 GEM_BUG_ON(rps->last_freq != rps->min_freq); 1275 1276 rq = igt_spinner_create_request(&spin, 1277 engine->kernel_context, 1278 MI_NOOP); 1279 if (IS_ERR(rq)) { 1280 err = PTR_ERR(rq); 1281 goto err; 1282 } 1283 1284 i915_request_add(rq); 1285 1286 max.dt = ktime_get(); 1287 max.freq = wait_for_freq(rps, rps->max_freq, 500); 1288 max.dt = ktime_sub(ktime_get(), max.dt); 1289 1290 igt_spinner_end(&spin); 1291 1292 min.dt = ktime_get(); 1293 min.freq = wait_for_freq(rps, rps->min_freq, 2000); 1294 min.dt = ktime_sub(ktime_get(), min.dt); 1295 1296 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n", 1297 engine->name, 1298 max.freq, intel_gpu_freq(rps, max.freq), 1299 ktime_to_ns(max.dt), 1300 min.freq, intel_gpu_freq(rps, min.freq), 1301 ktime_to_ns(min.dt)); 1302 if (min.freq >= max.freq) { 1303 pr_err("%s: dynamic reclocking of spinner failed\n!", 1304 engine->name); 1305 err = -EINVAL; 1306 } 1307 1308 err: 1309 intel_rc6_enable(>->rc6); 1310 intel_engine_pm_put(engine); 1311 1312 if (igt_flush_test(gt->i915)) 1313 err = -EIO; 1314 if (err) 1315 break; 1316 } 1317 1318 igt_spinner_fini(&spin); 1319 1320 return err; 1321 } 1322