// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "i915_reg.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

/*
 * Substituted for the real RPS worker while a test runs, so that interrupts
 * or timer callbacks recorded during the test do not reclock the GPU behind
 * the test's back.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unlock;

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
			*cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	*cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

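	/*
	 * Hand back two pointers into the batch: *cancel aims at the first
	 * instruction of the increment loop, so the caller can stop the
	 * spinner by overwriting it with MI_BATCH_BUFFER_END; *counter (SRM
	 * mode only) is the dword the batch keeps updating with the running
	 * count.
	 */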
	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
err_unlock:
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	intel_wakeref_t wakeref;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	wakeref = intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
				dt_[i] = ktime_get();

				udelay(1000);

				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
				dt_[i] = ktime_sub(ktime_get(), dt_[i]);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt, wakeref);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	intel_wakeref_t wakeref;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
388 */ 389 390 if (!intel_rps_is_enabled(rps)) 391 return 0; 392 393 if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */ 394 return 0; 395 396 if (igt_spinner_init(&spin, gt)) 397 return -ENOMEM; 398 399 intel_gt_pm_wait_for_idle(gt); 400 saved_work = rps->work.func; 401 rps->work.func = dummy_rps_work; 402 403 wakeref = intel_gt_pm_get(gt); 404 for_each_engine(engine, gt, id) { 405 struct i915_request *rq; 406 ktime_t min_dt, max_dt; 407 int f, limit; 408 int min, max; 409 410 if (!intel_engine_can_store_dword(engine)) 411 continue; 412 413 st_engine_heartbeat_disable(engine); 414 415 rq = igt_spinner_create_request(&spin, 416 engine->kernel_context, 417 MI_NOOP); 418 if (IS_ERR(rq)) { 419 err = PTR_ERR(rq); 420 break; 421 } 422 423 i915_request_add(rq); 424 425 if (!igt_wait_for_spinner(&spin, rq)) { 426 pr_err("%s: RPS spinner did not start\n", 427 engine->name); 428 igt_spinner_end(&spin); 429 st_engine_heartbeat_enable(engine); 430 intel_gt_set_wedged(engine->gt); 431 err = -EIO; 432 break; 433 } 434 435 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) { 436 pr_err("%s: could not set minimum frequency [%x], only %x!\n", 437 engine->name, rps->min_freq, read_cagf(rps)); 438 igt_spinner_end(&spin); 439 st_engine_heartbeat_enable(engine); 440 show_pstate_limits(rps); 441 err = -EINVAL; 442 break; 443 } 444 445 for (f = rps->min_freq + 1; f < rps->max_freq; f++) { 446 if (rps_set_check(rps, f) < f) 447 break; 448 } 449 450 limit = rps_set_check(rps, f); 451 452 if (rps_set_check(rps, rps->min_freq) != rps->min_freq) { 453 pr_err("%s: could not restore minimum frequency [%x], only %x!\n", 454 engine->name, rps->min_freq, read_cagf(rps)); 455 igt_spinner_end(&spin); 456 st_engine_heartbeat_enable(engine); 457 show_pstate_limits(rps); 458 err = -EINVAL; 459 break; 460 } 461 462 max_dt = ktime_get(); 463 max = rps_set_check(rps, limit); 464 max_dt = ktime_sub(ktime_get(), max_dt); 465 466 min_dt = ktime_get(); 467 min = rps_set_check(rps, rps->min_freq); 468 min_dt = ktime_sub(ktime_get(), min_dt); 469 470 igt_spinner_end(&spin); 471 st_engine_heartbeat_enable(engine); 472 473 pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n", 474 engine->name, 475 rps->min_freq, intel_gpu_freq(rps, rps->min_freq), 476 rps->max_freq, intel_gpu_freq(rps, rps->max_freq), 477 limit, intel_gpu_freq(rps, limit), 478 min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt)); 479 480 if (limit != rps->max_freq) { 481 u32 throttle = intel_uncore_read(gt->uncore, 482 intel_gt_perf_limit_reasons_reg(gt)); 483 484 pr_warn("%s: GPU throttled with reasons 0x%08x\n", 485 engine->name, throttle & GT0_PERF_LIMIT_REASONS_MASK); 486 show_pstate_limits(rps); 487 } 488 489 if (igt_flush_test(gt->i915)) { 490 err = -EIO; 491 break; 492 } 493 } 494 intel_gt_pm_put(gt, wakeref); 495 496 igt_spinner_fini(&spin); 497 498 intel_gt_pm_wait_for_idle(gt); 499 rps->work.func = saved_work; 500 501 return err; 502 } 503 504 static void show_pcu_config(struct intel_rps *rps) 505 { 506 struct drm_i915_private *i915 = rps_to_i915(rps); 507 unsigned int max_gpu_freq, min_gpu_freq; 508 intel_wakeref_t wakeref; 509 int gpu_freq; 510 511 if (!HAS_LLC(i915)) 512 return; 513 514 min_gpu_freq = rps->min_freq; 515 max_gpu_freq = rps->max_freq; 516 if (GRAPHICS_VER(i915) >= 9) { 517 /* Convert GT frequency to 50 HZ units */ 518 min_gpu_freq /= GEN9_FREQ_SCALER; 519 max_gpu_freq /= GEN9_FREQ_SCALER; 520 } 521 522 wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm); 523 524 pr_info("%5s %5s %5s\n", "GPU", 
"eCPU", "eRing"); 525 for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) { 526 int ia_freq = gpu_freq; 527 528 snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE, 529 &ia_freq, NULL); 530 531 pr_info("%5d %5d %5d\n", 532 gpu_freq * 50, 533 ((ia_freq >> 0) & 0xff) * 100, 534 ((ia_freq >> 8) & 0xff) * 100); 535 } 536 537 intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref); 538 } 539 540 static u64 __measure_frequency(u32 *cntr, int duration_ms) 541 { 542 u64 dc, dt; 543 544 dc = READ_ONCE(*cntr); 545 dt = ktime_get(); 546 usleep_range(1000 * duration_ms, 2000 * duration_ms); 547 dc = READ_ONCE(*cntr) - dc; 548 dt = ktime_get() - dt; 549 550 return div64_u64(1000 * 1000 * dc, dt); 551 } 552 553 static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq) 554 { 555 u64 x[5]; 556 int i; 557 558 *freq = rps_set_check(rps, *freq); 559 for (i = 0; i < 5; i++) 560 x[i] = __measure_frequency(cntr, 2); 561 *freq = (*freq + read_cagf(rps)) / 2; 562 563 /* A simple triangle filter for better result stability */ 564 sort(x, 5, sizeof(*x), cmp_u64, NULL); 565 return div_u64(x[1] + 2 * x[2] + x[3], 4); 566 } 567 568 static u64 __measure_cs_frequency(struct intel_engine_cs *engine, 569 int duration_ms) 570 { 571 u64 dc, dt; 572 573 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)); 574 dt = ktime_get(); 575 usleep_range(1000 * duration_ms, 2000 * duration_ms); 576 dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc; 577 dt = ktime_get() - dt; 578 579 return div64_u64(1000 * 1000 * dc, dt); 580 } 581 582 static u64 measure_cs_frequency_at(struct intel_rps *rps, 583 struct intel_engine_cs *engine, 584 int *freq) 585 { 586 u64 x[5]; 587 int i; 588 589 *freq = rps_set_check(rps, *freq); 590 for (i = 0; i < 5; i++) 591 x[i] = __measure_cs_frequency(engine, 2); 592 *freq = (*freq + read_cagf(rps)) / 2; 593 594 /* A simple triangle filter for better result stability */ 595 sort(x, 5, sizeof(*x), cmp_u64, NULL); 596 return div_u64(x[1] + 2 * x[2] + x[3], 4); 597 } 598 599 static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d) 600 { 601 return f_d * x > f_n * y && f_n * x < f_d * y; 602 } 603 604 int live_rps_frequency_cs(void *arg) 605 { 606 void (*saved_work)(struct work_struct *wrk); 607 struct intel_gt *gt = arg; 608 struct intel_rps *rps = >->rps; 609 struct intel_engine_cs *engine; 610 struct pm_qos_request qos; 611 enum intel_engine_id id; 612 int err = 0; 613 614 /* 615 * The premise is that the GPU does change frequency at our behest. 616 * Let's check there is a correspondence between the requested 617 * frequency, the actual frequency, and the observed clock rate. 
618 */ 619 620 if (!intel_rps_is_enabled(rps)) 621 return 0; 622 623 if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */ 624 return 0; 625 626 if (CPU_LATENCY >= 0) 627 cpu_latency_qos_add_request(&qos, CPU_LATENCY); 628 629 intel_gt_pm_wait_for_idle(gt); 630 saved_work = rps->work.func; 631 rps->work.func = dummy_rps_work; 632 633 for_each_engine(engine, gt, id) { 634 struct i915_request *rq; 635 struct i915_vma *vma; 636 u32 *cancel, *cntr; 637 struct { 638 u64 count; 639 int freq; 640 } min, max; 641 642 st_engine_heartbeat_disable(engine); 643 644 vma = create_spin_counter(engine, 645 engine->kernel_context->vm, false, 646 &cancel, &cntr); 647 if (IS_ERR(vma)) { 648 err = PTR_ERR(vma); 649 st_engine_heartbeat_enable(engine); 650 break; 651 } 652 653 rq = intel_engine_create_kernel_request(engine); 654 if (IS_ERR(rq)) { 655 err = PTR_ERR(rq); 656 goto err_vma; 657 } 658 659 err = i915_vma_move_to_active(vma, rq, 0); 660 if (!err) 661 err = rq->engine->emit_bb_start(rq, 662 i915_vma_offset(vma), 663 PAGE_SIZE, 0); 664 i915_request_add(rq); 665 if (err) 666 goto err_vma; 667 668 if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)), 669 10)) { 670 pr_err("%s: timed loop did not start\n", 671 engine->name); 672 goto err_vma; 673 } 674 675 min.freq = rps->min_freq; 676 min.count = measure_cs_frequency_at(rps, engine, &min.freq); 677 678 max.freq = rps->max_freq; 679 max.count = measure_cs_frequency_at(rps, engine, &max.freq); 680 681 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n", 682 engine->name, 683 min.count, intel_gpu_freq(rps, min.freq), 684 max.count, intel_gpu_freq(rps, max.freq), 685 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count, 686 max.freq * min.count)); 687 688 if (!scaled_within(max.freq * min.count, 689 min.freq * max.count, 690 2, 3)) { 691 int f; 692 693 pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n", 694 engine->name, 695 max.freq * min.count, 696 min.freq * max.count); 697 show_pcu_config(rps); 698 699 for (f = min.freq + 1; f <= rps->max_freq; f++) { 700 int act = f; 701 u64 count; 702 703 count = measure_cs_frequency_at(rps, engine, &act); 704 if (act < f) 705 break; 706 707 pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n", 708 engine->name, 709 act, intel_gpu_freq(rps, act), count, 710 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count, 711 act * min.count)); 712 713 f = act; /* may skip ahead [pcu granularity] */ 714 } 715 716 err = -EINTR; /* ignore error, continue on with test */ 717 } 718 719 err_vma: 720 *cancel = MI_BATCH_BUFFER_END; 721 i915_gem_object_flush_map(vma->obj); 722 i915_gem_object_unpin_map(vma->obj); 723 i915_vma_unpin(vma); 724 i915_vma_unlock(vma); 725 i915_vma_put(vma); 726 727 st_engine_heartbeat_enable(engine); 728 if (igt_flush_test(gt->i915)) 729 err = -EIO; 730 if (err) 731 break; 732 } 733 734 intel_gt_pm_wait_for_idle(gt); 735 rps->work.func = saved_work; 736 737 if (CPU_LATENCY >= 0) 738 cpu_latency_qos_remove_request(&qos); 739 740 return err; 741 } 742 743 int live_rps_frequency_srm(void *arg) 744 { 745 void (*saved_work)(struct work_struct *wrk); 746 struct intel_gt *gt = arg; 747 struct intel_rps *rps = >->rps; 748 struct intel_engine_cs *engine; 749 struct pm_qos_request qos; 750 enum intel_engine_id id; 751 int err = 0; 752 753 /* 754 * The premise is that the GPU does change frequency at our behest. 755 * Let's check there is a correspondence between the requested 756 * frequency, the actual frequency, and the observed clock rate. 
757 */ 758 759 if (!intel_rps_is_enabled(rps)) 760 return 0; 761 762 if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */ 763 return 0; 764 765 if (CPU_LATENCY >= 0) 766 cpu_latency_qos_add_request(&qos, CPU_LATENCY); 767 768 intel_gt_pm_wait_for_idle(gt); 769 saved_work = rps->work.func; 770 rps->work.func = dummy_rps_work; 771 772 for_each_engine(engine, gt, id) { 773 struct i915_request *rq; 774 struct i915_vma *vma; 775 u32 *cancel, *cntr; 776 struct { 777 u64 count; 778 int freq; 779 } min, max; 780 781 st_engine_heartbeat_disable(engine); 782 783 vma = create_spin_counter(engine, 784 engine->kernel_context->vm, true, 785 &cancel, &cntr); 786 if (IS_ERR(vma)) { 787 err = PTR_ERR(vma); 788 st_engine_heartbeat_enable(engine); 789 break; 790 } 791 792 rq = intel_engine_create_kernel_request(engine); 793 if (IS_ERR(rq)) { 794 err = PTR_ERR(rq); 795 goto err_vma; 796 } 797 798 err = i915_vma_move_to_active(vma, rq, 0); 799 if (!err) 800 err = rq->engine->emit_bb_start(rq, 801 i915_vma_offset(vma), 802 PAGE_SIZE, 0); 803 i915_request_add(rq); 804 if (err) 805 goto err_vma; 806 807 if (wait_for(READ_ONCE(*cntr), 10)) { 808 pr_err("%s: timed loop did not start\n", 809 engine->name); 810 goto err_vma; 811 } 812 813 min.freq = rps->min_freq; 814 min.count = measure_frequency_at(rps, cntr, &min.freq); 815 816 max.freq = rps->max_freq; 817 max.count = measure_frequency_at(rps, cntr, &max.freq); 818 819 pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n", 820 engine->name, 821 min.count, intel_gpu_freq(rps, min.freq), 822 max.count, intel_gpu_freq(rps, max.freq), 823 (int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count, 824 max.freq * min.count)); 825 826 if (!scaled_within(max.freq * min.count, 827 min.freq * max.count, 828 1, 2)) { 829 int f; 830 831 pr_err("%s: CS did not scale with frequency! 
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

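	/*
	 * With the worker replaced by dummy_rps_work, the UP interrupt is
	 * only latched in pm_iir; the busy spinner should have tripped the
	 * UP threshold within the evaluation interval we just slept through.
	 */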
	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	intel_wakeref_t wakeref;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	pm_events = 0;
	with_intel_gt_pm(gt, wakeref)
		pm_events = rps->pm_events;
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

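/*
 * Power is sampled via RAPL (librapl_energy_uJ()): the delta in reported
 * energy (uJ) over the elapsed time (ns), scaled by 1e6, yields milliwatts.
 */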
static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dE = librapl_energy_uJ();
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);

	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	*freq = rps_set_check(rps, *freq);
	msleep(100);
	return measure_power(rps, freq);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurement supports
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (!librapl_supported(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
1251 */ 1252 1253 if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) 1254 return 0; 1255 1256 if (igt_spinner_init(&spin, gt)) 1257 return -ENOMEM; 1258 1259 if (intel_rps_has_interrupts(rps)) 1260 pr_info("RPS has interrupt support\n"); 1261 if (intel_rps_uses_timer(rps)) 1262 pr_info("RPS has timer support\n"); 1263 1264 for_each_engine(engine, gt, id) { 1265 struct i915_request *rq; 1266 struct { 1267 ktime_t dt; 1268 u8 freq; 1269 } min, max; 1270 1271 if (!intel_engine_can_store_dword(engine)) 1272 continue; 1273 1274 intel_gt_pm_wait_for_idle(gt); 1275 GEM_BUG_ON(intel_rps_is_active(rps)); 1276 rps->cur_freq = rps->min_freq; 1277 1278 intel_engine_pm_get(engine); 1279 intel_rc6_disable(>->rc6); 1280 GEM_BUG_ON(rps->last_freq != rps->min_freq); 1281 1282 rq = igt_spinner_create_request(&spin, 1283 engine->kernel_context, 1284 MI_NOOP); 1285 if (IS_ERR(rq)) { 1286 err = PTR_ERR(rq); 1287 goto err; 1288 } 1289 1290 i915_request_add(rq); 1291 1292 max.dt = ktime_get(); 1293 max.freq = wait_for_freq(rps, rps->max_freq, 500); 1294 max.dt = ktime_sub(ktime_get(), max.dt); 1295 1296 igt_spinner_end(&spin); 1297 1298 min.dt = ktime_get(); 1299 min.freq = wait_for_freq(rps, rps->min_freq, 2000); 1300 min.dt = ktime_sub(ktime_get(), min.dt); 1301 1302 pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n", 1303 engine->name, 1304 max.freq, intel_gpu_freq(rps, max.freq), 1305 ktime_to_ns(max.dt), 1306 min.freq, intel_gpu_freq(rps, min.freq), 1307 ktime_to_ns(min.dt)); 1308 if (min.freq >= max.freq) { 1309 pr_err("%s: dynamic reclocking of spinner failed\n!", 1310 engine->name); 1311 err = -EINVAL; 1312 } 1313 1314 err: 1315 intel_rc6_enable(>->rc6); 1316 intel_engine_pm_put(engine); 1317 1318 if (igt_flush_test(gt->i915)) 1319 err = -EIO; 1320 if (err) 1321 break; 1322 } 1323 1324 igt_spinner_fini(&spin); 1325 1326 return err; 1327 } 1328