// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/pm_qos.h>
#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "i915_reg.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "intel_gt_pm.h"
#include "intel_rc6.h"
#include "selftest_engine_heartbeat.h"
#include "selftest_rps.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/librapl.h"

/* Try to isolate the impact of cstates from determining frequency response */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */

/* No-op worker: the tests substitute this so they control reclocking themselves */
static void dummy_rps_work(struct work_struct *wrk)
{
}

static int cmp_u64(const void *A, const void *B)
{
	const u64 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	if (*a < *b)
		return -1;
	else if (*a > *b)
		return 1;
	else
		return 0;
}

/*
 * Build a self-looping batch that continuously increments a CS_GPR counter;
 * if @srm is set, the running count is also stored to the end of the buffer.
 */
static struct i915_vma *
create_spin_counter(struct intel_engine_cs *engine,
		    struct i915_address_space *vm,
		    bool srm,
		    u32 **cancel,
		    u32 **counter)
{
	enum {
		COUNT,
		INC,
		__NGPR__,
	};
#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned long end;
	u32 *base, *cs;
	int loop, i;
	int err;

	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	end = obj->base.size / sizeof(u32) - 1;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unlock;

	i915_vma_lock(vma);

	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(base)) {
		err = PTR_ERR(base);
		goto err_unpin;
	}
	cs = base;

	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
	for (i = 0; i < __NGPR__; i++) {
		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
		*cs++ = 0;
		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
		*cs++ = 0;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
	*cs++ = 1;

	loop = cs - base;

	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
	for (i = 0; i < 1024; i++) {
		*cs++ = MI_MATH(4);
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
		*cs++ = MI_MATH_ADD;
		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);

		if (srm) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
			*cs++ = lower_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
			*cs++ = upper_32_bits(i915_vma_offset(vma) + end * sizeof(*cs));
		}
	}

	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = lower_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	*cs++ = upper_32_bits(i915_vma_offset(vma) + loop * sizeof(*cs));
	GEM_BUG_ON(cs - base > end);

	i915_gem_object_flush_map(obj);

	*cancel = base + loop;
	*counter = srm ? memset32(base + end, 0, 1) : NULL;
	return vma;

err_unpin:
	i915_vma_unpin(vma);
err_unlock:
	i915_vma_unlock(vma);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
{
	u8 history[64], i;
	unsigned long end;
	int sleep;

	i = 0;
	memset(history, freq, sizeof(history));
	sleep = 20;

	/* The PCU does not change instantly, but drifts towards the goal? */
	end = jiffies + msecs_to_jiffies(timeout_ms);
	do {
		u8 act;

		act = read_cagf(rps);
		if (time_after(jiffies, end))
			return act;

		/* Target acquired */
		if (act == freq)
			return act;

		/* Any change within the last N samples? */
		if (!memchr_inv(history, act, sizeof(history)))
			return act;

		history[i] = act;
		i = (i + 1) % ARRAY_SIZE(history);

		usleep_range(sleep, 2 * sleep);
		sleep *= 2;
		if (sleep > timeout_ms * 20)
			sleep = timeout_ms * 20;
	} while (1);
}

static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{
	mutex_lock(&rps->lock);
	GEM_BUG_ON(!intel_rps_is_active(rps));
	if (wait_for(!intel_rps_set(rps, freq), 50)) {
		mutex_unlock(&rps->lock);
		return 0;
	}
	GEM_BUG_ON(rps->last_freq != freq);
	mutex_unlock(&rps->lock);

	return wait_for_freq(rps, freq, 50);
}

static void show_pstate_limits(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);

	if (IS_BROXTON(i915)) {
		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
			intel_uncore_read(rps_to_uncore(rps),
					  BXT_RP_STATE_CAP));
	} else if (GRAPHICS_VER(i915) == 9) {
		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
			intel_uncore_read(rps_to_uncore(rps),
					  GEN9_RP_STATE_LIMITS));
	}
}

int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	intel_wakeref_t wakeref;
	int err = 0;

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	wakeref = intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
				dt_[i] = ktime_get();

				udelay(1000);

				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);
				dt_[i] = ktime_sub(ktime_get(), dt_[i]);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt, wakeref);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}

int live_rps_control(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	intel_wakeref_t wakeref;
	int err = 0;

	/*
	 * Check that the actual frequency matches our requested frequency,
	 * to verify our control mechanism. We have to be careful that the
	 * PCU may throttle the GPU in which case the actual frequency used
	 * will be lower than requested.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	wakeref = intel_gt_pm_get(gt);
	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		ktime_t min_dt, max_dt;
		int f, limit;
		int min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
			if (rps_set_check(rps, f) < f)
				break;
		}

		limit = rps_set_check(rps, f);

		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
			       engine->name, rps->min_freq, read_cagf(rps));
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			show_pstate_limits(rps);
			err = -EINVAL;
			break;
		}

		max_dt = ktime_get();
		max = rps_set_check(rps, limit);
		max_dt = ktime_sub(ktime_get(), max_dt);

		min_dt = ktime_get();
		min = rps_set_check(rps, rps->min_freq);
		min_dt = ktime_sub(ktime_get(), min_dt);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
			engine->name,
			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
			limit, intel_gpu_freq(rps, limit),
			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));

		if (limit == rps->min_freq) {
			pr_err("%s: GPU throttled to minimum!\n",
			       engine->name);
			show_pstate_limits(rps);
			err = -ENODEV;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}
	intel_gt_pm_put(gt, wakeref);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static void show_pcu_config(struct intel_rps *rps)
{
	struct drm_i915_private *i915 = rps_to_i915(rps);
	unsigned int max_gpu_freq, min_gpu_freq;
	intel_wakeref_t wakeref;
	int gpu_freq;

	if (!HAS_LLC(i915))
		return;

	min_gpu_freq = rps->min_freq;
	max_gpu_freq = rps->max_freq;
	if (GRAPHICS_VER(i915) >= 9) {
		/* Convert GT frequency to 50 MHz units */
		min_gpu_freq /= GEN9_FREQ_SCALER;
		max_gpu_freq /= GEN9_FREQ_SCALER;
	}

	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);

	pr_info("%5s %5s %5s\n", "GPU", "eCPU", "eRing");
	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
		int ia_freq = gpu_freq;

		snb_pcode_read(rps_to_gt(rps)->uncore, GEN6_PCODE_READ_MIN_FREQ_TABLE,
			       &ia_freq, NULL);

		pr_info("%5d %5d %5d\n",
			gpu_freq * 50,
			((ia_freq >> 0) & 0xff) * 100,
			((ia_freq >> 8) & 0xff) * 100);
	}

	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
}

static u64 __measure_frequency(u32 *cntr, int duration_ms)
{
	u64 dc, dt;

	dc = READ_ONCE(*cntr);
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = READ_ONCE(*cntr) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_frequency(cntr, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
				  int duration_ms)
{
	u64 dc, dt;

	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dc, dt);
}

static u64 measure_cs_frequency_at(struct intel_rps *rps,
				   struct intel_engine_cs *engine,
				   int *freq)
{
	u64 x[5];
	int i;

	*freq = rps_set_check(rps, *freq);
	for (i = 0; i < 5; i++)
		x[i] = __measure_cs_frequency(engine, 2);
	*freq = (*freq + read_cagf(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
{
	return f_d * x > f_n * y && f_n * x < f_d * y;
}

int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(vma),
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (GRAPHICS_VER(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							i915_vma_offset(vma),
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		if (wait_for(READ_ONCE(*cntr), 10)) {
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}

static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
{
	/* Flush any previous EI */
	usleep_range(timeout_us, 2 * timeout_us);

	/* Reset the interrupt status */
	rps_disable_interrupts(rps);
	GEM_BUG_ON(rps->pm_iir);
	rps_enable_interrupts(rps);

	/* And then wait for the timeout, for real this time */
	usleep_range(2 * timeout_us, 3 * timeout_us);
}

static int __rps_up_interrupt(struct intel_rps *rps,
			      struct intel_engine_cs *engine,
			      struct igt_spinner *spin)
{
	struct intel_uncore *uncore = engine->uncore;
	struct i915_request *rq;
	u32 timeout;

	if (!intel_engine_can_store_dword(engine))
		return 0;

	rps_set_check(rps, rps->min_freq);

	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_get(rq);
	i915_request_add(rq);

	if (!igt_wait_for_spinner(spin, rq)) {
		pr_err("%s: RPS spinner did not start\n",
		       engine->name);
		i915_request_put(rq);
		intel_gt_set_wedged(engine->gt);
		return -EIO;
	}

	if (!intel_rps_is_active(rps)) {
		pr_err("%s: RPS not enabled on starting spinner\n",
		       engine->name);
		igt_spinner_end(spin);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: RPS did not register UP interrupt\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	if (rps->last_freq != rps->min_freq) {
		pr_err("%s: RPS did not program min frequency\n",
		       engine->name);
		i915_request_put(rq);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);
	GEM_BUG_ON(i915_request_completed(rq));

	igt_spinner_end(spin);
	i915_request_put(rq);

	if (rps->cur_freq != rps->min_freq) {
		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
		       engine->name, intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

static int __rps_down_interrupt(struct intel_rps *rps,
				struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u32 timeout;

	rps_set_check(rps, rps->max_freq);

	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
		pr_err("%s: RPS did not register DOWN interrupt\n",
		       engine->name);
		return -EINVAL;
	}

	if (rps->last_freq != rps->max_freq) {
		pr_err("%s: RPS did not program max frequency\n",
		       engine->name);
		return -EINVAL;
	}

	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
	timeout = DIV_ROUND_UP(timeout, 1000);

	sleep_for_ei(rps, timeout);

	if (rps->cur_freq != rps->max_freq) {
		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
		       engine->name,
		       intel_rps_read_actual_frequency(rps));
		return -EINVAL;
	}

	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
		       engine->name, rps->pm_iir,
		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
		return -EINVAL;
	}

	return 0;
}

int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	intel_wakeref_t wakeref;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	pm_events = 0;
	with_intel_gt_pm(gt, wakeref)
		pm_events = rps->pm_events;
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

static u64 __measure_power(int duration_ms)
{
	u64 dE, dt;

	dE = librapl_energy_uJ();
	dt = ktime_get();
	usleep_range(1000 * duration_ms, 2000 * duration_ms);
	dE = librapl_energy_uJ() - dE;
	dt = ktime_get() - dt;

	return div64_u64(1000 * 1000 * dE, dt);
}

static u64 measure_power(struct intel_rps *rps, int *freq)
{
	u64 x[5];
	int i;

	for (i = 0; i < 5; i++)
		x[i] = __measure_power(5);

	*freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;

	/* A simple triangle filter for better result stability */
	sort(x, 5, sizeof(*x), cmp_u64, NULL);
	return div_u64(x[1] + 2 * x[2] + x[3], 4);
}

static u64 measure_power_at(struct intel_rps *rps, int *freq)
{
	*freq = rps_set_check(rps, *freq);
	return measure_power(rps, freq);
}

int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurement supports
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (!librapl_supported(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMHz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}

int live_rps_dynamic(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * We've looked at the basics, and have established that we
	 * can change the clock frequency and that the HW will generate
	 * interrupts based on load. Now we check how we integrate those
	 * moving parts into dynamic reclocking based on load.
	 */

	if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	if (intel_rps_has_interrupts(rps))
		pr_info("RPS has interrupt support\n");
	if (intel_rps_uses_timer(rps))
		pr_info("RPS has timer support\n");

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			ktime_t dt;
			u8 freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_gt_pm_wait_for_idle(gt);
		GEM_BUG_ON(intel_rps_is_active(rps));
		rps->cur_freq = rps->min_freq;

		intel_engine_pm_get(engine);
		intel_rc6_disable(&gt->rc6);
		GEM_BUG_ON(rps->last_freq != rps->min_freq);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		i915_request_add(rq);

		max.dt = ktime_get();
		max.freq = wait_for_freq(rps, rps->max_freq, 500);
		max.dt = ktime_sub(ktime_get(), max.dt);

		igt_spinner_end(&spin);

		min.dt = ktime_get();
		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
		min.dt = ktime_sub(ktime_get(), min.dt);

		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
			engine->name,
			max.freq, intel_gpu_freq(rps, max.freq),
			ktime_to_ns(max.dt),
			min.freq, intel_gpu_freq(rps, min.freq),
			ktime_to_ns(min.dt));
		if (min.freq >= max.freq) {
			pr_err("%s: dynamic reclocking of spinner failed!\n",
			       engine->name);
			err = -EINVAL;
		}

err:
		intel_rc6_enable(&gt->rc6);
		intel_engine_pm_put(engine);

		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);

	return err;
}