/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/pm_runtime.h>

#include <drm/drm_print.h>

#include "gt/intel_engine.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_rc6.h"
#include "gt/intel_rps.h"

#include "i915_drv.h"
#include "i915_pmu.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

static struct i915_pmu *event_to_pmu(struct perf_event *event)
{
	return container_of(event->pmu, struct i915_pmu, base);
}

static struct drm_i915_private *pmu_to_i915(struct i915_pmu *pmu)
{
	return container_of(pmu, struct drm_i915_private, pmu);
}

static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(const u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int config_gt_id(const u64 config)
{
	return config >> __I915_PMU_GT_SHIFT;
}

static u64 config_counter(const u64 config)
{
	return config & ~(~0ULL << __I915_PMU_GT_SHIFT);
}
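/*
 * Illustration (not part of the original file): for GT-scoped "other"
 * events the uapi config packs the gt id above __I915_PMU_GT_SHIFT and the
 * counter id below it, so the two helpers above split it cleanly. Assuming
 * the uapi macros from include/uapi/drm/i915_drm.h:
 *
 *	u64 cfg = __I915_PMU_ACTUAL_FREQUENCY(1);
 *
 *	config_gt_id(cfg);   == 1
 *	config_counter(cfg); == I915_PMU_ACTUAL_FREQUENCY (the gt0 encoding)
 */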
static unsigned int other_bit(const u64 config)
{
	unsigned int val;

	switch (config_counter(config)) {
	case I915_PMU_ACTUAL_FREQUENCY:
		val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
		break;
	case I915_PMU_REQUESTED_FREQUENCY:
		val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED;
		break;
	case I915_PMU_RC6_RESIDENCY:
		val = __I915_PMU_RC6_RESIDENCY_ENABLED;
		break;
	default:
		/*
		 * Events that do not require sampling, or tracking of state
		 * transitions between enabled and disabled, can be ignored.
		 */
		return -1;
	}

	return I915_ENGINE_SAMPLE_COUNT +
	       config_gt_id(config) * __I915_PMU_TRACKED_EVENT_COUNT +
	       val;
}

static unsigned int config_bit(const u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return other_bit(config);
}

static __always_inline u32 config_mask(const u64 config)
{
	unsigned int bit = config_bit(config);

	if (__builtin_constant_p(bit))
		BUILD_BUG_ON(bit >
			     BITS_PER_TYPE(typeof_member(struct i915_pmu,
							  enable)) - 1);
	else
		WARN_ON_ONCE(bit >
			     BITS_PER_TYPE(typeof_member(struct i915_pmu,
							  enable)) - 1);

	return BIT(bit);
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_bit(struct perf_event *event)
{
	return config_bit(event->attr.config);
}
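/*
 * Illustration (not part of the original file): the pmu->enable bitmask is
 * laid out as the engine sample bits first, followed by one block of
 * tracked "other" events per GT. Following other_bit() above, e.g. the
 * actual-frequency event on gt1 maps to bit
 *
 *	I915_ENGINE_SAMPLE_COUNT +
 *	1 * __I915_PMU_TRACKED_EVENT_COUNT +
 *	__I915_PMU_ACTUAL_FREQUENCY_ENABLED
 */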
static u32 frequency_enabled_mask(void)
{
	unsigned int i;
	u32 mask = 0;

	for (i = 0; i < I915_PMU_MAX_GT; i++)
		mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) |
			config_mask(__I915_PMU_REQUESTED_FREQUENCY(i));

	return mask;
}

static bool pmu_needs_timer(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	u32 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = pmu->enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;

	/*
	 * Also, when software busyness tracking is available we do not
	 * need the timer for the I915_SAMPLE_BUSY counter.
	 */
	if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

static u64 __get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	u64 val;

	val = intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(&gt->rc6, INTEL_RC6_RES_RC6pp);

	return val;
}

static inline s64 ktime_since_raw(const ktime_t kt)
{
	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
}

static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
{
	return pmu->sample[gt_id][sample].cur;
}

static void
store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
{
	pmu->sample[gt_id][sample].cur = val;
}

static void
add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
{
	pmu->sample[gt_id][sample].cur += mul_u32_u32(val, mul);
}

static u64 get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	const unsigned int gt_id = gt->info.id;
	struct i915_pmu *pmu = &i915->pmu;
	intel_wakeref_t wakeref;
	unsigned long flags;
	u64 val;

	wakeref = intel_gt_pm_get_if_awake(gt);
	if (wakeref) {
		val = __get_rc6(gt);
		intel_gt_pm_put_async(gt, wakeref);
	}

	spin_lock_irqsave(&pmu->lock, flags);

	if (wakeref) {
		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
	} else {
		/*
		 * We think we are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		val = ktime_since_raw(pmu->sleep_last[gt_id]);
		val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
	}

	if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
		val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
	else
		store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);

	spin_unlock_irqrestore(&pmu->lock, flags);

	return val;
}

static void init_rc6(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	struct intel_gt *gt;
	unsigned int i;

	for_each_gt(gt, i915, i) {
		intel_wakeref_t wakeref;

		with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
			u64 val = __get_rc6(gt);

			store_sample(pmu, i, __I915_SAMPLE_RC6, val);
			store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
				     val);
			pmu->sleep_last[i] = ktime_get_raw();
		}
	}
}

static void park_rc6(struct intel_gt *gt)
{
	struct i915_pmu *pmu = &gt->i915->pmu;

	store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
	pmu->sleep_last[gt->info.id] = ktime_get_raw();
}
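/*
 * Illustration (not part of the original file): while the GT is parked the
 * hardware RC6 counter cannot be read without waking it, so get_rc6()
 * assumes the device has been in RC6 for the whole time it was asleep.
 * E.g. if park_rc6() last sampled 10s of RC6 residency and the GT has now
 * been parked for 2s, the reported value is roughly 10s + 2s = 12s,
 * clamped to stay monotonic via __I915_SAMPLE_RC6_LAST_REPORTED.
 */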
static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu)
{
	if (!pmu->timer_enabled && pmu_needs_timer(pmu)) {
		pmu->timer_enabled = true;
		pmu->timer_last = ktime_get();
		hrtimer_start_range_ns(&pmu->timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_parked(struct intel_gt *gt)
{
	struct i915_pmu *pmu = &gt->i915->pmu;

	if (!pmu->registered)
		return;

	spin_lock_irq(&pmu->lock);

	park_rc6(gt);

	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	pmu->unparked &= ~BIT(gt->info.id);
	if (pmu->unparked == 0)
		pmu->timer_enabled = false;

	spin_unlock_irq(&pmu->lock);
}

void i915_pmu_gt_unparked(struct intel_gt *gt)
{
	struct i915_pmu *pmu = &gt->i915->pmu;

	if (!pmu->registered)
		return;

	spin_lock_irq(&pmu->lock);

	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	if (pmu->unparked == 0)
		__i915_pmu_maybe_start_timer(pmu);

	pmu->unparked |= BIT(gt->info.id);

	spin_unlock_irq(&pmu->lock);
}
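/*
 * Note added for illustration (not part of the original file): with
 * FREQUENCY == 200 the sampling hrtimer above fires every
 * PERIOD = max(10000, NSEC_PER_SEC / 200) = 5,000,000 ns (5 ms), and it
 * only runs while at least one GT is unparked and a sampled event needs it.
 */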
static void
add_sample(struct i915_pmu_sample *sample, u32 val)
{
	sample->cur += val;
}

static bool exclusive_mmio_access(const struct drm_i915_private *i915)
{
	/*
	 * We have to avoid concurrent mmio cache line access on gen7 or
	 * risk a machine hang. For a fun history lesson dig out the old
	 * userspace intel_gpu_top and run it on Ivybridge or Haswell!
	 */
	return GRAPHICS_VER(i915) == 7;
}

static void gen3_engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
{
	struct intel_engine_pmu *pmu = &engine->pmu;
	bool busy;
	u32 val;

	val = ENGINE_READ_FW(engine, RING_CTL);
	if (val == 0) /* powerwell off => engine idle */
		return;

	if (val & RING_WAIT)
		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);
	if (val & RING_WAIT_SEMAPHORE)
		add_sample(&pmu->sample[I915_SAMPLE_SEMA], period_ns);

	/* No need to sample when busy stats are supported. */
	if (intel_engine_supports_stats(engine))
		return;

	/*
	 * While waiting on a semaphore or event, MI_MODE reports the
	 * ring as idle. However, previously using the seqno, and with
	 * execlists sampling, we account for the ring waiting as the
	 * engine being busy. Therefore, we record the sample as being
	 * busy if either waiting or !idle.
	 */
	busy = val & (RING_WAIT_SEMAPHORE | RING_WAIT);
	if (!busy) {
		val = ENGINE_READ_FW(engine, RING_MI_MODE);
		busy = !(val & MODE_IDLE);
	}
	if (busy)
		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
}

static void gen2_engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
{
	struct intel_engine_pmu *pmu = &engine->pmu;
	u32 tail, head, acthd;

	tail = ENGINE_READ_FW(engine, RING_TAIL);
	head = ENGINE_READ_FW(engine, RING_HEAD);
	acthd = ENGINE_READ_FW(engine, ACTHD);

	if (head & HEAD_WAIT_I8XX)
		add_sample(&pmu->sample[I915_SAMPLE_WAIT], period_ns);

	if (head & HEAD_WAIT_I8XX || head != acthd ||
	    (head & HEAD_ADDR) != (tail & TAIL_ADDR))
		add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
}

static void engine_sample(struct intel_engine_cs *engine, unsigned int period_ns)
{
	if (GRAPHICS_VER(engine->i915) >= 3)
		gen3_engine_sample(engine, period_ns);
	else
		gen2_engine_sample(engine, period_ns);
}

static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long flags;

	if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!intel_gt_pm_is_awake(gt))
		return;

	for_each_engine(engine, gt, id) {
		if (!engine->pmu.enable)
			continue;

		if (!intel_engine_pm_get_if_awake(engine))
			continue;

		if (exclusive_mmio_access(i915)) {
			spin_lock_irqsave(&engine->uncore->lock, flags);
			engine_sample(engine, period_ns);
			spin_unlock_irqrestore(&engine->uncore->lock, flags);
		} else {
			engine_sample(engine, period_ns);
		}

		intel_engine_pm_put_async(engine);
	}
}

static bool
frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
{
	return pmu->enable &
	       (config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt)) |
		config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt)));
}

static void
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
{
	struct drm_i915_private *i915 = gt->i915;
	const unsigned int gt_id = gt->info.id;
	struct i915_pmu *pmu = &i915->pmu;
	struct intel_rps *rps = &gt->rps;
	intel_wakeref_t wakeref;

	if (!frequency_sampling_enabled(pmu, gt_id))
		return;

	/* Report 0/0 (actual/requested) frequency while parked. */
	wakeref = intel_gt_pm_get_if_awake(gt);
	if (!wakeref)
		return;

	if (pmu->enable & config_mask(__I915_PMU_ACTUAL_FREQUENCY(gt_id))) {
		u32 val;

		/*
		 * We take a quick peek here without using forcewake
		 * so that we don't perturb the system under observation
		 * (forcewake => !rc6 => increased power use). We expect
		 * that if the read fails because it is outside of the
		 * mmio power well, then it will return 0 -- in which
		 * case we assume the system is running at the intended
		 * frequency. Fortunately, the read should rarely fail!
		 */
		val = intel_rps_read_actual_frequency_fw(rps);
		if (!val)
			val = intel_gpu_freq(rps, rps->cur_freq);

		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
				val, period_ns / 1000);
	}

	if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
				intel_rps_get_requested_frequency(rps),
				period_ns / 1000);
	}

	intel_gt_pm_put_async(gt, wakeref);
}
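/*
 * Illustration (not part of the original file): frequency samples are
 * accumulated as MHz * microseconds (val is in MHz, period_ns / 1000 is the
 * slice length in us). __i915_pmu_event_read() later divides the running
 * sum by USEC_PER_SEC, so e.g. 5 ms spent at 800 MHz contributes
 * 800 * 5000 / 1,000,000 = 4 to the exposed counter, i.e. the counter
 * advances by the average frequency in MHz per second of wall time.
 */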
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct i915_pmu *pmu = container_of(hrtimer, struct i915_pmu, timer);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	unsigned int period_ns;
	struct intel_gt *gt;
	unsigned int i;
	ktime_t now;

	if (!READ_ONCE(pmu->timer_enabled))
		return HRTIMER_NORESTART;

	now = ktime_get();
	period_ns = ktime_to_ns(ktime_sub(now, pmu->timer_last));
	pmu->timer_last = now;

	/*
	 * Strictly speaking the passed-in period may not be 100% accurate for
	 * all internal calculations, since some amount of time can be spent on
	 * grabbing the forcewake. However the potential error from timer
	 * callback delay greatly dominates this so we keep it simple.
	 */

	for_each_gt(gt, i915, i) {
		if (!(pmu->unparked & BIT(i)))
			continue;

		engines_sample(gt, period_ns);
		frequency_sample(gt, period_ns);
	}

	hrtimer_forward(hrtimer, now, ns_to_ktime(PERIOD));

	return HRTIMER_RESTART;
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);

	drm_WARN_ON(&i915->drm, event->parent);

	drm_dev_put(&i915->drm);
}

static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (GRAPHICS_VER(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
	struct intel_gt *gt = to_gt(i915);

	unsigned int gt_id = config_gt_id(config);
	unsigned int max_gt_id = HAS_EXTRA_GT_LIST(i915) ? 1 : 0;

	if (gt_id > max_gt_id)
		return -ENOENT;

	switch (config_counter(config)) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		fallthrough;
	case I915_PMU_REQUESTED_FREQUENCY:
		if (GRAPHICS_VER(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		if (gt_id)
			return -ENOENT;
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!gt->rc6.supported)
			return -ENODEV;
		break;
	case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
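/*
 * Note added for illustration (not part of the original file): the error
 * codes above follow the usual perf convention as we understand it:
 * -ENOENT means "not an event this PMU recognises" (perf core may then try
 * other PMUs), while -ENODEV means "recognised, but unsupported on this
 * hardware", e.g. SEMA sampling before gen6 or RC6 without rc6 support.
 */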
static int engine_event_init(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	return engine_event_status(engine, engine_event_sample(event));
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	int ret;

	if (!pmu->registered)
		return -ENODEV;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent) {
		drm_dev_get(&i915->drm);
		event->destroy = i915_pmu_event_destroy;
	}

	return 0;
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (drm_WARN_ON_ONCE(&i915->drm, !engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			ktime_t unused;

			val = ktime_to_ns(intel_engine_get_busy_time(engine,
								     &unused));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		const unsigned int gt_id = config_gt_id(event->attr.config);
		const u64 config = config_counter(event->attr.config);

		switch (config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val = div_u64(read_sample(pmu, gt_id,
						  __I915_SAMPLE_FREQ_ACT),
				      USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val = div_u64(read_sample(pmu, gt_id,
						  __I915_SAMPLE_FREQ_REQ),
				      USEC_PER_SEC /* to MHz */);
			break;
		case I915_PMU_INTERRUPTS:
			val = READ_ONCE(pmu->irq_count);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(i915->gt[gt_id]);
			break;
		case I915_PMU_SOFTWARE_GT_AWAKE_TIME:
			val = ktime_to_ns(intel_gt_get_awake_time(to_gt(i915)));
			break;
		}
	}

	return val;
}

static void i915_pmu_event_read(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

	if (!pmu->registered) {
		event->hw.state = PERF_HES_STOPPED;
		return;
	}

	prev = local64_read(&hwc->prev_count);
	do {
		new = __i915_pmu_event_read(event);
	} while (!local64_try_cmpxchg(&hwc->prev_count, &prev, new));

	local64_add(new - prev, &event->count);
}
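/*
 * Note added for illustration (not part of the original file): the read
 * side follows the common perf pattern of keeping the last published
 * snapshot in hw.prev_count and only adding the delta to event->count; the
 * try_cmpxchg loop retries if another reader updated prev_count in the
 * meantime, so each increment is accounted exactly once.
 */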
static void i915_pmu_enable(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	const unsigned int bit = event_bit(event);
	unsigned long flags;

	if (bit == -1)
		goto update;

	spin_lock_irqsave(&pmu->lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS);
	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == ~0);

	pmu->enable |= BIT(bit);
	pmu->enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(pmu);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.enable_count) !=
			     I915_ENGINE_SAMPLE_COUNT);
		BUILD_BUG_ON(ARRAY_SIZE(engine->pmu.sample) !=
			     I915_ENGINE_SAMPLE_COUNT);
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);

		engine->pmu.enable |= BIT(sample);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&pmu->lock, flags);

update:
	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct i915_pmu *pmu = event_to_pmu(event);
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	const unsigned int bit = event_bit(event);
	unsigned long flags;

	if (bit == -1)
		return;

	spin_lock_irqsave(&pmu->lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.enable_count));
		GEM_BUG_ON(sample >= ARRAY_SIZE(engine->pmu.sample));
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);

		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count));
	GEM_BUG_ON(pmu->enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--pmu->enable_count[bit] == 0) {
		pmu->enable &= ~BIT(bit);
		pmu->timer_enabled &= pmu_needs_timer(pmu);
	}

	spin_unlock_irqrestore(&pmu->lock, flags);
}
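/*
 * Illustration (not part of the original file): the enable/disable pair
 * reference counts listeners per bit. If two perf_events are opened on the
 * same engine busy counter, enable_count for that bit goes 0 -> 1 -> 2 and
 * the enable bit (and, if needed, the sampling timer) is set up only on the
 * first transition; the bit is cleared and the timer allowed to stop only
 * once the second listener is also removed.
 */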
static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	struct i915_pmu *pmu = event_to_pmu(event);

	if (!pmu->registered)
		return;

	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	struct i915_pmu *pmu = event_to_pmu(event);

	if (!pmu->registered)
		goto out;

	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);

	i915_pmu_disable(event);

out:
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	struct i915_pmu *pmu = event_to_pmu(event);

	if (!pmu->registered)
		return -ENODEV;

	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sysfs_emit(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sysfs_emit(buf, "config=0x%lx\n", eattr->val);
}

#define __event(__counter, __name, __unit) \
{ \
	.counter = (__counter), \
	.name = (__name), \
	.unit = (__unit), \
	.global = false, \
}

#define __global_event(__counter, __name, __unit) \
{ \
	.counter = (__counter), \
	.name = (__name), \
	.unit = (__unit), \
	.global = true, \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}
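/*
 * Illustration (not part of the original file): these attributes end up as
 * read-only files under the PMU's sysfs directory, typically
 * /sys/bus/event_source/devices/i915/. For example an events/rcs0-busy file
 * backed by i915_pmu_event_show() reads back "config=0x<value>", the
 * matching events/rcs0-busy.unit file reads "ns", and format/i915_eventid
 * describes the config field layout as "config:0-20".
 */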
static struct attribute **
create_event_attributes(struct i915_pmu *pmu)
{
	struct drm_i915_private *i915 = pmu_to_i915(pmu);
	static const struct {
		unsigned int counter;
		const char *name;
		const char *unit;
		bool global;
	} events[] = {
		__event(0, "actual-frequency", "M"),
		__event(1, "requested-frequency", "M"),
		__global_event(2, "interrupts", NULL),
		__event(3, "rc6-residency", "ns"),
		__event(4, "software-gt-awake-time", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	struct intel_gt *gt;
	unsigned int i, j;

	/* Count how many counters we will be exposing. */
	for_each_gt(gt, i915, j) {
		for (i = 0; i < ARRAY_SIZE(events); i++) {
			u64 config = ___I915_PMU_OTHER(j, events[i].counter);

			if (!config_status(i915, config))
				count++;
		}
	}

	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for_each_gt(gt, i915, j) {
		for (i = 0; i < ARRAY_SIZE(events); i++) {
			u64 config = ___I915_PMU_OTHER(j, events[i].counter);
			char *str;

			if (config_status(i915, config))
				continue;

			if (events[i].global || !HAS_EXTRA_GT_LIST(i915))
				str = kstrdup(events[i].name, GFP_KERNEL);
			else
				str = kasprintf(GFP_KERNEL, "%s-gt%u",
						events[i].name, j);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter = add_i915_attr(i915_iter, str, config);

			if (events[i].unit) {
				if (events[i].global || !HAS_EXTRA_GT_LIST(i915))
					str = kasprintf(GFP_KERNEL, "%s.unit",
							events[i].name);
				else
					str = kasprintf(GFP_KERNEL, "%s-gt%u.unit",
							events[i].name, j);
				if (!str)
					goto err;

				*attr_iter++ = &pmu_iter->attr.attr;
				pmu_iter = add_pmu_attr(pmu_iter, str,
							events[i].unit);
			}
		}
	}

	/* Initialize supported engine counters. */
	for_each_uabi_engine(engine, i915) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->uabi_instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	pmu->i915_attr = i915_attr;
	pmu->pmu_attr = pmu_attr;

	return attr;

err:;
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}

static void free_event_attributes(struct i915_pmu *pmu)
{
	struct attribute **attr_iter = pmu->events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(pmu->events_attr_group.attrs);
	kfree(pmu->i915_attr);
	kfree(pmu->pmu_attr);

	pmu->events_attr_group.attrs = NULL;
	pmu->i915_attr = NULL;
	pmu->pmu_attr = NULL;
}
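/*
 * Usage sketch (not part of the original file): once the PMU below is
 * registered, the counters created above can be read with standard perf
 * tooling using the event names generated by create_event_attributes(),
 * e.g. (assuming an engine named rcs0 and sufficient privileges):
 *
 *	perf stat -a -I 1000 -e i915/rcs0-busy/,i915/actual-frequency/ sleep 5
 */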
void i915_pmu_register(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;
	const struct attribute_group *attr_groups[] = {
		&i915_pmu_format_attr_group,
		&pmu->events_attr_group,
		NULL
	};
	int ret = -ENOMEM;

	spin_lock_init(&pmu->lock);
	hrtimer_setup(&pmu->timer, i915_sample, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	init_rc6(pmu);

	if (IS_DGFX(i915)) {
		pmu->name = kasprintf(GFP_KERNEL,
				      "i915_%s",
				      dev_name(i915->drm.dev));
		if (pmu->name) {
			/* tools/perf reserves colons as special. */
			strreplace((char *)pmu->name, ':', '_');
		}
	} else {
		pmu->name = "i915";
	}
	if (!pmu->name)
		goto err;

	pmu->events_attr_group.name = "events";
	pmu->events_attr_group.attrs = create_event_attributes(pmu);
	if (!pmu->events_attr_group.attrs)
		goto err_name;

	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
					GFP_KERNEL);
	if (!pmu->base.attr_groups)
		goto err_attr;

	pmu->base.module = THIS_MODULE;
	pmu->base.task_ctx_nr = perf_invalid_context;
	pmu->base.scope = PERF_PMU_SCOPE_SYS_WIDE;
	pmu->base.event_init = i915_pmu_event_init;
	pmu->base.add = i915_pmu_event_add;
	pmu->base.del = i915_pmu_event_del;
	pmu->base.start = i915_pmu_event_start;
	pmu->base.stop = i915_pmu_event_stop;
	pmu->base.read = i915_pmu_event_read;

	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
	if (ret)
		goto err_groups;

	pmu->registered = true;

	return;

err_groups:
	kfree(pmu->base.attr_groups);
err_attr:
	free_event_attributes(pmu);
err_name:
	if (IS_DGFX(i915))
		kfree(pmu->name);
err:
	drm_notice(&i915->drm, "Failed to register PMU!\n");
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	struct i915_pmu *pmu = &i915->pmu;

	if (!pmu->registered)
		return;

	/* Disconnect the PMU callbacks */
	pmu->registered = false;

	hrtimer_cancel(&pmu->timer);

	perf_pmu_unregister(&pmu->base);
	kfree(pmu->base.attr_groups);
	if (IS_DGFX(i915))
		kfree(pmu->name);
	free_event_attributes(pmu);
}