/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/irq.h>
#include "i915_pmu.h"
#include "intel_ringbuffer.h"
#include "i915_drv.h"

/* Frequency for the sampling timer for events which need it. */
#define FREQUENCY 200
#define PERIOD max_t(u64, 10000, NSEC_PER_SEC / FREQUENCY)

#define ENGINE_SAMPLE_MASK \
	(BIT(I915_SAMPLE_BUSY) | \
	 BIT(I915_SAMPLE_WAIT) | \
	 BIT(I915_SAMPLE_SEMA))

#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)

static cpumask_t i915_pmu_cpumask;

static u8 engine_config_sample(u64 config)
{
	return config & I915_PMU_SAMPLE_MASK;
}

static u8 engine_event_sample(struct perf_event *event)
{
	return engine_config_sample(event->attr.config);
}

static u8 engine_event_class(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_CLASS_SHIFT) & 0xff;
}

static u8 engine_event_instance(struct perf_event *event)
{
	return (event->attr.config >> I915_PMU_SAMPLE_BITS) & 0xff;
}

static bool is_engine_config(u64 config)
{
	return config < __I915_PMU_OTHER(0);
}

static unsigned int config_enabled_bit(u64 config)
{
	if (is_engine_config(config))
		return engine_config_sample(config);
	else
		return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0));
}

static u64 config_enabled_mask(u64 config)
{
	return BIT_ULL(config_enabled_bit(config));
}

static bool is_engine_event(struct perf_event *event)
{
	return is_engine_config(event->attr.config);
}

static unsigned int event_enabled_bit(struct perf_event *event)
{
	return config_enabled_bit(event->attr.config);
}

static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
{
	u64 enable;

	/*
	 * Only some counters need the sampling timer.
	 *
	 * We start with a bitmask of all currently enabled events.
	 */
	enable = i915->pmu.enable;

	/*
	 * Mask out all the ones which do not need the timer, or in
	 * other words keep all the ones that could need the timer.
	 */
	enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
		  config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) |
		  ENGINE_SAMPLE_MASK;

	/*
	 * When the GPU is idle per-engine counters do not need to be
	 * running so clear those bits out.
	 */
	if (!gpu_active)
		enable &= ~ENGINE_SAMPLE_MASK;
	/*
	 * Also, if software busyness tracking is available, we do not need
	 * the timer for the I915_SAMPLE_BUSY counter.
	 *
	 * Use RCS as proxy for all engines.
	 */
	else if (intel_engine_supports_stats(i915->engine[RCS]))
		enable &= ~BIT(I915_SAMPLE_BUSY);

	/*
	 * If some bits remain it means we need the sampling timer running.
	 */
	return enable;
}

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Signal sampling timer to stop if only engine events are enabled and
	 * GPU went idle.
	 */
	i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
	spin_unlock_irq(&i915->pmu.lock);
}

static void __i915_pmu_maybe_start_timer(struct drm_i915_private *i915)
{
	if (!i915->pmu.timer_enabled && pmu_needs_timer(i915, true)) {
		i915->pmu.timer_enabled = true;
		hrtimer_start_range_ns(&i915->pmu.timer,
				       ns_to_ktime(PERIOD), 0,
				       HRTIMER_MODE_REL_PINNED);
	}
}

void i915_pmu_gt_unparked(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	spin_lock_irq(&i915->pmu.lock);
	/*
	 * Re-enable sampling timer when GPU goes active.
	 */
	__i915_pmu_maybe_start_timer(i915);
	spin_unlock_irq(&i915->pmu.lock);
}

static bool grab_forcewake(struct drm_i915_private *i915, bool fw)
{
	if (!fw)
		intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);

	return true;
}

static void
update_sample(struct i915_pmu_sample *sample, u32 unit, u32 val)
{
	sample->cur += mul_u32_u32(val, unit);
}
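
/*
 * Sample the busyness, wait and semaphore state of each engine on a timer
 * tick. Busyness is derived from comparing the last submitted and current
 * seqnos, while the wait and semaphore bits are read from RING_CTL under
 * forcewake.
 */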
static void engines_sample(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	bool fw = false;

	if ((dev_priv->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
		return;

	if (!dev_priv->gt.awake)
		return;

	if (!intel_runtime_pm_get_if_in_use(dev_priv))
		return;

	for_each_engine(engine, dev_priv, id) {
		u32 current_seqno = intel_engine_get_seqno(engine);
		u32 last_seqno = intel_engine_last_submit(engine);
		u32 val;

		val = !i915_seqno_passed(current_seqno, last_seqno);

		update_sample(&engine->pmu.sample[I915_SAMPLE_BUSY],
			      PERIOD, val);

		if (val && (engine->pmu.enable &
		    (BIT(I915_SAMPLE_WAIT) | BIT(I915_SAMPLE_SEMA)))) {
			fw = grab_forcewake(dev_priv, fw);

			val = I915_READ_FW(RING_CTL(engine->mmio_base));
		} else {
			val = 0;
		}

		update_sample(&engine->pmu.sample[I915_SAMPLE_WAIT],
			      PERIOD, !!(val & RING_WAIT));

		update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
			      PERIOD, !!(val & RING_WAIT_SEMAPHORE));
	}

	if (fw)
		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);

	intel_runtime_pm_put(dev_priv);
}
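
/*
 * Sample the actual (from RPSTAT1) and requested GPU frequencies on a timer
 * tick, using the software-tracked requested frequency as a stand-in for
 * the actual one whenever the GPU is asleep.
 */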
static void frequency_sample(struct drm_i915_private *dev_priv)
{
	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
		u32 val;

		val = dev_priv->gt_pm.rps.cur_freq;
		if (dev_priv->gt.awake &&
		    intel_runtime_pm_get_if_in_use(dev_priv)) {
			val = intel_get_cagf(dev_priv,
					     I915_READ_NOTRACE(GEN6_RPSTAT1));
			intel_runtime_pm_put(dev_priv);
		}

		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
			      1, intel_gpu_freq(dev_priv, val));
	}

	if (dev_priv->pmu.enable &
	    config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
		update_sample(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_REQ], 1,
			      intel_gpu_freq(dev_priv,
					     dev_priv->gt_pm.rps.cur_freq));
	}
}

static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
{
	struct drm_i915_private *i915 =
		container_of(hrtimer, struct drm_i915_private, pmu.timer);

	if (!READ_ONCE(i915->pmu.timer_enabled))
		return HRTIMER_NORESTART;

	engines_sample(i915);
	frequency_sample(i915);

	hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD));
	return HRTIMER_RESTART;
}

static u64 count_interrupts(struct drm_i915_private *i915)
{
	/* open-coded kstat_irqs() */
	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
	u64 sum = 0;
	int cpu;

	if (!desc || !desc->kstat_irqs)
		return 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);

	return sum;
}

static void engine_event_destroy(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915,
					  engine_event_class(event),
					  engine_event_instance(event));
	if (WARN_ON_ONCE(!engine))
		return;

	if (engine_event_sample(event) == I915_SAMPLE_BUSY &&
	    intel_engine_supports_stats(engine))
		intel_disable_engine_stats(engine);
}

static void i915_pmu_event_destroy(struct perf_event *event)
{
	WARN_ON(event->parent);

	if (is_engine_event(event))
		engine_event_destroy(event);
}

static int
engine_event_status(struct intel_engine_cs *engine,
		    enum drm_i915_pmu_engine_sample sample)
{
	switch (sample) {
	case I915_SAMPLE_BUSY:
	case I915_SAMPLE_WAIT:
		break;
	case I915_SAMPLE_SEMA:
		if (INTEL_GEN(engine->i915) < 6)
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int
config_status(struct drm_i915_private *i915, u64 config)
{
	switch (config) {
	case I915_PMU_ACTUAL_FREQUENCY:
		if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
			/* Requires a mutex for sampling! */
			return -ENODEV;
		/* Fall-through. */
	case I915_PMU_REQUESTED_FREQUENCY:
		if (INTEL_GEN(i915) < 6)
			return -ENODEV;
		break;
	case I915_PMU_INTERRUPTS:
		break;
	case I915_PMU_RC6_RESIDENCY:
		if (!HAS_RC6(i915))
			return -ENODEV;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}

static int engine_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	struct intel_engine_cs *engine;
	u8 sample;
	int ret;

	engine = intel_engine_lookup_user(i915, engine_event_class(event),
					  engine_event_instance(event));
	if (!engine)
		return -ENODEV;

	sample = engine_event_sample(event);
	ret = engine_event_status(engine, sample);
	if (ret)
		return ret;

	if (sample == I915_SAMPLE_BUSY && intel_engine_supports_stats(engine))
		ret = intel_enable_engine_stats(engine);

	return ret;
}

static int i915_pmu_event_init(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	int ret;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask))
		return -EINVAL;

	if (is_engine_event(event))
		ret = engine_event_init(event);
	else
		ret = config_status(i915, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent)
		event->destroy = i915_pmu_event_destroy;

	return 0;
}
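
/*
 * Total RC6 residency is the sum of the RC6, RC6p and RC6pp counters,
 * where the deeper states are present.
 */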
static u64 __get_rc6(struct drm_i915_private *i915)
{
	u64 val;

	val = intel_rc6_residency_ns(i915,
				     IS_VALLEYVIEW(i915) ?
				     VLV_GT_RENDER_RC6 :
				     GEN6_GT_GFX_RC6);

	if (HAS_RC6p(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);

	if (HAS_RC6pp(i915))
		val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);

	return val;
}
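
/*
 * Runtime PM aware RC6 read: while the device is runtime suspended the
 * residency counters cannot be read, so the value is estimated from the
 * time spent suspended, keeping the reported counter monotonic.
 */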
static u64 get_rc6(struct drm_i915_private *i915)
{
#if IS_ENABLED(CONFIG_PM)
	unsigned long flags;
	u64 val;

	if (intel_runtime_pm_get_if_in_use(i915)) {
		val = __get_rc6(i915);
		intel_runtime_pm_put(i915);

		/*
		 * If we are coming back from being runtime suspended we must
		 * be careful not to report a larger value than returned
		 * previously.
		 */

		spin_lock_irqsave(&i915->pmu.lock, flags);

		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		}

		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	} else {
		struct pci_dev *pdev = i915->drm.pdev;
		struct device *kdev = &pdev->dev;

		/*
		 * We are runtime suspended.
		 *
		 * Report the delta from when the device was suspended to now,
		 * on top of the last known real value, as the approximated RC6
		 * counter value.
		 */
		spin_lock_irqsave(&i915->pmu.lock, flags);
		spin_lock(&kdev->power.lock);

		/*
		 * After the above branch, where intel_runtime_pm_get_if_in_use
		 * failed to get the runtime PM reference, we cannot assume we
		 * are in runtime suspend since we could either: a) race with
		 * coming out of it before we took the power.lock, or b) be in
		 * other states than suspended which can also bring us here.
		 *
		 * We need to double-check that we are indeed currently runtime
		 * suspended and if not we cannot do better than report the
		 * last known RC6 value.
		 */
		if (kdev->power.runtime_status == RPM_SUSPENDED) {
			if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
				i915->pmu.suspended_jiffies_last =
						  kdev->power.suspended_jiffies;

			val = kdev->power.suspended_jiffies -
			      i915->pmu.suspended_jiffies_last;
			val += jiffies - kdev->power.accounting_timestamp;

			val = jiffies_to_nsecs(val);
			val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;

			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
		} else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
		}

		spin_unlock(&kdev->power.lock);
		spin_unlock_irqrestore(&i915->pmu.lock, flags);
	}

	return val;
#else
	return __get_rc6(i915);
#endif
}

static u64 __i915_pmu_event_read(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	u64 val = 0;

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));

		if (WARN_ON_ONCE(!engine)) {
			/* Do nothing */
		} else if (sample == I915_SAMPLE_BUSY &&
			   intel_engine_supports_stats(engine)) {
			val = ktime_to_ns(intel_engine_get_busy_time(engine));
		} else {
			val = engine->pmu.sample[sample].cur;
		}
	} else {
		switch (event->attr.config) {
		case I915_PMU_ACTUAL_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_ACT].cur,
				   FREQUENCY);
			break;
		case I915_PMU_REQUESTED_FREQUENCY:
			val =
			   div_u64(i915->pmu.sample[__I915_SAMPLE_FREQ_REQ].cur,
				   FREQUENCY);
			break;
		case I915_PMU_INTERRUPTS:
			val = count_interrupts(i915);
			break;
		case I915_PMU_RC6_RESIDENCY:
			val = get_rc6(i915);
			break;
		}
	}

	return val;
}
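
/*
 * Accumulate the delta between the previously reported and the current
 * counter value, using a cmpxchg loop on prev_count so concurrent readers
 * do not count the same delta twice.
 */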
static void i915_pmu_event_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	u64 prev, new;

again:
	prev = local64_read(&hwc->prev_count);
	new = __i915_pmu_event_read(event);

	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}

static void i915_pmu_enable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	/*
	 * Update the bitmask of enabled events and increment
	 * the event reference counter.
	 */
	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == ~0);
	i915->pmu.enable |= BIT_ULL(bit);
	i915->pmu.enable_count[bit]++;

	/*
	 * Start the sampling timer if needed and not already enabled.
	 */
	__i915_pmu_maybe_start_timer(i915);

	/*
	 * For per-engine events the bitmask and reference counting
	 * is stored per engine.
	 */
	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		engine->pmu.enable |= BIT(sample);

		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == ~0);
		engine->pmu.enable_count[sample]++;
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);

	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners. Even when the event was already enabled and has
	 * an existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
}

static void i915_pmu_disable(struct perf_event *event)
{
	struct drm_i915_private *i915 =
		container_of(event->pmu, typeof(*i915), pmu.base);
	unsigned int bit = event_enabled_bit(event);
	unsigned long flags;

	spin_lock_irqsave(&i915->pmu.lock, flags);

	if (is_engine_event(event)) {
		u8 sample = engine_event_sample(event);
		struct intel_engine_cs *engine;

		engine = intel_engine_lookup_user(i915,
						  engine_event_class(event),
						  engine_event_instance(event));
		GEM_BUG_ON(!engine);
		GEM_BUG_ON(sample >= I915_PMU_SAMPLE_BITS);
		GEM_BUG_ON(engine->pmu.enable_count[sample] == 0);
		/*
		 * Decrement the reference count and clear the enabled
		 * bitmask when the last listener on an event goes away.
		 */
		if (--engine->pmu.enable_count[sample] == 0)
			engine->pmu.enable &= ~BIT(sample);
	}

	GEM_BUG_ON(bit >= I915_PMU_MASK_BITS);
	GEM_BUG_ON(i915->pmu.enable_count[bit] == 0);
	/*
	 * Decrement the reference count and clear the enabled
	 * bitmask when the last listener on an event goes away.
	 */
	if (--i915->pmu.enable_count[bit] == 0) {
		i915->pmu.enable &= ~BIT_ULL(bit);
		i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);
}

static void i915_pmu_event_start(struct perf_event *event, int flags)
{
	i915_pmu_enable(event);
	event->hw.state = 0;
}

static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = PERF_HES_STOPPED;
}

static int i915_pmu_event_add(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_START)
		i915_pmu_event_start(event, flags);

	return 0;
}

static void i915_pmu_event_del(struct perf_event *event, int flags)
{
	i915_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int i915_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}
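
/*
 * Sysfs plumbing: the "format", "events" and "cpumask" attribute groups
 * below describe the PMU to userspace tools such as perf, which find it
 * under /sys/bus/event_source/devices/i915/.
 */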
struct i915_str_attribute {
	struct device_attribute attr;
	const char *str;
};

static ssize_t i915_pmu_format_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct i915_str_attribute *eattr;

	eattr = container_of(attr, struct i915_str_attribute, attr);
	return sprintf(buf, "%s\n", eattr->str);
}

#define I915_PMU_FORMAT_ATTR(_name, _config) \
	(&((struct i915_str_attribute[]) { \
		{ .attr = __ATTR(_name, 0444, i915_pmu_format_show, NULL), \
		  .str = _config, } \
	})[0].attr.attr)

static struct attribute *i915_pmu_format_attrs[] = {
	I915_PMU_FORMAT_ATTR(i915_eventid, "config:0-20"),
	NULL,
};

static const struct attribute_group i915_pmu_format_attr_group = {
	.name = "format",
	.attrs = i915_pmu_format_attrs,
};

struct i915_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t i915_pmu_event_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct i915_ext_attribute *eattr;

	eattr = container_of(attr, struct i915_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static struct attribute_group i915_pmu_events_attr_group = {
	.name = "events",
	/* Patch in attrs at runtime. */
};

static ssize_t
i915_pmu_get_attr_cpumask(struct device *dev,
			  struct device_attribute *attr,
			  char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, 0444, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};

static const struct attribute_group *i915_pmu_attr_groups[] = {
	&i915_pmu_format_attr_group,
	&i915_pmu_events_attr_group,
	&i915_pmu_cpumask_attr_group,
	NULL
};

#define __event(__config, __name, __unit) \
{ \
	.config = (__config), \
	.name = (__name), \
	.unit = (__unit), \
}

#define __engine_event(__sample, __name) \
{ \
	.sample = (__sample), \
	.name = (__name), \
}

static struct i915_ext_attribute *
add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = i915_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}
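
/*
 * Build the "events" attribute group at runtime, exposing a name and,
 * where applicable, a unit attribute for every counter supported by the
 * running hardware.
 */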
static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
{
	static const struct {
		u64 config;
		const char *name;
		const char *unit;
	} events[] = {
		__event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
		__event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
		__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
		__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
	};
	static const struct {
		enum drm_i915_pmu_engine_sample sample;
		char *name;
	} engine_events[] = {
		__engine_event(I915_SAMPLE_BUSY, "busy"),
		__engine_event(I915_SAMPLE_SEMA, "sema"),
		__engine_event(I915_SAMPLE_WAIT, "wait"),
	};
	unsigned int count = 0;
	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct i915_ext_attribute *i915_attr = NULL, *i915_iter;
	struct attribute **attr = NULL, **attr_iter;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int i;

	/* Count how many counters we will be exposing. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		if (!config_status(i915, events[i].config))
			count++;
	}

	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			if (!engine_event_status(engine,
						 engine_events[i].sample))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL);
	if (!i915_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/* Max one pointer of each attribute type plus a termination entry. */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	i915_iter = i915_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	/* Initialize supported non-engine counters. */
	for (i = 0; i < ARRAY_SIZE(events); i++) {
		char *str;

		if (config_status(i915, events[i].config))
			continue;

		str = kstrdup(events[i].name, GFP_KERNEL);
		if (!str)
			goto err;

		*attr_iter++ = &i915_iter->attr.attr;
		i915_iter = add_i915_attr(i915_iter, str, events[i].config);

		if (events[i].unit) {
			str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit);
		}
	}

	/* Initialize supported engine counters. */
	for_each_engine(engine, i915, id) {
		for (i = 0; i < ARRAY_SIZE(engine_events); i++) {
			char *str;

			if (engine_event_status(engine,
						engine_events[i].sample))
				continue;

			str = kasprintf(GFP_KERNEL, "%s-%s",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &i915_iter->attr.attr;
			i915_iter =
				add_i915_attr(i915_iter, str,
					      __I915_PMU_ENGINE(engine->uabi_class,
								engine->instance,
								engine_events[i].sample));

			str = kasprintf(GFP_KERNEL, "%s-%s.unit",
					engine->name, engine_events[i].name);
			if (!str)
				goto err;

			*attr_iter++ = &pmu_iter->attr.attr;
			pmu_iter = add_pmu_attr(pmu_iter, str, "ns");
		}
	}

	i915->pmu.i915_attr = i915_attr;
	i915->pmu.pmu_attr = pmu_attr;

	return attr;

err:;
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(i915_attr);
	kfree(pmu_attr);

	return NULL;
}

static void free_event_attributes(struct drm_i915_private *i915)
{
	struct attribute **attr_iter = i915_pmu_events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(i915_pmu_events_attr_group.attrs);
	kfree(i915->pmu.i915_attr);
	kfree(i915->pmu.pmu_attr);

	i915_pmu_events_attr_group.attrs = NULL;
	i915->pmu.i915_attr = NULL;
	i915->pmu.pmu_attr = NULL;
}
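
/*
 * CPU hotplug handling: events are exposed on a single CPU, advertised
 * through the cpumask attribute, and the perf context is migrated to
 * another online CPU if the designated one goes away.
 */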
static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);

	GEM_BUG_ON(!pmu->base.event_init);

	/* Select the first online CPU as a designated reader. */
	if (!cpumask_weight(&i915_pmu_cpumask))
		cpumask_set_cpu(cpu, &i915_pmu_cpumask);

	return 0;
}

static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node);
	unsigned int target;

	GEM_BUG_ON(!pmu->base.event_init);

	if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &i915_pmu_cpumask);
			perf_pmu_migrate_context(&pmu->base, cpu, target);
		}
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

static int i915_pmu_register_cpuhp_state(struct drm_i915_private *i915)
{
	enum cpuhp_state slot;
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/i915:online",
				      i915_pmu_cpu_online,
				      i915_pmu_cpu_offline);
	if (ret < 0)
		return ret;

	slot = ret;
	ret = cpuhp_state_add_instance(slot, &i915->pmu.node);
	if (ret) {
		cpuhp_remove_multi_state(slot);
		return ret;
	}

	cpuhp_slot = slot;
	return 0;
}

static void i915_pmu_unregister_cpuhp_state(struct drm_i915_private *i915)
{
	WARN_ON(cpuhp_slot == CPUHP_INVALID);
	WARN_ON(cpuhp_state_remove_instance(cpuhp_slot, &i915->pmu.node));
	cpuhp_remove_multi_state(cpuhp_slot);
}
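
/*
 * Register the PMU: create the event attributes for this device, register
 * with the perf core and hook up CPU hotplug notifications. On failure the
 * PMU is left disabled, with pmu.base.event_init set back to NULL.
 */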
void i915_pmu_register(struct drm_i915_private *i915)
{
	int ret;

	if (INTEL_GEN(i915) <= 2) {
		DRM_INFO("PMU not supported for this GPU.");
		return;
	}

	i915_pmu_events_attr_group.attrs = create_event_attributes(i915);
	if (!i915_pmu_events_attr_group.attrs) {
		ret = -ENOMEM;
		goto err;
	}

	i915->pmu.base.attr_groups = i915_pmu_attr_groups;
	i915->pmu.base.task_ctx_nr = perf_invalid_context;
	i915->pmu.base.event_init = i915_pmu_event_init;
	i915->pmu.base.add = i915_pmu_event_add;
	i915->pmu.base.del = i915_pmu_event_del;
	i915->pmu.base.start = i915_pmu_event_start;
	i915->pmu.base.stop = i915_pmu_event_stop;
	i915->pmu.base.read = i915_pmu_event_read;
	i915->pmu.base.event_idx = i915_pmu_event_event_idx;

	spin_lock_init(&i915->pmu.lock);
	hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	i915->pmu.timer.function = i915_sample;

	ret = perf_pmu_register(&i915->pmu.base, "i915", -1);
	if (ret)
		goto err;

	ret = i915_pmu_register_cpuhp_state(i915);
	if (ret)
		goto err_unreg;

	return;

err_unreg:
	perf_pmu_unregister(&i915->pmu.base);
err:
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
	DRM_NOTE("Failed to register PMU! (err=%d)\n", ret);
}

void i915_pmu_unregister(struct drm_i915_private *i915)
{
	if (!i915->pmu.base.event_init)
		return;

	WARN_ON(i915->pmu.enable);

	hrtimer_cancel(&i915->pmu.timer);

	i915_pmu_unregister_cpuhp_state(i915);

	perf_pmu_unregister(&i915->pmu.base);
	i915->pmu.base.event_init = NULL;
	free_event_attributes(i915);
}