// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/xe_drm.h>

#include "regs/xe_gt_regs.h"
#include "xe_device.h"
#include "xe_gt_clock.h"
#include "xe_mmio.h"

static cpumask_t xe_pmu_cpumask;
static unsigned int xe_pmu_target_cpu = -1;

static unsigned int config_gt_id(const u64 config)
{
	return config >> __DRM_XE_PMU_GT_SHIFT;
}

static u64 config_counter(const u64 config)
{
	return config & ~(~0ULL << __DRM_XE_PMU_GT_SHIFT);
}

static void xe_pmu_event_destroy(struct perf_event *event)
{
	struct xe_device *xe =
		container_of(event->pmu, typeof(*xe), pmu.base);

	drm_WARN_ON(&xe->drm, event->parent);

	drm_dev_put(&xe->drm);
}

static u64 __engine_group_busyness_read(struct xe_gt *gt, int sample_type)
{
	/* Zero-initialized so an unknown sample type reads back as 0. */
	u64 val = 0;

	switch (sample_type) {
	case __XE_SAMPLE_RENDER_GROUP_BUSY:
		val = xe_mmio_read32(gt, XE_OAG_RENDER_BUSY_FREE);
		break;
	case __XE_SAMPLE_COPY_GROUP_BUSY:
		val = xe_mmio_read32(gt, XE_OAG_BLT_BUSY_FREE);
		break;
	case __XE_SAMPLE_MEDIA_GROUP_BUSY:
		val = xe_mmio_read32(gt, XE_OAG_ANY_MEDIA_FF_BUSY_FREE);
		break;
	case __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY:
		val = xe_mmio_read32(gt, XE_OAG_RC0_ANY_ENGINE_BUSY_FREE);
		break;
	default:
		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
	}

	return xe_gt_clock_cycles_to_ns(gt, val * 16);
}

static u64 engine_group_busyness_read(struct xe_gt *gt, u64 config)
{
	int sample_type = config_counter(config);
	const unsigned int gt_id = gt->info.id;
	struct xe_device *xe = gt->tile->xe;
	struct xe_pmu *pmu = &xe->pmu;
	unsigned long flags;
	bool device_awake;
	u64 val;

	device_awake = xe_device_mem_access_get_if_ongoing(xe);
	if (device_awake) {
		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
		val = __engine_group_busyness_read(gt, sample_type);
		XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FW_GT));
		xe_device_mem_access_put(xe);
	}

	spin_lock_irqsave(&pmu->lock, flags);

	if (device_awake)
		pmu->sample[gt_id][sample_type] = val;
	else
		val = pmu->sample[gt_id][sample_type];

	spin_unlock_irqrestore(&pmu->lock, flags);

	return val;
}

static void engine_group_busyness_store(struct xe_gt *gt)
{
	struct xe_pmu *pmu = &gt->tile->xe->pmu;
	unsigned int gt_id = gt->info.id;
	unsigned long flags;
	int i;

	spin_lock_irqsave(&pmu->lock, flags);

	for (i = __XE_SAMPLE_RENDER_GROUP_BUSY; i <= __XE_SAMPLE_ANY_ENGINE_GROUP_BUSY; i++)
		pmu->sample[gt_id][i] = __engine_group_busyness_read(gt, i);

	spin_unlock_irqrestore(&pmu->lock, flags);
}

static int
config_status(struct xe_device *xe, u64 config)
{
	unsigned int gt_id = config_gt_id(config);
	struct xe_gt *gt;

	/* Validate the GT id before using it for the lookup. */
	if (gt_id >= XE_PMU_MAX_GT)
		return -ENOENT;

	gt = xe_device_get_gt(xe, gt_id);

	switch (config_counter(config)) {
	case DRM_XE_PMU_RENDER_GROUP_BUSY(0):
	case DRM_XE_PMU_COPY_GROUP_BUSY(0):
	case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
		if (gt->info.type == XE_GT_TYPE_MEDIA)
			return -ENOENT;
		break;
	case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
		if (!(gt->info.engine_mask & (BIT(XE_HW_ENGINE_VCS0) | BIT(XE_HW_ENGINE_VECS0))))
			return -ENOENT;
		break;
	default:
		return -ENOENT;
	}

	return 0;
}
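
/*
 * Userspace view (illustrative): each counter that passes config_status()
 * above is exposed through sysfs and can be opened by name with perf, e.g.:
 *
 *	perf stat -e xe_0000_03_00.0/render-group-busy-gt0/ -a sleep 1
 *
 * The PMU instance name is "xe_" + dev_name() with colons replaced by
 * underscores (see xe_pmu_register()), and the per-GT event names are built
 * in create_event_attributes(). The PCI address here is only an example.
 */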

static int xe_pmu_event_init(struct perf_event *event)
{
	struct xe_device *xe =
		container_of(event->pmu, typeof(*xe), pmu.base);
	struct xe_pmu *pmu = &xe->pmu;
	int ret;

	if (pmu->closed)
		return -ENODEV;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/* unsupported modes and filters */
	if (event->attr.sample_period) /* no sampling */
		return -EINVAL;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EINVAL;

	/* only allow running on one cpu at a time */
	if (!cpumask_test_cpu(event->cpu, &xe_pmu_cpumask))
		return -EINVAL;

	ret = config_status(xe, event->attr.config);
	if (ret)
		return ret;

	if (!event->parent) {
		drm_dev_get(&xe->drm);
		event->destroy = xe_pmu_event_destroy;
	}

	return 0;
}

static u64 __xe_pmu_event_read(struct perf_event *event)
{
	struct xe_device *xe =
		container_of(event->pmu, typeof(*xe), pmu.base);
	const unsigned int gt_id = config_gt_id(event->attr.config);
	const u64 config = event->attr.config;
	struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
	/* Zero-initialized so an unknown event reads back as 0. */
	u64 val = 0;

	switch (config_counter(config)) {
	case DRM_XE_PMU_RENDER_GROUP_BUSY(0):
	case DRM_XE_PMU_COPY_GROUP_BUSY(0):
	case DRM_XE_PMU_ANY_ENGINE_GROUP_BUSY(0):
	case DRM_XE_PMU_MEDIA_GROUP_BUSY(0):
		val = engine_group_busyness_read(gt, config);
		break;
	default:
		drm_warn(&gt->tile->xe->drm, "unknown pmu event\n");
	}

	return val;
}

static void xe_pmu_event_read(struct perf_event *event)
{
	struct xe_device *xe =
		container_of(event->pmu, typeof(*xe), pmu.base);
	struct hw_perf_event *hwc = &event->hw;
	struct xe_pmu *pmu = &xe->pmu;
	u64 prev, new;

	if (pmu->closed) {
		event->hw.state = PERF_HES_STOPPED;
		return;
	}
again:
	prev = local64_read(&hwc->prev_count);
	new = __xe_pmu_event_read(event);

	/* Retry if a concurrent reader updated prev_count underneath us. */
	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
		goto again;

	local64_add(new - prev, &event->count);
}
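
/*
 * Notes on the perf core interaction (informational): event_init runs once
 * per opened event, add/start run when the event is scheduled in on the
 * designated CPU, read services each counter read, and stop/del run on
 * schedule-out, with PERF_EF_UPDATE folding in the final delta. The
 * underlying counters are free running and device-wide, so starting an
 * event only needs to snapshot prev_count; there is no per-event hardware
 * state to program.
 */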

static void xe_pmu_enable(struct perf_event *event)
{
	/*
	 * Store the current counter value so we can report the correct delta
	 * for all listeners, even if the event was already enabled and has an
	 * existing non-zero value.
	 */
	local64_set(&event->hw.prev_count, __xe_pmu_event_read(event));
}

static void xe_pmu_event_start(struct perf_event *event, int flags)
{
	struct xe_device *xe =
		container_of(event->pmu, typeof(*xe), pmu.base);
	struct xe_pmu *pmu = &xe->pmu;

	if (pmu->closed)
		return;

	xe_pmu_enable(event);
	event->hw.state = 0;
}

static void xe_pmu_event_stop(struct perf_event *event, int flags)
{
	if (flags & PERF_EF_UPDATE)
		xe_pmu_event_read(event);

	event->hw.state = PERF_HES_STOPPED;
}

static int xe_pmu_event_add(struct perf_event *event, int flags)
{
	struct xe_device *xe =
		container_of(event->pmu, typeof(*xe), pmu.base);
	struct xe_pmu *pmu = &xe->pmu;

	if (pmu->closed)
		return -ENODEV;

	if (flags & PERF_EF_START)
		xe_pmu_event_start(event, flags);

	return 0;
}

static void xe_pmu_event_del(struct perf_event *event, int flags)
{
	xe_pmu_event_stop(event, PERF_EF_UPDATE);
}

static int xe_pmu_event_event_idx(struct perf_event *event)
{
	return 0;
}

struct xe_ext_attribute {
	struct device_attribute attr;
	unsigned long val;
};

static ssize_t xe_pmu_event_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct xe_ext_attribute *eattr;

	eattr = container_of(attr, struct xe_ext_attribute, attr);
	return sprintf(buf, "config=0x%lx\n", eattr->val);
}

static ssize_t cpumask_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &xe_pmu_cpumask);
}

static DEVICE_ATTR_RO(cpumask);

static struct attribute *xe_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group xe_pmu_cpumask_attr_group = {
	.attrs = xe_cpumask_attrs,
};

#define __event(__counter, __name, __unit) \
{ \
	.counter = (__counter), \
	.name = (__name), \
	.unit = (__unit), \
	.global = false, \
}

#define __global_event(__counter, __name, __unit) \
{ \
	.counter = (__counter), \
	.name = (__name), \
	.unit = (__unit), \
	.global = true, \
}

static struct xe_ext_attribute *
add_xe_attr(struct xe_ext_attribute *attr, const char *name, u64 config)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = xe_pmu_event_show;
	attr->val = config;

	return ++attr;
}

static struct perf_pmu_events_attr *
add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
	     const char *str)
{
	sysfs_attr_init(&attr->attr.attr);
	attr->attr.attr.name = name;
	attr->attr.attr.mode = 0444;
	attr->attr.show = perf_event_sysfs_show;
	attr->event_str = str;

	return ++attr;
}
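
/*
 * Illustrative sysfs layout produced by create_event_attributes() below
 * (the device name is an example):
 *
 *	/sys/bus/event_source/devices/xe_0000_03_00.0/events/
 *		render-group-busy-gt0		-> "config=0x0"
 *		render-group-busy-gt0.unit	-> "ns"
 *		media-group-busy-gt0		-> "config=0x2"
 *		...
 *
 * Events on other GTs carry the GT id in the upper config bits
 * (__DRM_XE_PMU_GT_SHIFT), so their "config=" values differ per GT.
 */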

static struct attribute **
create_event_attributes(struct xe_pmu *pmu)
{
	struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
	static const struct {
		unsigned int counter;
		const char *name;
		const char *unit;
		bool global;
	} events[] = {
		__event(0, "render-group-busy", "ns"),
		__event(1, "copy-group-busy", "ns"),
		__event(2, "media-group-busy", "ns"),
		__event(3, "any-engine-group-busy", "ns"),
	};

	struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
	struct xe_ext_attribute *xe_attr = NULL, *xe_iter;
	struct attribute **attr = NULL, **attr_iter;
	unsigned int count = 0;
	unsigned int i, j;
	struct xe_gt *gt;

	/* Count how many counters we will be exposing. */
	for_each_gt(gt, xe, j) {
		for (i = 0; i < ARRAY_SIZE(events); i++) {
			u64 config = ___DRM_XE_PMU_OTHER(j, events[i].counter);

			if (!config_status(xe, config))
				count++;
		}
	}

	/* Allocate attribute objects and table. */
	xe_attr = kcalloc(count, sizeof(*xe_attr), GFP_KERNEL);
	if (!xe_attr)
		goto err_alloc;

	pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL);
	if (!pmu_attr)
		goto err_alloc;

	/*
	 * At most two attribute pointers per event (the event name plus its
	 * optional unit) and one NULL termination entry.
	 */
	attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL);
	if (!attr)
		goto err_alloc;

	xe_iter = xe_attr;
	pmu_iter = pmu_attr;
	attr_iter = attr;

	for_each_gt(gt, xe, j) {
		for (i = 0; i < ARRAY_SIZE(events); i++) {
			u64 config = ___DRM_XE_PMU_OTHER(j, events[i].counter);
			char *str;

			if (config_status(xe, config))
				continue;

			if (events[i].global)
				str = kstrdup(events[i].name, GFP_KERNEL);
			else
				str = kasprintf(GFP_KERNEL, "%s-gt%u",
						events[i].name, j);
			if (!str)
				goto err;

			*attr_iter++ = &xe_iter->attr.attr;
			xe_iter = add_xe_attr(xe_iter, str, config);

			if (events[i].unit) {
				if (events[i].global)
					str = kasprintf(GFP_KERNEL, "%s.unit",
							events[i].name);
				else
					str = kasprintf(GFP_KERNEL, "%s-gt%u.unit",
							events[i].name, j);
				if (!str)
					goto err;

				*attr_iter++ = &pmu_iter->attr.attr;
				pmu_iter = add_pmu_attr(pmu_iter, str,
							events[i].unit);
			}
		}
	}

	pmu->xe_attr = xe_attr;
	pmu->pmu_attr = pmu_attr;

	return attr;

err:
	for (attr_iter = attr; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

err_alloc:
	kfree(attr);
	kfree(xe_attr);
	kfree(pmu_attr);

	return NULL;
}

static void free_event_attributes(struct xe_pmu *pmu)
{
	struct attribute **attr_iter = pmu->events_attr_group.attrs;

	for (; *attr_iter; attr_iter++)
		kfree((*attr_iter)->name);

	kfree(pmu->events_attr_group.attrs);
	kfree(pmu->xe_attr);
	kfree(pmu->pmu_attr);

	pmu->events_attr_group.attrs = NULL;
	pmu->xe_attr = NULL;
	pmu->pmu_attr = NULL;
}
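
/*
 * CPU hotplug handling: all xe PMU instances share xe_pmu_cpumask, which
 * holds the single CPU currently designated to read the counters. The
 * counters are device-wide, so reading from more than one CPU would only
 * add overhead. The callbacks below keep the mask populated as CPUs come
 * and go, migrating the perf context to a new CPU when the designated
 * reader goes offline.
 */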

static int xe_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);

	/* Select the first online CPU as a designated reader. */
	if (cpumask_empty(&xe_pmu_cpumask))
		cpumask_set_cpu(cpu, &xe_pmu_cpumask);

	return 0;
}

static int xe_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct xe_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node);
	unsigned int target = xe_pmu_target_cpu;

	/*
	 * Unregistering an instance generates a CPU offline event which we
	 * must ignore to avoid incorrectly modifying the shared
	 * xe_pmu_cpumask.
	 */
	if (pmu->closed)
		return 0;

	if (cpumask_test_and_clear_cpu(cpu, &xe_pmu_cpumask)) {
		target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu);

		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &xe_pmu_cpumask);
			xe_pmu_target_cpu = target;
		}
	}

	if (target < nr_cpu_ids && target != pmu->cpuhp.cpu) {
		perf_pmu_migrate_context(&pmu->base, cpu, target);
		pmu->cpuhp.cpu = target;
	}

	return 0;
}

static enum cpuhp_state cpuhp_slot = CPUHP_INVALID;

int xe_pmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/x86/intel/xe:online",
				      xe_pmu_cpu_online,
				      xe_pmu_cpu_offline);
	if (ret < 0)
		pr_notice("Failed to setup cpuhp state for xe PMU! (%d)\n",
			  ret);
	else
		cpuhp_slot = ret;

	/* Failure to set up CPU hotplug is not fatal to the driver. */
	return 0;
}

void xe_pmu_exit(void)
{
	if (cpuhp_slot != CPUHP_INVALID)
		cpuhp_remove_multi_state(cpuhp_slot);
}

static int xe_pmu_register_cpuhp_state(struct xe_pmu *pmu)
{
	if (cpuhp_slot == CPUHP_INVALID)
		return -EINVAL;

	return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node);
}

static void xe_pmu_unregister_cpuhp_state(struct xe_pmu *pmu)
{
	cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node);
}

void xe_pmu_suspend(struct xe_gt *gt)
{
	engine_group_busyness_store(gt);
}

static void xe_pmu_unregister(struct drm_device *device, void *arg)
{
	struct xe_pmu *pmu = arg;

	if (!pmu->base.event_init)
		return;

	/*
	 * "Disconnect" the PMU callbacks - since all are atomic, the
	 * synchronize_rcu() below ensures all currently executing ones have
	 * exited before we proceed with unregistration.
	 */
	pmu->closed = true;
	synchronize_rcu();

	xe_pmu_unregister_cpuhp_state(pmu);

	perf_pmu_unregister(&pmu->base);
	pmu->base.event_init = NULL;
	kfree(pmu->base.attr_groups);
	kfree(pmu->name);
	free_event_attributes(pmu);
}
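
/**
 * xe_pmu_register() - set up and register the perf PMU for an xe device
 * @pmu: the &struct xe_pmu embedded in &struct xe_device
 *
 * Builds the sysfs event attributes, registers a PMU named after the device
 * and adds the instance to the CPU hotplug machinery. Teardown is hooked to
 * DRM managed release via drmm_add_action_or_reset(). Failure is not fatal:
 * a notice is logged and the device simply exposes no PMU.
 */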
void xe_pmu_register(struct xe_pmu *pmu)
{
	struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
	const struct attribute_group *attr_groups[] = {
		&pmu->events_attr_group,
		&xe_pmu_cpumask_attr_group,
		NULL
	};
	int ret = -ENOMEM;

	spin_lock_init(&pmu->lock);
	pmu->cpuhp.cpu = -1;

	pmu->name = kasprintf(GFP_KERNEL, "xe_%s", dev_name(xe->drm.dev));
	if (!pmu->name)
		goto err;

	/* tools/perf reserves colons as special. */
	strreplace((char *)pmu->name, ':', '_');

	pmu->events_attr_group.name = "events";
	pmu->events_attr_group.attrs = create_event_attributes(pmu);
	if (!pmu->events_attr_group.attrs)
		goto err_name;

	pmu->base.attr_groups = kmemdup(attr_groups, sizeof(attr_groups),
					GFP_KERNEL);
	if (!pmu->base.attr_groups)
		goto err_attr;

	pmu->base.module = THIS_MODULE;
	pmu->base.task_ctx_nr = perf_invalid_context;
	pmu->base.event_init = xe_pmu_event_init;
	pmu->base.add = xe_pmu_event_add;
	pmu->base.del = xe_pmu_event_del;
	pmu->base.start = xe_pmu_event_start;
	pmu->base.stop = xe_pmu_event_stop;
	pmu->base.read = xe_pmu_event_read;
	pmu->base.event_idx = xe_pmu_event_event_idx;

	ret = perf_pmu_register(&pmu->base, pmu->name, -1);
	if (ret)
		goto err_groups;

	ret = xe_pmu_register_cpuhp_state(pmu);
	if (ret)
		goto err_unreg;

	ret = drmm_add_action_or_reset(&xe->drm, xe_pmu_unregister, pmu);
	if (ret) {
		/*
		 * The _or_reset() variant has already run xe_pmu_unregister()
		 * for us on failure, so only the notice is left to print.
		 */
		goto err;
	}

	return;

err_unreg:
	perf_pmu_unregister(&pmu->base);
err_groups:
	kfree(pmu->base.attr_groups);
err_attr:
	pmu->base.event_init = NULL;
	free_event_attributes(pmu);
err_name:
	kfree(pmu->name);
err:
	drm_notice(&xe->drm, "Failed to register PMU!\n");
}