// SPDX-License-Identifier: GPL-2.0
/*
 * NVIDIA Tegra410 CPU Memory (CMEM) Latency PMU driver.
 *
 * Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include <linux/acpi.h>
#include <linux/bitops.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>

#define NUM_INSTANCES			14

/* Register offsets. */
#define CMEM_LAT_CG_CTRL		0x800
#define CMEM_LAT_CTRL			0x808
#define CMEM_LAT_STATUS			0x810
#define CMEM_LAT_CYCLE_CNTR		0x818
#define CMEM_LAT_MC0_REQ_CNTR		0x820
#define CMEM_LAT_MC0_AOR_CNTR		0x830
#define CMEM_LAT_MC1_REQ_CNTR		0x838
#define CMEM_LAT_MC1_AOR_CNTR		0x848
#define CMEM_LAT_MC2_REQ_CNTR		0x850
#define CMEM_LAT_MC2_AOR_CNTR		0x860

/* CMEM_LAT_CTRL values. */
#define CMEM_LAT_CTRL_DISABLE		0x0ULL
#define CMEM_LAT_CTRL_ENABLE		0x1ULL
#define CMEM_LAT_CTRL_CLR		0x2ULL

/* CMEM_LAT_CG_CTRL values. */
#define CMEM_LAT_CG_CTRL_DISABLE	0x0ULL
#define CMEM_LAT_CG_CTRL_ENABLE		0x1ULL

/* CMEM_LAT_STATUS register fields. */
#define CMEM_LAT_STATUS_CYCLE_OVF	BIT(0)
#define CMEM_LAT_STATUS_MC0_AOR_OVF	BIT(1)
#define CMEM_LAT_STATUS_MC0_REQ_OVF	BIT(3)
#define CMEM_LAT_STATUS_MC1_AOR_OVF	BIT(4)
#define CMEM_LAT_STATUS_MC1_REQ_OVF	BIT(6)
#define CMEM_LAT_STATUS_MC2_AOR_OVF	BIT(7)
#define CMEM_LAT_STATUS_MC2_REQ_OVF	BIT(9)

/* Events. */
#define CMEM_LAT_EVENT_CYCLES		0x0
#define CMEM_LAT_EVENT_REQ		0x1
#define CMEM_LAT_EVENT_AOR		0x2

#define CMEM_LAT_NUM_EVENTS		0x3
#define CMEM_LAT_MASK_EVENT		0x3
#define CMEM_LAT_MAX_ACTIVE_EVENTS	32

#define CMEM_LAT_ACTIVE_CPU_MASK	0x0
#define CMEM_LAT_ASSOCIATED_CPU_MASK	0x1

static unsigned long cmem_lat_pmu_cpuhp_state;

struct cmem_lat_pmu_hw_events {
	struct perf_event *events[CMEM_LAT_MAX_ACTIVE_EVENTS];
	DECLARE_BITMAP(used_ctrs, CMEM_LAT_MAX_ACTIVE_EVENTS);
};

struct cmem_lat_pmu {
	struct pmu pmu;
	struct device *dev;
	const char *name;
	const char *identifier;
	void __iomem *base_broadcast;
	void __iomem *base[NUM_INSTANCES];
	cpumask_t associated_cpus;
	cpumask_t active_cpu;
	struct hlist_node node;
	struct cmem_lat_pmu_hw_events hw_events;
};

#define to_cmem_lat_pmu(p) \
	container_of(p, struct cmem_lat_pmu, pmu)

/* Get event type from perf_event. */
static inline u32 get_event_type(struct perf_event *event)
{
	return event->attr.config & CMEM_LAT_MASK_EVENT;
}

/* PMU operations. */
static int cmem_lat_pmu_get_event_idx(struct cmem_lat_pmu_hw_events *hw_events,
				      struct perf_event *event)
{
	unsigned int idx;

	idx = find_first_zero_bit(hw_events->used_ctrs, CMEM_LAT_MAX_ACTIVE_EVENTS);
	if (idx >= CMEM_LAT_MAX_ACTIVE_EVENTS)
		return -EAGAIN;

	set_bit(idx, hw_events->used_ctrs);

	return idx;
}

static bool cmem_lat_pmu_validate_event(struct pmu *pmu,
					struct cmem_lat_pmu_hw_events *hw_events,
					struct perf_event *event)
{
	int ret;

	if (is_software_event(event))
		return true;

	/* Reject groups spanning multiple HW PMUs. */
	if (event->pmu != pmu)
		return false;

	ret = cmem_lat_pmu_get_event_idx(hw_events, event);
	if (ret < 0)
		return false;

	return true;
}

/* Make sure the group of events can be scheduled at once on the PMU. */
static bool cmem_lat_pmu_validate_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	struct cmem_lat_pmu_hw_events fake_hw_events;

	if (event->group_leader == event)
		return true;

	memset(&fake_hw_events, 0, sizeof(fake_hw_events));

	if (!cmem_lat_pmu_validate_event(event->pmu, &fake_hw_events, leader))
		return false;

	for_each_sibling_event(sibling, leader) {
		if (!cmem_lat_pmu_validate_event(event->pmu, &fake_hw_events, sibling))
			return false;
	}

	return cmem_lat_pmu_validate_event(event->pmu, &fake_hw_events, event);
}

static int cmem_lat_pmu_event_init(struct perf_event *event)
{
	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u32 event_type = get_event_type(event);

	if (event->attr.type != event->pmu->type ||
	    event_type >= CMEM_LAT_NUM_EVENTS)
		return -ENOENT;

	/*
	 * Sampling, per-process mode, and per-task counters are not supported
	 * since this PMU is shared across all CPUs.
	 */
	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) {
		dev_dbg(cmem_lat_pmu->pmu.dev,
			"Can't support sampling and per-process mode\n");
		return -EOPNOTSUPP;
	}

	if (event->cpu < 0) {
		dev_dbg(cmem_lat_pmu->pmu.dev, "Can't support per-task counters\n");
		return -EINVAL;
	}

	/*
	 * Make sure the CPU assignment is on one of the CPUs associated with
	 * this PMU.
	 */
	if (!cpumask_test_cpu(event->cpu, &cmem_lat_pmu->associated_cpus)) {
		dev_dbg(cmem_lat_pmu->pmu.dev,
			"Requested cpu is not associated with the PMU\n");
		return -EINVAL;
	}

	/* Enforce that the current active CPU handles the events in this PMU. */
	event->cpu = cpumask_first(&cmem_lat_pmu->active_cpu);
	if (event->cpu >= nr_cpu_ids)
		return -EINVAL;

	if (!cmem_lat_pmu_validate_group(event))
		return -EINVAL;

	hwc->idx = -1;
	hwc->config = event_type;

	return 0;
}

static u64 cmem_lat_pmu_read_status(struct cmem_lat_pmu *cmem_lat_pmu,
				    unsigned int inst)
{
	return readq(cmem_lat_pmu->base[inst] + CMEM_LAT_STATUS);
}

static u64 cmem_lat_pmu_read_cycle_counter(struct perf_event *event)
{
	const unsigned int instance = 0;
	u64 status;
	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
	struct device *dev = cmem_lat_pmu->dev;

	/*
	 * Use the reading from the first instance since all instances are
	 * identical.
	 */
	status = cmem_lat_pmu_read_status(cmem_lat_pmu, instance);
	if (status & CMEM_LAT_STATUS_CYCLE_OVF)
		dev_warn(dev, "Cycle counter overflow\n");

	return readq(cmem_lat_pmu->base[instance] + CMEM_LAT_CYCLE_CNTR);
}

static u64 cmem_lat_pmu_read_req_counter(struct perf_event *event)
{
	unsigned int i;
	u64 status, val = 0;
	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
	struct device *dev = cmem_lat_pmu->dev;

	/* Sum up the counts from all instances. */
	for (i = 0; i < NUM_INSTANCES; i++) {
		status = cmem_lat_pmu_read_status(cmem_lat_pmu, i);
		if (status & CMEM_LAT_STATUS_MC0_REQ_OVF)
			dev_warn(dev, "MC0 request counter overflow\n");
		if (status & CMEM_LAT_STATUS_MC1_REQ_OVF)
			dev_warn(dev, "MC1 request counter overflow\n");
		if (status & CMEM_LAT_STATUS_MC2_REQ_OVF)
			dev_warn(dev, "MC2 request counter overflow\n");

		val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC0_REQ_CNTR);
		val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC1_REQ_CNTR);
		val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC2_REQ_CNTR);
	}

	return val;
}

static u64 cmem_lat_pmu_read_aor_counter(struct perf_event *event)
{
	unsigned int i;
	u64 status, val = 0;
	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
	struct device *dev = cmem_lat_pmu->dev;

	/* Sum up the counts from all instances. */
	for (i = 0; i < NUM_INSTANCES; i++) {
		status = cmem_lat_pmu_read_status(cmem_lat_pmu, i);
		if (status & CMEM_LAT_STATUS_MC0_AOR_OVF)
			dev_warn(dev, "MC0 AOR counter overflow\n");
		if (status & CMEM_LAT_STATUS_MC1_AOR_OVF)
			dev_warn(dev, "MC1 AOR counter overflow\n");
		if (status & CMEM_LAT_STATUS_MC2_AOR_OVF)
			dev_warn(dev, "MC2 AOR counter overflow\n");

		val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC0_AOR_CNTR);
		val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC1_AOR_CNTR);
		val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC2_AOR_CNTR);
	}

	return val;
}

static u64 (*read_counter_fn[CMEM_LAT_NUM_EVENTS])(struct perf_event *) = {
	[CMEM_LAT_EVENT_CYCLES]	= cmem_lat_pmu_read_cycle_counter,
	[CMEM_LAT_EVENT_REQ]	= cmem_lat_pmu_read_req_counter,
	[CMEM_LAT_EVENT_AOR]	= cmem_lat_pmu_read_aor_counter,
};

static void cmem_lat_pmu_event_update(struct perf_event *event)
{
	u32 event_type;
	u64 prev, now;
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->state & PERF_HES_STOPPED)
		return;

	event_type = hwc->config;

	do {
		prev = local64_read(&hwc->prev_count);
		now = read_counter_fn[event_type](event);
	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);

	local64_add(now - prev, &event->count);

	hwc->state |= PERF_HES_UPTODATE;
}
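
/*
 * The latency counters are free-running and are started, stopped, and
 * cleared globally through the broadcast CTRL registers in
 * cmem_lat_pmu_enable()/cmem_lat_pmu_disable(). The per-event start/stop
 * callbacks below therefore only track the event's software state.
 */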
static void cmem_lat_pmu_start(struct perf_event *event, int pmu_flags)
{
	event->hw.state = 0;
}

static void cmem_lat_pmu_stop(struct perf_event *event, int pmu_flags)
{
	event->hw.state |= PERF_HES_STOPPED;
}

static int cmem_lat_pmu_add(struct perf_event *event, int flags)
{
	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
	struct cmem_lat_pmu_hw_events *hw_events = &cmem_lat_pmu->hw_events;
	struct hw_perf_event *hwc = &event->hw;
	int idx;

	if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
					   &cmem_lat_pmu->associated_cpus)))
		return -ENOENT;

	idx = cmem_lat_pmu_get_event_idx(hw_events, event);
	if (idx < 0)
		return idx;

	hw_events->events[idx] = event;
	hwc->idx = idx;
	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

	if (flags & PERF_EF_START)
		cmem_lat_pmu_start(event, PERF_EF_RELOAD);

	/* Propagate changes to the userspace mapping. */
	perf_event_update_userpage(event);

	return 0;
}

static void cmem_lat_pmu_del(struct perf_event *event, int flags)
{
	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
	struct cmem_lat_pmu_hw_events *hw_events = &cmem_lat_pmu->hw_events;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	cmem_lat_pmu_stop(event, PERF_EF_UPDATE);

	hw_events->events[idx] = NULL;

	clear_bit(idx, hw_events->used_ctrs);

	perf_event_update_userpage(event);
}

static void cmem_lat_pmu_read(struct perf_event *event)
{
	cmem_lat_pmu_event_update(event);
}

static inline void cmem_lat_pmu_cg_ctrl(struct cmem_lat_pmu *cmem_lat_pmu,
					u64 val)
{
	writeq(val, cmem_lat_pmu->base_broadcast + CMEM_LAT_CG_CTRL);
}

static inline void cmem_lat_pmu_ctrl(struct cmem_lat_pmu *cmem_lat_pmu, u64 val)
{
	writeq(val, cmem_lat_pmu->base_broadcast + CMEM_LAT_CTRL);
}

static void cmem_lat_pmu_enable(struct pmu *pmu)
{
	bool disabled;
	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(pmu);

	disabled = bitmap_empty(cmem_lat_pmu->hw_events.used_ctrs,
				CMEM_LAT_MAX_ACTIVE_EVENTS);

	if (disabled)
		return;

	/* Enable all the counters. */
	cmem_lat_pmu_cg_ctrl(cmem_lat_pmu, CMEM_LAT_CG_CTRL_ENABLE);
	cmem_lat_pmu_ctrl(cmem_lat_pmu, CMEM_LAT_CTRL_ENABLE);
}

static void cmem_lat_pmu_disable(struct pmu *pmu)
{
	int idx;
	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(pmu);

	/* Disable all the counters. */
	cmem_lat_pmu_ctrl(cmem_lat_pmu, CMEM_LAT_CTRL_DISABLE);

	/*
	 * The counters will start from 0 again on restart.
	 * Update the events immediately to avoid losing the counts.
	 */
	for_each_set_bit(idx, cmem_lat_pmu->hw_events.used_ctrs,
			 CMEM_LAT_MAX_ACTIVE_EVENTS) {
		struct perf_event *event = cmem_lat_pmu->hw_events.events[idx];

		if (!event)
			continue;

		cmem_lat_pmu_event_update(event);

		local64_set(&event->hw.prev_count, 0ULL);
	}

	cmem_lat_pmu_ctrl(cmem_lat_pmu, CMEM_LAT_CTRL_CLR);
	cmem_lat_pmu_cg_ctrl(cmem_lat_pmu, CMEM_LAT_CG_CTRL_DISABLE);
}

/* PMU identifier attribute. */

static ssize_t cmem_lat_pmu_identifier_show(struct device *dev,
					    struct device_attribute *attr,
					    char *page)
{
	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(dev_get_drvdata(dev));

	return sysfs_emit(page, "%s\n", cmem_lat_pmu->identifier);
}

static struct device_attribute cmem_lat_pmu_identifier_attr =
	__ATTR(identifier, 0444, cmem_lat_pmu_identifier_show, NULL);

static struct attribute *cmem_lat_pmu_identifier_attrs[] = {
	&cmem_lat_pmu_identifier_attr.attr,
	NULL
};

static const struct attribute_group cmem_lat_pmu_identifier_attr_group = {
	.attrs = cmem_lat_pmu_identifier_attrs,
};

/* Format attributes. */

#define NV_PMU_EXT_ATTR(_name, _func, _config)				\
	(&((struct dev_ext_attribute[]){				\
		{							\
			.attr = __ATTR(_name, 0444, _func, NULL),	\
			.var = (void *)_config				\
		}							\
	})[0].attr.attr)

static struct attribute *cmem_lat_pmu_formats[] = {
	NV_PMU_EXT_ATTR(event, device_show_string, "config:0-1"),
	NULL
};

static const struct attribute_group cmem_lat_pmu_format_group = {
	.name = "format",
	.attrs = cmem_lat_pmu_formats,
};

/* Event attributes. */

static ssize_t cmem_lat_pmu_sysfs_event_show(struct device *dev,
					     struct device_attribute *attr, char *buf)
{
	struct perf_pmu_events_attr *pmu_attr;

	pmu_attr = container_of(attr, typeof(*pmu_attr), attr);
	return sysfs_emit(buf, "event=0x%llx\n", pmu_attr->id);
}

#define NV_PMU_EVENT_ATTR(_name, _config)				\
	PMU_EVENT_ATTR_ID(_name, cmem_lat_pmu_sysfs_event_show, _config)

static struct attribute *cmem_lat_pmu_events[] = {
	NV_PMU_EVENT_ATTR(cycles, CMEM_LAT_EVENT_CYCLES),
	NV_PMU_EVENT_ATTR(rd_req, CMEM_LAT_EVENT_REQ),
	NV_PMU_EVENT_ATTR(rd_cum_outs, CMEM_LAT_EVENT_AOR),
	NULL
};

static const struct attribute_group cmem_lat_pmu_events_group = {
	.name = "events",
	.attrs = cmem_lat_pmu_events,
};

/* Cpumask attributes. */

static ssize_t cmem_lat_pmu_cpumask_show(struct device *dev,
					 struct device_attribute *attr, char *buf)
{
	struct pmu *pmu = dev_get_drvdata(dev);
	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(pmu);
	struct dev_ext_attribute *eattr =
		container_of(attr, struct dev_ext_attribute, attr);
	unsigned long mask_id = (unsigned long)eattr->var;
	const cpumask_t *cpumask;

	switch (mask_id) {
	case CMEM_LAT_ACTIVE_CPU_MASK:
		cpumask = &cmem_lat_pmu->active_cpu;
		break;
	case CMEM_LAT_ASSOCIATED_CPU_MASK:
		cpumask = &cmem_lat_pmu->associated_cpus;
		break;
	default:
		return 0;
	}

	return cpumap_print_to_pagebuf(true, buf, cpumask);
}

#define NV_PMU_CPUMASK_ATTR(_name, _config)				\
	NV_PMU_EXT_ATTR(_name, cmem_lat_pmu_cpumask_show,		\
			(unsigned long)_config)

static struct attribute *cmem_lat_pmu_cpumask_attrs[] = {
	NV_PMU_CPUMASK_ATTR(cpumask, CMEM_LAT_ACTIVE_CPU_MASK),
	NV_PMU_CPUMASK_ATTR(associated_cpus, CMEM_LAT_ASSOCIATED_CPU_MASK),
	NULL
};

static const struct attribute_group cmem_lat_pmu_cpumask_attr_group = {
	.attrs = cmem_lat_pmu_cpumask_attrs,
};
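
/*
 * Example usage from userspace (socket 0 shown; the PMU and event names come
 * from the attributes above, everything else is generic perf usage):
 *
 *	perf stat -a \
 *		-e nvidia_cmem_latency_pmu_0/cycles/ \
 *		-e nvidia_cmem_latency_pmu_0/rd_req/ \
 *		-e nvidia_cmem_latency_pmu_0/rd_cum_outs/ \
 *		-- sleep 1
 *
 * The active and associated CPUs are exported under
 * /sys/bus/event_source/devices/nvidia_cmem_latency_pmu_0/ as "cpumask" and
 * "associated_cpus". Given the event names, an average read latency estimate
 * could presumably be derived as rd_cum_outs / rd_req (cycles per request, by
 * Little's law); this is an inference from the naming, not from hardware
 * documentation.
 */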

/* Per PMU device attribute groups. */

static const struct attribute_group *cmem_lat_pmu_attr_groups[] = {
	&cmem_lat_pmu_identifier_attr_group,
	&cmem_lat_pmu_format_group,
	&cmem_lat_pmu_events_group,
	&cmem_lat_pmu_cpumask_attr_group,
	NULL
};

static int cmem_lat_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct cmem_lat_pmu *cmem_lat_pmu =
		hlist_entry_safe(node, struct cmem_lat_pmu, node);

	if (!cpumask_test_cpu(cpu, &cmem_lat_pmu->associated_cpus))
		return 0;

	/* If the PMU is already managed, there is nothing to do. */
	if (!cpumask_empty(&cmem_lat_pmu->active_cpu))
		return 0;

	/* Use this CPU for event counting. */
	cpumask_set_cpu(cpu, &cmem_lat_pmu->active_cpu);

	return 0;
}

static int cmem_lat_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
	unsigned int dst;
	struct cmem_lat_pmu *cmem_lat_pmu =
		hlist_entry_safe(node, struct cmem_lat_pmu, node);

	/* Nothing to do if this CPU doesn't own the PMU. */
	if (!cpumask_test_and_clear_cpu(cpu, &cmem_lat_pmu->active_cpu))
		return 0;

	/* Choose a new CPU to migrate ownership of the PMU to. */
	dst = cpumask_any_and_but(&cmem_lat_pmu->associated_cpus,
				  cpu_online_mask, cpu);
	if (dst >= nr_cpu_ids)
		return 0;

	/* Migrate the events and use the new CPU for event counting. */
	perf_pmu_migrate_context(&cmem_lat_pmu->pmu, cpu, dst);
	cpumask_set_cpu(dst, &cmem_lat_pmu->active_cpu);

	return 0;
}

static int cmem_lat_pmu_get_cpus(struct cmem_lat_pmu *cmem_lat_pmu,
				 unsigned int socket)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		if (cpu_to_node(cpu) == socket)
			cpumask_set_cpu(cpu, &cmem_lat_pmu->associated_cpus);
	}

	if (cpumask_empty(&cmem_lat_pmu->associated_cpus)) {
		dev_dbg(cmem_lat_pmu->dev,
			"No cpu associated with PMU socket-%u\n", socket);
		return -ENODEV;
	}

	return 0;
}
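
/*
 * Each PMU device is expected to describe, in its ACPI node, one MMIO region
 * per CMEM instance (NUM_INSTANCES of them) followed by a final broadcast
 * region, and to carry the socket number in _UID. probe() below maps the
 * regions in that order and derives the associated CPUs from the socket's
 * NUMA node.
 */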
static int cmem_lat_pmu_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct acpi_device *acpi_dev;
	struct cmem_lat_pmu *cmem_lat_pmu;
	const char *uid_str;
	char *name;
	int ret, i;
	u32 socket;

	acpi_dev = ACPI_COMPANION(dev);
	if (!acpi_dev)
		return -ENODEV;

	uid_str = acpi_device_uid(acpi_dev);
	if (!uid_str)
		return -ENODEV;

	ret = kstrtou32(uid_str, 0, &socket);
	if (ret)
		return ret;

	cmem_lat_pmu = devm_kzalloc(dev, sizeof(*cmem_lat_pmu), GFP_KERNEL);
	name = devm_kasprintf(dev, GFP_KERNEL, "nvidia_cmem_latency_pmu_%u", socket);
	if (!cmem_lat_pmu || !name)
		return -ENOMEM;

	cmem_lat_pmu->dev = dev;
	cmem_lat_pmu->name = name;
	cmem_lat_pmu->identifier = acpi_device_hid(acpi_dev);
	platform_set_drvdata(pdev, cmem_lat_pmu);

	cmem_lat_pmu->pmu = (struct pmu) {
		.parent		= &pdev->dev,
		.task_ctx_nr	= perf_invalid_context,
		.pmu_enable	= cmem_lat_pmu_enable,
		.pmu_disable	= cmem_lat_pmu_disable,
		.event_init	= cmem_lat_pmu_event_init,
		.add		= cmem_lat_pmu_add,
		.del		= cmem_lat_pmu_del,
		.start		= cmem_lat_pmu_start,
		.stop		= cmem_lat_pmu_stop,
		.read		= cmem_lat_pmu_read,
		.attr_groups	= cmem_lat_pmu_attr_groups,
		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE |
				  PERF_PMU_CAP_NO_INTERRUPT,
	};

	/* Map the address of all the instances. */
	for (i = 0; i < NUM_INSTANCES; i++) {
		cmem_lat_pmu->base[i] = devm_platform_ioremap_resource(pdev, i);
		if (IS_ERR(cmem_lat_pmu->base[i])) {
			dev_err(dev, "Failed to map address for instance %d\n", i);
			return PTR_ERR(cmem_lat_pmu->base[i]);
		}
	}

	/* Map the broadcast address. */
	cmem_lat_pmu->base_broadcast = devm_platform_ioremap_resource(pdev,
								       NUM_INSTANCES);
	if (IS_ERR(cmem_lat_pmu->base_broadcast)) {
		dev_err(dev, "Failed to map broadcast address\n");
		return PTR_ERR(cmem_lat_pmu->base_broadcast);
	}

	ret = cmem_lat_pmu_get_cpus(cmem_lat_pmu, socket);
	if (ret)
		return ret;

	ret = cpuhp_state_add_instance(cmem_lat_pmu_cpuhp_state,
				       &cmem_lat_pmu->node);
	if (ret) {
		dev_err(dev, "Error %d registering hotplug\n", ret);
		return ret;
	}

	/* Clear the counters before handing the PMU to perf. */
	cmem_lat_pmu_cg_ctrl(cmem_lat_pmu, CMEM_LAT_CG_CTRL_ENABLE);
	cmem_lat_pmu_ctrl(cmem_lat_pmu, CMEM_LAT_CTRL_CLR);
	cmem_lat_pmu_cg_ctrl(cmem_lat_pmu, CMEM_LAT_CG_CTRL_DISABLE);

	ret = perf_pmu_register(&cmem_lat_pmu->pmu, name, -1);
	if (ret) {
		dev_err(dev, "Failed to register PMU: %d\n", ret);
		cpuhp_state_remove_instance(cmem_lat_pmu_cpuhp_state,
					    &cmem_lat_pmu->node);
		return ret;
	}

	dev_dbg(dev, "Registered %s PMU\n", name);

	return 0;
}

static void cmem_lat_pmu_device_remove(struct platform_device *pdev)
{
	struct cmem_lat_pmu *cmem_lat_pmu = platform_get_drvdata(pdev);

	perf_pmu_unregister(&cmem_lat_pmu->pmu);
	cpuhp_state_remove_instance(cmem_lat_pmu_cpuhp_state,
				    &cmem_lat_pmu->node);
}

static const struct acpi_device_id cmem_lat_pmu_acpi_match[] = {
	{ "NVDA2021" },
	{ }
};
MODULE_DEVICE_TABLE(acpi, cmem_lat_pmu_acpi_match);

static struct platform_driver cmem_lat_pmu_driver = {
	.driver = {
		.name			= "nvidia-t410-cmem-latency-pmu",
		.acpi_match_table	= ACPI_PTR(cmem_lat_pmu_acpi_match),
		.suppress_bind_attrs	= true,
	},
	.probe = cmem_lat_pmu_probe,
	.remove = cmem_lat_pmu_device_remove,
};

static int __init cmem_lat_pmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "perf/nvidia/cmem_latency:online",
				      cmem_lat_pmu_cpu_online,
				      cmem_lat_pmu_cpu_teardown);
	if (ret < 0)
		return ret;

	cmem_lat_pmu_cpuhp_state = ret;

	return platform_driver_register(&cmem_lat_pmu_driver);
}

static void __exit cmem_lat_pmu_exit(void)
{
	platform_driver_unregister(&cmem_lat_pmu_driver);
	cpuhp_remove_multi_state(cmem_lat_pmu_cpuhp_state);
}

module_init(cmem_lat_pmu_init);
module_exit(cmem_lat_pmu_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("NVIDIA Tegra410 CPU Memory Latency PMU driver");
MODULE_AUTHOR("Besar Wicaksono <bwicaksono@nvidia.com>");
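
/*
 * Build hookup sketch. The Kconfig symbol and object file name below are
 * assumptions for illustration only, not taken from an existing tree:
 *
 *	drivers/perf/Kconfig:
 *		config NVIDIA_T410_CMEM_LATENCY_PMU
 *			tristate "NVIDIA Tegra410 CMEM latency PMU"
 *			depends on ARM64 && ACPI
 *
 *	drivers/perf/Makefile:
 *		obj-$(CONFIG_NVIDIA_T410_CMEM_LATENCY_PMU) += nvidia_t410_cmem_lat_pmu.o
 */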