11fb2daa6STony Luck // SPDX-License-Identifier: GPL-2.0-only 21fb2daa6STony Luck /* 31fb2daa6STony Luck * Resource Director Technology(RDT) 41fb2daa6STony Luck * - Intel Application Energy Telemetry 51fb2daa6STony Luck * 61fb2daa6STony Luck * Copyright (C) 2025 Intel Corporation 71fb2daa6STony Luck * 81fb2daa6STony Luck * Author: 91fb2daa6STony Luck * Tony Luck <tony.luck@intel.com> 101fb2daa6STony Luck */ 111fb2daa6STony Luck 121fb2daa6STony Luck #define pr_fmt(fmt) "resctrl: " fmt 131fb2daa6STony Luck 1451541f6cSTony Luck #include <linux/bits.h> 158f6b6ad6STony Luck #include <linux/compiler_types.h> 1651541f6cSTony Luck #include <linux/container_of.h> 17f4e0cd80STony Luck #include <linux/cpumask.h> 181fb2daa6STony Luck #include <linux/err.h> 1951541f6cSTony Luck #include <linux/errno.h> 20f4e0cd80STony Luck #include <linux/gfp_types.h> 211fb2daa6STony Luck #include <linux/init.h> 221fb2daa6STony Luck #include <linux/intel_pmt_features.h> 231fb2daa6STony Luck #include <linux/intel_vsec.h> 2451541f6cSTony Luck #include <linux/io.h> 2567640e33STony Luck #include <linux/minmax.h> 267e6df961STony Luck #include <linux/printk.h> 27f4e0cd80STony Luck #include <linux/rculist.h> 28f4e0cd80STony Luck #include <linux/rcupdate.h> 291fb2daa6STony Luck #include <linux/resctrl.h> 308f6b6ad6STony Luck #include <linux/resctrl_types.h> 31f4e0cd80STony Luck #include <linux/slab.h> 321fb2daa6STony Luck #include <linux/stddef.h> 337e6df961STony Luck #include <linux/topology.h> 348f6b6ad6STony Luck #include <linux/types.h> 351fb2daa6STony Luck 361fb2daa6STony Luck #include "internal.h" 371fb2daa6STony Luck 381fb2daa6STony Luck /** 398f6b6ad6STony Luck * struct pmt_event - Telemetry event. 408f6b6ad6STony Luck * @id: Resctrl event id. 418f6b6ad6STony Luck * @idx: Counter index within each per-RMID block of counters. 428f6b6ad6STony Luck * @bin_bits: Zero for integer valued events, else number bits in fraction 438f6b6ad6STony Luck * part of fixed-point. 448f6b6ad6STony Luck */ 458f6b6ad6STony Luck struct pmt_event { 468f6b6ad6STony Luck enum resctrl_event_id id; 478f6b6ad6STony Luck unsigned int idx; 488f6b6ad6STony Luck unsigned int bin_bits; 498f6b6ad6STony Luck }; 508f6b6ad6STony Luck 518f6b6ad6STony Luck #define EVT(_id, _idx, _bits) { .id = _id, .idx = _idx, .bin_bits = _bits } 528f6b6ad6STony Luck 538f6b6ad6STony Luck /** 541fb2daa6STony Luck * struct event_group - Events with the same feature type ("energy" or "perf") and GUID. 551fb2daa6STony Luck * @pfname: PMT feature name ("energy" or "perf") of this event group. 56842e7f97STony Luck * Used by boot rdt= option. 571fb2daa6STony Luck * @pfg: Points to the aggregated telemetry space information 581fb2daa6STony Luck * returned by the intel_pmt_get_regions_by_feature() 591fb2daa6STony Luck * call to the INTEL_PMT_TELEMETRY driver that contains 601fb2daa6STony Luck * data for all telemetry regions of type @pfname. 611fb2daa6STony Luck * Valid if the system supports the event group, 621fb2daa6STony Luck * NULL otherwise. 63842e7f97STony Luck * @force_off: True when "rdt" command line or architecture code disables 6467640e33STony Luck * this event group due to insufficient RMIDs. 65842e7f97STony Luck * @force_on: True when "rdt" command line overrides disable of this 66842e7f97STony Luck * event group. 678f6b6ad6STony Luck * @guid: Unique number per XML description file. 6867640e33STony Luck * @num_rmid: Number of RMIDs supported by this group. May be 6967640e33STony Luck * adjusted downwards if enumeration from 7067640e33STony Luck * intel_pmt_get_regions_by_feature() indicates fewer 7167640e33STony Luck * RMIDs can be tracked simultaneously. 728f6b6ad6STony Luck * @mmio_size: Number of bytes of MMIO registers for this group. 738f6b6ad6STony Luck * @num_events: Number of events in this group. 748f6b6ad6STony Luck * @evts: Array of event descriptors. 751fb2daa6STony Luck */ 761fb2daa6STony Luck struct event_group { 771fb2daa6STony Luck /* Data fields for additional structures to manage this group. */ 781fb2daa6STony Luck const char *pfname; 791fb2daa6STony Luck struct pmt_feature_group *pfg; 80842e7f97STony Luck bool force_off, force_on; 818f6b6ad6STony Luck 828f6b6ad6STony Luck /* Remaining fields initialized from XML file. */ 838f6b6ad6STony Luck u32 guid; 8467640e33STony Luck u32 num_rmid; 858f6b6ad6STony Luck size_t mmio_size; 868f6b6ad6STony Luck unsigned int num_events; 878f6b6ad6STony Luck struct pmt_event evts[] __counted_by(num_events); 888f6b6ad6STony Luck }; 898f6b6ad6STony Luck 908f6b6ad6STony Luck #define XML_MMIO_SIZE(num_rmids, num_events, num_extra_status) \ 918f6b6ad6STony Luck (((num_rmids) * (num_events) + (num_extra_status)) * sizeof(u64)) 928f6b6ad6STony Luck 938f6b6ad6STony Luck /* 948f6b6ad6STony Luck * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-ENERGY/cwf_aggregator.xml 958f6b6ad6STony Luck */ 968f6b6ad6STony Luck static struct event_group energy_0x26696143 = { 978f6b6ad6STony Luck .pfname = "energy", 988f6b6ad6STony Luck .guid = 0x26696143, 9967640e33STony Luck .num_rmid = 576, 1008f6b6ad6STony Luck .mmio_size = XML_MMIO_SIZE(576, 2, 3), 1018f6b6ad6STony Luck .num_events = 2, 1028f6b6ad6STony Luck .evts = { 1038f6b6ad6STony Luck EVT(PMT_EVENT_ENERGY, 0, 18), 1048f6b6ad6STony Luck EVT(PMT_EVENT_ACTIVITY, 1, 18), 1058f6b6ad6STony Luck } 1068f6b6ad6STony Luck }; 1078f6b6ad6STony Luck 1088f6b6ad6STony Luck /* 1098f6b6ad6STony Luck * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-PERF/cwf_aggregator.xml 1108f6b6ad6STony Luck */ 1118f6b6ad6STony Luck static struct event_group perf_0x26557651 = { 1128f6b6ad6STony Luck .pfname = "perf", 1138f6b6ad6STony Luck .guid = 0x26557651, 11467640e33STony Luck .num_rmid = 576, 1158f6b6ad6STony Luck .mmio_size = XML_MMIO_SIZE(576, 7, 3), 1168f6b6ad6STony Luck .num_events = 7, 1178f6b6ad6STony Luck .evts = { 1188f6b6ad6STony Luck EVT(PMT_EVENT_STALLS_LLC_HIT, 0, 0), 1198f6b6ad6STony Luck EVT(PMT_EVENT_C1_RES, 1, 0), 1208f6b6ad6STony Luck EVT(PMT_EVENT_UNHALTED_CORE_CYCLES, 2, 0), 1218f6b6ad6STony Luck EVT(PMT_EVENT_STALLS_LLC_MISS, 3, 0), 1228f6b6ad6STony Luck EVT(PMT_EVENT_AUTO_C6_RES, 4, 0), 1238f6b6ad6STony Luck EVT(PMT_EVENT_UNHALTED_REF_CYCLES, 5, 0), 1248f6b6ad6STony Luck EVT(PMT_EVENT_UOPS_RETIRED, 6, 0), 1258f6b6ad6STony Luck } 1261fb2daa6STony Luck }; 1271fb2daa6STony Luck 1281fb2daa6STony Luck static struct event_group *known_event_groups[] = { 1298f6b6ad6STony Luck &energy_0x26696143, 1308f6b6ad6STony Luck &perf_0x26557651, 1311fb2daa6STony Luck }; 1321fb2daa6STony Luck 1331fb2daa6STony Luck #define for_each_event_group(_peg) \ 1341fb2daa6STony Luck for (_peg = known_event_groups; \ 1351fb2daa6STony Luck _peg < &known_event_groups[ARRAY_SIZE(known_event_groups)]; \ 1361fb2daa6STony Luck _peg++) 1371fb2daa6STony Luck 138842e7f97STony Luck bool intel_handle_aet_option(bool force_off, char *tok) 139842e7f97STony Luck { 140842e7f97STony Luck struct event_group **peg; 141842e7f97STony Luck bool ret = false; 142842e7f97STony Luck u32 guid = 0; 143842e7f97STony Luck char *name; 144842e7f97STony Luck 145842e7f97STony Luck if (!tok) 146842e7f97STony Luck return false; 147842e7f97STony Luck 148842e7f97STony Luck name = strsep(&tok, ":"); 149842e7f97STony Luck if (tok && kstrtou32(tok, 16, &guid)) 150842e7f97STony Luck return false; 151842e7f97STony Luck 152842e7f97STony Luck for_each_event_group(peg) { 153842e7f97STony Luck if (strcmp(name, (*peg)->pfname)) 154842e7f97STony Luck continue; 155842e7f97STony Luck if (guid && (*peg)->guid != guid) 156842e7f97STony Luck continue; 157842e7f97STony Luck if (force_off) 158842e7f97STony Luck (*peg)->force_off = true; 159842e7f97STony Luck else 160842e7f97STony Luck (*peg)->force_on = true; 161842e7f97STony Luck ret = true; 162842e7f97STony Luck } 163842e7f97STony Luck 164842e7f97STony Luck return ret; 165842e7f97STony Luck } 166842e7f97STony Luck 1677e6df961STony Luck static bool skip_telem_region(struct telemetry_region *tr, struct event_group *e) 1687e6df961STony Luck { 1697e6df961STony Luck if (tr->guid != e->guid) 1707e6df961STony Luck return true; 1717e6df961STony Luck if (tr->plat_info.package_id >= topology_max_packages()) { 1727e6df961STony Luck pr_warn("Bad package %u in guid 0x%x\n", tr->plat_info.package_id, 1737e6df961STony Luck tr->guid); 1747e6df961STony Luck return true; 1757e6df961STony Luck } 1767e6df961STony Luck if (tr->size != e->mmio_size) { 1777e6df961STony Luck pr_warn("MMIO space wrong size (%zu bytes) for guid 0x%x. Expected %zu bytes.\n", 1787e6df961STony Luck tr->size, e->guid, e->mmio_size); 1797e6df961STony Luck return true; 1807e6df961STony Luck } 1817e6df961STony Luck 1827e6df961STony Luck return false; 1837e6df961STony Luck } 1847e6df961STony Luck 1857e6df961STony Luck static bool group_has_usable_regions(struct event_group *e, struct pmt_feature_group *p) 1867e6df961STony Luck { 1877e6df961STony Luck bool usable_regions = false; 1887e6df961STony Luck 1897e6df961STony Luck for (int i = 0; i < p->count; i++) { 1907e6df961STony Luck if (skip_telem_region(&p->regions[i], e)) { 1917e6df961STony Luck /* 1927e6df961STony Luck * Clear the address field of regions that did not pass the checks in 1937e6df961STony Luck * skip_telem_region() so they will not be used by intel_aet_read_event(). 1947e6df961STony Luck * This is safe to do because intel_pmt_get_regions_by_feature() allocates 1957e6df961STony Luck * a new pmt_feature_group structure to return to each caller and only makes 1967e6df961STony Luck * use of the pmt_feature_group::kref field when intel_pmt_put_feature_group() 1977e6df961STony Luck * returns the structure. 1987e6df961STony Luck */ 1997e6df961STony Luck p->regions[i].addr = NULL; 2007e6df961STony Luck 2017e6df961STony Luck continue; 2027e6df961STony Luck } 2037e6df961STony Luck usable_regions = true; 2047e6df961STony Luck } 2057e6df961STony Luck 2067e6df961STony Luck return usable_regions; 2077e6df961STony Luck } 2087e6df961STony Luck 20967640e33STony Luck static bool all_regions_have_sufficient_rmid(struct event_group *e, struct pmt_feature_group *p) 21067640e33STony Luck { 21167640e33STony Luck struct telemetry_region *tr; 21267640e33STony Luck 21367640e33STony Luck for (int i = 0; i < p->count; i++) { 21467640e33STony Luck if (!p->regions[i].addr) 21567640e33STony Luck continue; 21667640e33STony Luck tr = &p->regions[i]; 21767640e33STony Luck if (tr->num_rmids < e->num_rmid) { 21867640e33STony Luck e->force_off = true; 21967640e33STony Luck return false; 22067640e33STony Luck } 22167640e33STony Luck } 22267640e33STony Luck 22367640e33STony Luck return true; 22467640e33STony Luck } 22567640e33STony Luck 2261fb2daa6STony Luck static bool enable_events(struct event_group *e, struct pmt_feature_group *p) 2271fb2daa6STony Luck { 2287e6df961STony Luck struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl; 2297e6df961STony Luck int skipped_events = 0; 2307e6df961STony Luck 231842e7f97STony Luck if (e->force_off) 232842e7f97STony Luck return false; 233842e7f97STony Luck 2347e6df961STony Luck if (!group_has_usable_regions(e, p)) 2351fb2daa6STony Luck return false; 2367e6df961STony Luck 23767640e33STony Luck /* 23867640e33STony Luck * Only enable event group with insufficient RMIDs if the user requested 23967640e33STony Luck * it from the kernel command line. 24067640e33STony Luck */ 24167640e33STony Luck if (!all_regions_have_sufficient_rmid(e, p) && !e->force_on) { 24267640e33STony Luck pr_info("%s %s:0x%x monitoring not enabled due to insufficient RMIDs\n", 24367640e33STony Luck r->name, e->pfname, e->guid); 24467640e33STony Luck return false; 24567640e33STony Luck } 24667640e33STony Luck 24767640e33STony Luck for (int i = 0; i < p->count; i++) { 24867640e33STony Luck if (!p->regions[i].addr) 24967640e33STony Luck continue; 25067640e33STony Luck /* 25167640e33STony Luck * e->num_rmid only adjusted lower if user (via rdt= kernel 25267640e33STony Luck * parameter) forces an event group with insufficient RMID 25367640e33STony Luck * to be enabled. 25467640e33STony Luck */ 25567640e33STony Luck e->num_rmid = min(e->num_rmid, p->regions[i].num_rmids); 25667640e33STony Luck } 25767640e33STony Luck 2587e6df961STony Luck for (int j = 0; j < e->num_events; j++) { 2597e6df961STony Luck if (!resctrl_enable_mon_event(e->evts[j].id, true, 2607e6df961STony Luck e->evts[j].bin_bits, &e->evts[j])) 2617e6df961STony Luck skipped_events++; 2627e6df961STony Luck } 2637e6df961STony Luck if (e->num_events == skipped_events) { 2647e6df961STony Luck pr_info("No events enabled in %s %s:0x%x\n", r->name, e->pfname, e->guid); 2657e6df961STony Luck return false; 2667e6df961STony Luck } 2677e6df961STony Luck 26867640e33STony Luck if (r->mon.num_rmid) 26967640e33STony Luck r->mon.num_rmid = min(r->mon.num_rmid, e->num_rmid); 27067640e33STony Luck else 27167640e33STony Luck r->mon.num_rmid = e->num_rmid; 27267640e33STony Luck 273*4bbfc901STony Luck if (skipped_events) 274*4bbfc901STony Luck pr_info("%s %s:0x%x monitoring detected (skipped %d events)\n", r->name, 275*4bbfc901STony Luck e->pfname, e->guid, skipped_events); 276*4bbfc901STony Luck else 277*4bbfc901STony Luck pr_info("%s %s:0x%x monitoring detected\n", r->name, e->pfname, e->guid); 278*4bbfc901STony Luck 2797e6df961STony Luck return true; 2801fb2daa6STony Luck } 2811fb2daa6STony Luck 2821fb2daa6STony Luck static enum pmt_feature_id lookup_pfid(const char *pfname) 2831fb2daa6STony Luck { 2841fb2daa6STony Luck if (!strcmp(pfname, "energy")) 2851fb2daa6STony Luck return FEATURE_PER_RMID_ENERGY_TELEM; 2861fb2daa6STony Luck else if (!strcmp(pfname, "perf")) 2871fb2daa6STony Luck return FEATURE_PER_RMID_PERF_TELEM; 2881fb2daa6STony Luck 2891fb2daa6STony Luck pr_warn("Unknown PMT feature name '%s'\n", pfname); 2901fb2daa6STony Luck 2911fb2daa6STony Luck return FEATURE_INVALID; 2921fb2daa6STony Luck } 2931fb2daa6STony Luck 2941fb2daa6STony Luck /* 2951fb2daa6STony Luck * Request a copy of struct pmt_feature_group for each event group. If there is 2961fb2daa6STony Luck * one, the returned structure has an array of telemetry_region structures, 2971fb2daa6STony Luck * each element of the array describes one telemetry aggregator. The 2981fb2daa6STony Luck * telemetry aggregators may have different GUIDs so obtain duplicate struct 2991fb2daa6STony Luck * pmt_feature_group for event groups with same feature type but different 3001fb2daa6STony Luck * GUID. Post-processing ensures an event group can only use the telemetry 3011fb2daa6STony Luck * aggregators that match its GUID. An event group keeps a pointer to its 3021fb2daa6STony Luck * struct pmt_feature_group to indicate that its events are successfully 3031fb2daa6STony Luck * enabled. 3041fb2daa6STony Luck */ 3051fb2daa6STony Luck bool intel_aet_get_events(void) 3061fb2daa6STony Luck { 3071fb2daa6STony Luck struct pmt_feature_group *p; 3081fb2daa6STony Luck enum pmt_feature_id pfid; 3091fb2daa6STony Luck struct event_group **peg; 3101fb2daa6STony Luck bool ret = false; 3111fb2daa6STony Luck 3121fb2daa6STony Luck for_each_event_group(peg) { 3131fb2daa6STony Luck pfid = lookup_pfid((*peg)->pfname); 3141fb2daa6STony Luck p = intel_pmt_get_regions_by_feature(pfid); 3151fb2daa6STony Luck if (IS_ERR_OR_NULL(p)) 3161fb2daa6STony Luck continue; 3171fb2daa6STony Luck if (enable_events(*peg, p)) { 3181fb2daa6STony Luck (*peg)->pfg = p; 3191fb2daa6STony Luck ret = true; 3201fb2daa6STony Luck } else { 3211fb2daa6STony Luck intel_pmt_put_feature_group(p); 3221fb2daa6STony Luck } 3231fb2daa6STony Luck } 3241fb2daa6STony Luck 3251fb2daa6STony Luck return ret; 3261fb2daa6STony Luck } 3271fb2daa6STony Luck 3281fb2daa6STony Luck void __exit intel_aet_exit(void) 3291fb2daa6STony Luck { 3301fb2daa6STony Luck struct event_group **peg; 3311fb2daa6STony Luck 3321fb2daa6STony Luck for_each_event_group(peg) { 3331fb2daa6STony Luck if ((*peg)->pfg) { 3341fb2daa6STony Luck intel_pmt_put_feature_group((*peg)->pfg); 3351fb2daa6STony Luck (*peg)->pfg = NULL; 3361fb2daa6STony Luck } 3371fb2daa6STony Luck } 3381fb2daa6STony Luck } 33951541f6cSTony Luck 34051541f6cSTony Luck #define DATA_VALID BIT_ULL(63) 34151541f6cSTony Luck #define DATA_BITS GENMASK_ULL(62, 0) 34251541f6cSTony Luck 34351541f6cSTony Luck /* 34451541f6cSTony Luck * Read counter for an event on a domain (summing all aggregators on the 34551541f6cSTony Luck * domain). If an aggregator hasn't received any data for a specific RMID, 34651541f6cSTony Luck * the MMIO read indicates that data is not valid. Return success if at 34751541f6cSTony Luck * least one aggregator has valid data. 34851541f6cSTony Luck */ 34951541f6cSTony Luck int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val) 35051541f6cSTony Luck { 35151541f6cSTony Luck struct pmt_event *pevt = arch_priv; 35251541f6cSTony Luck struct event_group *e; 35351541f6cSTony Luck bool valid = false; 35451541f6cSTony Luck u64 total = 0; 35551541f6cSTony Luck u64 evtcount; 35651541f6cSTony Luck void *pevt0; 35751541f6cSTony Luck u32 idx; 35851541f6cSTony Luck 35951541f6cSTony Luck pevt0 = pevt - pevt->idx; 36051541f6cSTony Luck e = container_of(pevt0, struct event_group, evts); 36151541f6cSTony Luck idx = rmid * e->num_events; 36251541f6cSTony Luck idx += pevt->idx; 36351541f6cSTony Luck 36451541f6cSTony Luck if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) { 36551541f6cSTony Luck pr_warn_once("MMIO index %u out of range\n", idx); 36651541f6cSTony Luck return -EIO; 36751541f6cSTony Luck } 36851541f6cSTony Luck 36951541f6cSTony Luck for (int i = 0; i < e->pfg->count; i++) { 37051541f6cSTony Luck if (!e->pfg->regions[i].addr) 37151541f6cSTony Luck continue; 37251541f6cSTony Luck if (e->pfg->regions[i].plat_info.package_id != domid) 37351541f6cSTony Luck continue; 37451541f6cSTony Luck evtcount = readq(e->pfg->regions[i].addr + idx * sizeof(u64)); 37551541f6cSTony Luck if (!(evtcount & DATA_VALID)) 37651541f6cSTony Luck continue; 37751541f6cSTony Luck total += evtcount & DATA_BITS; 37851541f6cSTony Luck valid = true; 37951541f6cSTony Luck } 38051541f6cSTony Luck 38151541f6cSTony Luck if (valid) 38251541f6cSTony Luck *val = total; 38351541f6cSTony Luck 38451541f6cSTony Luck return valid ? 0 : -EINVAL; 38551541f6cSTony Luck } 386f4e0cd80STony Luck 387f4e0cd80STony Luck void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r, 388f4e0cd80STony Luck struct list_head *add_pos) 389f4e0cd80STony Luck { 390f4e0cd80STony Luck struct rdt_perf_pkg_mon_domain *d; 391f4e0cd80STony Luck int err; 392f4e0cd80STony Luck 393f4e0cd80STony Luck d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu)); 394f4e0cd80STony Luck if (!d) 395f4e0cd80STony Luck return; 396f4e0cd80STony Luck 397f4e0cd80STony Luck d->hdr.id = id; 398f4e0cd80STony Luck d->hdr.type = RESCTRL_MON_DOMAIN; 399f4e0cd80STony Luck d->hdr.rid = RDT_RESOURCE_PERF_PKG; 400f4e0cd80STony Luck cpumask_set_cpu(cpu, &d->hdr.cpu_mask); 401f4e0cd80STony Luck list_add_tail_rcu(&d->hdr.list, add_pos); 402f4e0cd80STony Luck 403f4e0cd80STony Luck err = resctrl_online_mon_domain(r, &d->hdr); 404f4e0cd80STony Luck if (err) { 405f4e0cd80STony Luck list_del_rcu(&d->hdr.list); 406f4e0cd80STony Luck synchronize_rcu(); 407f4e0cd80STony Luck kfree(d); 408f4e0cd80STony Luck } 409f4e0cd80STony Luck } 410