// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology(RDT)
 * - Intel Application Energy Telemetry
 *
 * Copyright (C) 2025 Intel Corporation
 *
 * Author:
 *	Tony Luck <tony.luck@intel.com>
 */

#define pr_fmt(fmt) "resctrl: " fmt

#include <linux/bits.h>
#include <linux/compiler_types.h>
#include <linux/container_of.h>
#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/gfp_types.h>
#include <linux/init.h>
#include <linux/intel_pmt_features.h>
#include <linux/intel_vsec.h>
#include <linux/io.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/resctrl.h>
#include <linux/resctrl_types.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/topology.h>
#include <linux/types.h>

#include "internal.h"

/**
 * struct pmt_event - Telemetry event.
 * @id:		Resctrl event id.
 * @idx:	Counter index within each per-RMID block of counters.
 * @bin_bits:	Zero for integer valued events, else number bits in fraction
 *		part of fixed-point.
 */
struct pmt_event {
	enum resctrl_event_id	id;
	unsigned int		idx;
	unsigned int		bin_bits;
};

/* Shorthand initializer for one struct pmt_event table entry. */
#define EVT(_id, _idx, _bits) { .id = _id, .idx = _idx, .bin_bits = _bits }

/**
 * struct event_group - Events with the same feature type ("energy" or "perf") and GUID.
 * @pfname:		PMT feature name ("energy" or "perf") of this event group.
 *			Used by boot rdt= option.
 * @pfg:		Points to the aggregated telemetry space information
 *			returned by the intel_pmt_get_regions_by_feature()
 *			call to the INTEL_PMT_TELEMETRY driver that contains
 *			data for all telemetry regions of type @pfname.
 *			Valid if the system supports the event group,
 *			NULL otherwise.
 * @force_off:		True when "rdt" command line or architecture code disables
 *			this event group.
 * @force_on:		True when "rdt" command line overrides disable of this
 *			event group.
 * @guid:		Unique number per XML description file.
 * @mmio_size:		Number of bytes of MMIO registers for this group.
 * @num_events:		Number of events in this group.
 * @evts:		Array of event descriptors.
 */
struct event_group {
	/* Data fields for additional structures to manage this group. */
	const char		*pfname;
	struct pmt_feature_group *pfg;
	bool			force_off, force_on;

	/* Remaining fields initialized from XML file. */
	u32			guid;
	size_t			mmio_size;
	unsigned int		num_events;
	struct pmt_event	evts[] __counted_by(num_events);
};

/*
 * Expected size of an aggregator's MMIO space: one u64 counter per event
 * per RMID, plus some number of extra u64 status registers. Used to sanity
 * check the size reported by the PMT driver before reading any counters.
 */
#define XML_MMIO_SIZE(num_rmids, num_events, num_extra_status)	\
	(((num_rmids) * (num_events) + (num_extra_status)) * sizeof(u64))

/*
 * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-ENERGY/cwf_aggregator.xml
 */
static struct event_group energy_0x26696143 = {
	.pfname		= "energy",
	.guid		= 0x26696143,
	.mmio_size	= XML_MMIO_SIZE(576, 2, 3),
	.num_events	= 2,
	.evts		= {
		EVT(PMT_EVENT_ENERGY, 0, 18),
		EVT(PMT_EVENT_ACTIVITY, 1, 18),
	}
};

/*
 * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-PERF/cwf_aggregator.xml
 */
static struct event_group perf_0x26557651 = {
	.pfname		= "perf",
	.guid		= 0x26557651,
	.mmio_size	= XML_MMIO_SIZE(576, 7, 3),
	.num_events	= 7,
	.evts		= {
		EVT(PMT_EVENT_STALLS_LLC_HIT, 0, 0),
		EVT(PMT_EVENT_C1_RES, 1, 0),
		EVT(PMT_EVENT_UNHALTED_CORE_CYCLES, 2, 0),
		EVT(PMT_EVENT_STALLS_LLC_MISS, 3, 0),
		EVT(PMT_EVENT_AUTO_C6_RES, 4, 0),
		EVT(PMT_EVENT_UNHALTED_REF_CYCLES, 5, 0),
		EVT(PMT_EVENT_UOPS_RETIRED, 6, 0),
	}
};

/* All event groups this file knows how to enable. */
static struct event_group *known_event_groups[] = {
	&energy_0x26696143,
	&perf_0x26557651,
};

/* Iterate @_peg (struct event_group **) over known_event_groups[]. */
#define for_each_event_group(_peg)					\
	for (_peg = known_event_groups;					\
	     _peg < &known_event_groups[ARRAY_SIZE(known_event_groups)]; \
	     _peg++)

/*
 * Parse one "rdt" boot option token for this file: either a bare feature
 * name ("energy"/"perf"), or "name:guid" with the GUID in hex. Set
 * event_group::force_off or event_group::force_on on every matching
 * group depending on @force_off.
 *
 * Return: true if at least one event group matched @tok, false otherwise
 * (including NULL @tok or an unparseable GUID).
 */
bool intel_handle_aet_option(bool force_off, char *tok)
{
	struct event_group **peg;
	bool ret = false;
	u32 guid = 0;
	char *name;

	if (!tok)
		return false;

	name = strsep(&tok, ":");
	if (tok && kstrtou32(tok, 16, &guid))
		return false;

	for_each_event_group(peg) {
		if (strcmp(name, (*peg)->pfname))
			continue;
		/* guid == 0 means "no GUID given": match all groups with this name. */
		if (guid && (*peg)->guid != guid)
			continue;
		if (force_off)
			(*peg)->force_off = true;
		else
			(*peg)->force_on = true;
		ret = true;
	}

	return ret;
}

/*
 * Decide whether telemetry region @tr is unusable for event group @e:
 * wrong GUID, an out of range package id, or an MMIO space whose size
 * does not match what the XML description promised. Warn (once per
 * offending region) for the latter two since they indicate bad
 * enumeration data rather than a simple mismatch.
 */
static bool skip_telem_region(struct telemetry_region *tr, struct event_group *e)
{
	if (tr->guid != e->guid)
		return true;
	if (tr->plat_info.package_id >= topology_max_packages()) {
		pr_warn("Bad package %u in guid 0x%x\n", tr->plat_info.package_id,
			tr->guid);
		return true;
	}
	if (tr->size != e->mmio_size) {
		pr_warn("MMIO space wrong size (%zu bytes) for guid 0x%x. Expected %zu bytes.\n",
			tr->size, e->guid, e->mmio_size);
		return true;
	}

	return false;
}

/*
 * Check the regions in @p against event group @e. Regions that fail the
 * checks have their address cleared so later reads ignore them.
 *
 * Return: true if at least one region is usable by @e.
 */
static bool group_has_usable_regions(struct event_group *e, struct pmt_feature_group *p)
{
	bool usable_regions = false;

	for (int i = 0; i < p->count; i++) {
		if (skip_telem_region(&p->regions[i], e)) {
			/*
			 * Clear the address field of regions that did not pass the checks in
			 * skip_telem_region() so they will not be used by intel_aet_read_event().
			 * This is safe to do because intel_pmt_get_regions_by_feature() allocates
			 * a new pmt_feature_group structure to return to each caller and only makes
			 * use of the pmt_feature_group::kref field when intel_pmt_put_feature_group()
			 * returns the structure.
			 */
			p->regions[i].addr = NULL;

			continue;
		}
		usable_regions = true;
	}

	return usable_regions;
}

/*
 * Try to enable all events in group @e backed by feature group @p.
 *
 * Return: true if the group has usable regions and at least one event was
 * accepted by resctrl_enable_mon_event(), false otherwise (also false
 * when the group was disabled via force_off).
 */
static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl;
	int skipped_events = 0;

	if (e->force_off)
		return false;

	if (!group_has_usable_regions(e, p))
		return false;

	for (int j = 0; j < e->num_events; j++) {
		/* The pmt_event pointer is handed back as arch_priv in intel_aet_read_event(). */
		if (!resctrl_enable_mon_event(e->evts[j].id, true,
					      e->evts[j].bin_bits, &e->evts[j]))
			skipped_events++;
	}
	if (e->num_events == skipped_events) {
		pr_info("No events enabled in %s %s:0x%x\n", r->name, e->pfname, e->guid);
		return false;
	}

	return true;
}

/* Map an event group's feature name to the PMT driver's feature id. */
static enum pmt_feature_id lookup_pfid(const char *pfname)
{
	if (!strcmp(pfname, "energy"))
		return FEATURE_PER_RMID_ENERGY_TELEM;
	else if (!strcmp(pfname, "perf"))
		return FEATURE_PER_RMID_PERF_TELEM;

	pr_warn("Unknown PMT feature name '%s'\n", pfname);

	return FEATURE_INVALID;
}

/*
 * Request a copy of struct pmt_feature_group for each event group. If there is
 * one, the returned structure has an array of telemetry_region structures,
 * each element of the array describes one telemetry aggregator. The
 * telemetry aggregators may have different GUIDs so obtain duplicate struct
 * pmt_feature_group for event groups with same feature type but different
 * GUID. Post-processing ensures an event group can only use the telemetry
 * aggregators that match its GUID. An event group keeps a pointer to its
 * struct pmt_feature_group to indicate that its events are successfully
 * enabled.
 */
bool intel_aet_get_events(void)
{
	struct pmt_feature_group *p;
	enum pmt_feature_id pfid;
	struct event_group **peg;
	bool ret = false;

	for_each_event_group(peg) {
		pfid = lookup_pfid((*peg)->pfname);
		p = intel_pmt_get_regions_by_feature(pfid);
		if (IS_ERR_OR_NULL(p))
			continue;
		if (enable_events(*peg, p)) {
			(*peg)->pfg = p;
			ret = true;
		} else {
			/* Not using this copy; drop the reference now. */
			intel_pmt_put_feature_group(p);
		}
	}

	return ret;
}

/*
 * Module teardown: return every feature group reference taken in
 * intel_aet_get_events() to the PMT driver.
 */
void __exit intel_aet_exit(void)
{
	struct event_group **peg;

	for_each_event_group(peg) {
		if ((*peg)->pfg) {
			intel_pmt_put_feature_group((*peg)->pfg);
			(*peg)->pfg = NULL;
		}
	}
}

/* MMIO counter format: bit 63 is a valid flag, bits 62:0 hold the count. */
#define DATA_VALID	BIT_ULL(63)
#define DATA_BITS	GENMASK_ULL(62, 0)

/*
 * Read counter for an event on a domain (summing all aggregators on the
 * domain). If an aggregator hasn't received any data for a specific RMID,
 * the MMIO read indicates that data is not valid. Return success if at
 * least one aggregator has valid data.
 */
int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val)
{
	struct pmt_event *pevt = arch_priv;
	struct event_group *e;
	bool valid = false;
	u64 total = 0;
	u64 evtcount;
	void *pevt0;
	u32 idx;

	/*
	 * @arch_priv points at one element of event_group::evts[] (set up in
	 * enable_events()). Step back pevt->idx elements to the start of the
	 * array so container_of() can recover the enclosing event_group.
	 */
	pevt0 = pevt - pevt->idx;
	e = container_of(pevt0, struct event_group, evts);
	/* Counters are laid out as num_events consecutive u64s per RMID. */
	idx = rmid * e->num_events;
	idx += pevt->idx;

	/* Bounds check against the validated MMIO size before any read. */
	if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) {
		pr_warn_once("MMIO index %u out of range\n", idx);
		return -EIO;
	}

	for (int i = 0; i < e->pfg->count; i++) {
		/* NULL addr: region rejected by group_has_usable_regions(). */
		if (!e->pfg->regions[i].addr)
			continue;
		if (e->pfg->regions[i].plat_info.package_id != domid)
			continue;
		evtcount = readq(e->pfg->regions[i].addr + idx * sizeof(u64));
		if (!(evtcount & DATA_VALID))
			continue;
		total += evtcount & DATA_BITS;
		valid = true;
	}

	if (valid)
		*val = total;

	return valid ? 0 : -EINVAL;
}

/*
 * Allocate and bring online the monitoring domain for package @id when
 * CPU @cpu comes up. On allocation failure the domain is silently not
 * created; if resctrl_online_mon_domain() fails the list insertion is
 * rolled back (with synchronize_rcu() so no reader still sees the entry)
 * and the allocation freed.
 */
void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r,
				struct list_head *add_pos)
{
	struct rdt_perf_pkg_mon_domain *d;
	int err;

	d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
	if (!d)
		return;

	d->hdr.id = id;
	d->hdr.type = RESCTRL_MON_DOMAIN;
	d->hdr.rid = RDT_RESOURCE_PERF_PKG;
	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
	list_add_tail_rcu(&d->hdr.list, add_pos);

	err = resctrl_online_mon_domain(r, &d->hdr);
	if (err) {
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		kfree(d);
	}
}