// SPDX-License-Identifier: GPL-2.0-only
/*
 * Resource Director Technology(RDT)
 * - Intel Application Energy Telemetry
 *
 * Copyright (C) 2025 Intel Corporation
 *
 * Author:
 *	Tony Luck <tony.luck@intel.com>
 */

#define pr_fmt(fmt)	"resctrl: " fmt

#include <linux/array_size.h>
#include <linux/bits.h>
#include <linux/compiler_types.h>
#include <linux/container_of.h>
#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/gfp_types.h>
#include <linux/init.h>
#include <linux/intel_pmt_features.h>
#include <linux/intel_vsec.h>
#include <linux/io.h>
#include <linux/kstrtox.h>
#include <linux/minmax.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/resctrl.h>
#include <linux/resctrl_types.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/topology.h>
#include <linux/types.h>

#include "internal.h"

/**
 * struct pmt_event - Telemetry event.
 * @id:		Resctrl event id.
 * @idx:	Counter index within each per-RMID block of counters.
 * @bin_bits:	Zero for integer-valued events, else the number of bits in
 *		the fraction part of the fixed-point value.
 */
struct pmt_event {
	enum resctrl_event_id	id;
	unsigned int		idx;
	unsigned int		bin_bits;
};

#define EVT(_id, _idx, _bits)	{ .id = _id, .idx = _idx, .bin_bits = _bits }

/**
 * struct event_group - Events with the same feature type ("energy" or "perf") and GUID.
 * @pfname:	PMT feature name ("energy" or "perf") of this event group.
 *		Used by the rdt= boot option.
 * @pfg:	Points to the aggregated telemetry space information
 *		returned by the intel_pmt_get_regions_by_feature()
 *		call to the INTEL_PMT_TELEMETRY driver that contains
 *		data for all telemetry regions of type @pfname.
 *		Valid if the system supports the event group,
 *		NULL otherwise.
 * @force_off:	True when the "rdt" command line option or architecture code
 *		disables this event group due to insufficient RMIDs.
 * @force_on:	True when the "rdt" command line option overrides disabling
 *		of this event group.
 * @guid:	Unique number per XML description file.
 * @num_rmid:	Number of RMIDs supported by this group. May be
 *		adjusted downwards if enumeration from
 *		intel_pmt_get_regions_by_feature() indicates fewer
 *		RMIDs can be tracked simultaneously.
 * @mmio_size:	Number of bytes of MMIO registers for this group.
 * @num_events:	Number of events in this group.
 * @evts:	Array of event descriptors.
 */
struct event_group {
	/* Data fields for additional structures to manage this group. */
	const char		*pfname;
	struct pmt_feature_group *pfg;
	bool			force_off, force_on;

	/* Remaining fields initialized from XML file. */
	u32			guid;
	u32			num_rmid;
	size_t			mmio_size;
	unsigned int		num_events;
	struct pmt_event	evts[] __counted_by(num_events);
};
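/*
 * Register layout implied by XML_MMIO_SIZE() below and by the index
 * computation in intel_aet_read_event(): each telemetry region packs one
 * block of @num_events consecutive 64-bit counters per RMID, followed by a
 * few extra status registers that this file does not read:
 *
 *	counter offset = (rmid * num_events + event_idx) * sizeof(u64)
 *
 * This summary is derived from the code in this file, not from the XML
 * description files themselves.
 */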
#define XML_MMIO_SIZE(num_rmids, num_events, num_extra_status) \
	(((num_rmids) * (num_events) + (num_extra_status)) * sizeof(u64))

/*
 * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-ENERGY/cwf_aggregator.xml
 */
static struct event_group energy_0x26696143 = {
	.pfname = "energy",
	.guid = 0x26696143,
	.num_rmid = 576,
	.mmio_size = XML_MMIO_SIZE(576, 2, 3),
	.num_events = 2,
	.evts = {
		EVT(PMT_EVENT_ENERGY, 0, 18),
		EVT(PMT_EVENT_ACTIVITY, 1, 18),
	}
};

/*
 * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-PERF/cwf_aggregator.xml
 */
static struct event_group perf_0x26557651 = {
	.pfname = "perf",
	.guid = 0x26557651,
	.num_rmid = 576,
	.mmio_size = XML_MMIO_SIZE(576, 7, 3),
	.num_events = 7,
	.evts = {
		EVT(PMT_EVENT_STALLS_LLC_HIT, 0, 0),
		EVT(PMT_EVENT_C1_RES, 1, 0),
		EVT(PMT_EVENT_UNHALTED_CORE_CYCLES, 2, 0),
		EVT(PMT_EVENT_STALLS_LLC_MISS, 3, 0),
		EVT(PMT_EVENT_AUTO_C6_RES, 4, 0),
		EVT(PMT_EVENT_UNHALTED_REF_CYCLES, 5, 0),
		EVT(PMT_EVENT_UOPS_RETIRED, 6, 0),
	}
};

static struct event_group *known_event_groups[] = {
	&energy_0x26696143,
	&perf_0x26557651,
};

#define for_each_event_group(_peg)					\
	for (_peg = known_event_groups;					\
	     _peg < &known_event_groups[ARRAY_SIZE(known_event_groups)]; \
	     _peg++)

bool intel_handle_aet_option(bool force_off, char *tok)
{
	struct event_group **peg;
	bool ret = false;
	u32 guid = 0;
	char *name;

	if (!tok)
		return false;

	name = strsep(&tok, ":");
	if (tok && kstrtou32(tok, 16, &guid))
		return false;

	for_each_event_group(peg) {
		if (strcmp(name, (*peg)->pfname))
			continue;
		if (guid && (*peg)->guid != guid)
			continue;
		if (force_off)
			(*peg)->force_off = true;
		else
			(*peg)->force_on = true;
		ret = true;
	}

	return ret;
}
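/*
 * Illustrative "rdt=" boot option usage, assuming the caller of
 * intel_handle_aet_option() follows the existing resctrl convention of
 * stripping a leading '!' from a token to request force_off:
 *
 *	rdt=energy		force enable the "energy" event groups
 *	rdt=!perf		disable all "perf" event groups
 *	rdt=!energy:26696143	disable only the energy group with GUID
 *				0x26696143 (GUID parsed as hex)
 *
 * This is a sketch only; the exact option syntax is defined by the caller,
 * not by this file.
 */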
static bool skip_telem_region(struct telemetry_region *tr, struct event_group *e)
{
	if (tr->guid != e->guid)
		return true;
	if (tr->plat_info.package_id >= topology_max_packages()) {
		pr_warn("Bad package %u in guid 0x%x\n", tr->plat_info.package_id,
			tr->guid);
		return true;
	}
	if (tr->size != e->mmio_size) {
		pr_warn("MMIO space wrong size (%zu bytes) for guid 0x%x. Expected %zu bytes.\n",
			tr->size, e->guid, e->mmio_size);
		return true;
	}

	return false;
}

static bool group_has_usable_regions(struct event_group *e, struct pmt_feature_group *p)
{
	bool usable_regions = false;

	for (int i = 0; i < p->count; i++) {
		if (skip_telem_region(&p->regions[i], e)) {
			/*
			 * Clear the address field of regions that did not pass the checks in
			 * skip_telem_region() so they will not be used by intel_aet_read_event().
			 * This is safe to do because intel_pmt_get_regions_by_feature() allocates
			 * a new pmt_feature_group structure to return to each caller and only makes
			 * use of the pmt_feature_group::kref field when the structure is handed
			 * back via intel_pmt_put_feature_group().
			 */
			p->regions[i].addr = NULL;

			continue;
		}
		usable_regions = true;
	}

	return usable_regions;
}

static bool all_regions_have_sufficient_rmid(struct event_group *e, struct pmt_feature_group *p)
{
	struct telemetry_region *tr;

	for (int i = 0; i < p->count; i++) {
		if (!p->regions[i].addr)
			continue;
		tr = &p->regions[i];
		if (tr->num_rmids < e->num_rmid) {
			e->force_off = true;
			return false;
		}
	}

	return true;
}

static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
{
	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl;
	int skipped_events = 0;

	if (e->force_off)
		return false;

	if (!group_has_usable_regions(e, p))
		return false;

	/*
	 * Only enable an event group with insufficient RMIDs if the user
	 * requested it on the kernel command line.
	 */
	if (!all_regions_have_sufficient_rmid(e, p) && !e->force_on) {
		pr_info("%s %s:0x%x monitoring not enabled due to insufficient RMIDs\n",
			r->name, e->pfname, e->guid);
		return false;
	}

	for (int i = 0; i < p->count; i++) {
		if (!p->regions[i].addr)
			continue;
		/*
		 * e->num_rmid is only adjusted downwards if the user (via the
		 * rdt= kernel parameter) forces an event group with
		 * insufficient RMIDs to be enabled.
		 */
		e->num_rmid = min(e->num_rmid, p->regions[i].num_rmids);
	}

	for (int j = 0; j < e->num_events; j++) {
		if (!resctrl_enable_mon_event(e->evts[j].id, true,
					      e->evts[j].bin_bits, &e->evts[j]))
			skipped_events++;
	}
	if (e->num_events == skipped_events) {
		pr_info("No events enabled in %s %s:0x%x\n", r->name, e->pfname, e->guid);
		return false;
	}

	if (r->mon.num_rmid)
		r->mon.num_rmid = min(r->mon.num_rmid, e->num_rmid);
	else
		r->mon.num_rmid = e->num_rmid;

	if (skipped_events)
		pr_info("%s %s:0x%x monitoring detected (skipped %d events)\n", r->name,
			e->pfname, e->guid, skipped_events);
	else
		pr_info("%s %s:0x%x monitoring detected\n", r->name, e->pfname, e->guid);

	return true;
}
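/*
 * Worked example of the RMID policy in enable_events() above (numbers are
 * illustrative, not from real enumeration): if the XML description claims
 * 576 RMIDs but one aggregator only enumerates 512, the group is normally
 * left disabled. If the user forces it on via the rdt= option, num_rmid is
 * clamped to 512 and the resource-wide r->mon.num_rmid is reduced to match
 * the smallest enabled event group.
 */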
static enum pmt_feature_id lookup_pfid(const char *pfname)
{
	if (!strcmp(pfname, "energy"))
		return FEATURE_PER_RMID_ENERGY_TELEM;
	else if (!strcmp(pfname, "perf"))
		return FEATURE_PER_RMID_PERF_TELEM;

	pr_warn("Unknown PMT feature name '%s'\n", pfname);

	return FEATURE_INVALID;
}

/*
 * Request a copy of struct pmt_feature_group for each event group. If there
 * is one, the returned structure has an array of telemetry_region structures,
 * each of which describes one telemetry aggregator. The telemetry aggregators
 * may have different GUIDs, so duplicate struct pmt_feature_group copies are
 * obtained for event groups with the same feature type but different GUIDs.
 * Post-processing ensures an event group can only use the telemetry
 * aggregators that match its GUID. An event group keeps a pointer to its
 * struct pmt_feature_group to indicate that its events were successfully
 * enabled.
 */
bool intel_aet_get_events(void)
{
	struct pmt_feature_group *p;
	enum pmt_feature_id pfid;
	struct event_group **peg;
	bool ret = false;

	for_each_event_group(peg) {
		pfid = lookup_pfid((*peg)->pfname);
		p = intel_pmt_get_regions_by_feature(pfid);
		if (IS_ERR_OR_NULL(p))
			continue;
		if (enable_events(*peg, p)) {
			(*peg)->pfg = p;
			ret = true;
		} else {
			intel_pmt_put_feature_group(p);
		}
	}

	return ret;
}

void __exit intel_aet_exit(void)
{
	struct event_group **peg;

	for_each_event_group(peg) {
		if ((*peg)->pfg) {
			intel_pmt_put_feature_group((*peg)->pfg);
			(*peg)->pfg = NULL;
		}
	}
}

#define DATA_VALID	BIT_ULL(63)
#define DATA_BITS	GENMASK_ULL(62, 0)

/*
 * Read the counter for an event on a domain (summing all aggregators on the
 * domain). If an aggregator hasn't received any data for a specific RMID,
 * the MMIO read indicates that the data is not valid. Return success if at
 * least one aggregator has valid data.
 */
int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val)
{
	struct pmt_event *pevt = arch_priv;
	struct event_group *e;
	bool valid = false;
	u64 total = 0;
	u64 evtcount;
	void *pevt0;
	u32 idx;

	pevt0 = pevt - pevt->idx;
	e = container_of(pevt0, struct event_group, evts);
	idx = rmid * e->num_events;
	idx += pevt->idx;

	if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) {
		pr_warn_once("MMIO index %u out of range\n", idx);
		return -EIO;
	}

	for (int i = 0; i < e->pfg->count; i++) {
		if (!e->pfg->regions[i].addr)
			continue;
		if (e->pfg->regions[i].plat_info.package_id != domid)
			continue;
		evtcount = readq(e->pfg->regions[i].addr + idx * sizeof(u64));
		if (!(evtcount & DATA_VALID))
			continue;
		total += evtcount & DATA_BITS;
		valid = true;
	}

	if (valid)
		*val = total;

	return valid ? 0 : -EINVAL;
}

void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r,
				struct list_head *add_pos)
{
	struct rdt_perf_pkg_mon_domain *d;
	int err;

	d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu));
	if (!d)
		return;

	d->hdr.id = id;
	d->hdr.type = RESCTRL_MON_DOMAIN;
	d->hdr.rid = RDT_RESOURCE_PERF_PKG;
	cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
	list_add_tail_rcu(&d->hdr.list, add_pos);

	err = resctrl_online_mon_domain(r, &d->hdr);
	if (err) {
		list_del_rcu(&d->hdr.list);
		synchronize_rcu();
		kfree(d);
	}
}
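/*
 * Counter format as interpreted by intel_aet_read_event(): bit 63 is the
 * DATA_VALID flag and bits 62:0 hold the count. For events with a non-zero
 * bin_bits value (18 for the energy group events above), the count is
 * fixed-point, so a raw field value of (1 << 18) represents 1.0. A sketch of
 * the conversion, with bits = pmt_event::bin_bits:
 *
 *	integer part  = (raw & DATA_BITS) >> bits
 *	fraction part = (raw & ((1ULL << bits) - 1)) / 2^bits
 *
 * The scaling is not applied here; bin_bits is passed to
 * resctrl_enable_mon_event() so that consumers of the event can apply it.
 */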