// SPDX-License-Identifier: GPL-2.0-only
/*
 * Support Intel IOMMU PerfMon
 * Copyright(c) 2023 Intel Corporation.
 */
#define pr_fmt(fmt)	"DMAR: " fmt
#define dev_fmt(fmt)	pr_fmt(fmt)

#include <linux/dmar.h>
#include "iommu.h"
#include "perfmon.h"

PMU_FORMAT_ATTR(event,       "config:0-27");  /* ES: Events Select */
PMU_FORMAT_ATTR(event_group, "config:28-31"); /* EGI: Event Group Index */

static struct attribute *iommu_pmu_format_attrs[] = {
        &format_attr_event_group.attr,
        &format_attr_event.attr,
        NULL
};

static struct attribute_group iommu_pmu_format_attr_group = {
        .name = "format",
        .attrs = iommu_pmu_format_attrs,
};

/* The available events are added in attr_update later */
static struct attribute *attrs_empty[] = {
        NULL
};

static struct attribute_group iommu_pmu_events_attr_group = {
        .name = "events",
        .attrs = attrs_empty,
};

static cpumask_t iommu_pmu_cpu_mask;

static ssize_t
cpumask_show(struct device *dev, struct device_attribute *attr, char *buf)
{
        return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask);
}
static DEVICE_ATTR_RO(cpumask);

static struct attribute *iommu_pmu_cpumask_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL
};

static struct attribute_group iommu_pmu_cpumask_attr_group = {
        .attrs = iommu_pmu_cpumask_attrs,
};

static const struct attribute_group *iommu_pmu_attr_groups[] = {
        &iommu_pmu_format_attr_group,
        &iommu_pmu_events_attr_group,
        &iommu_pmu_cpumask_attr_group,
        NULL
};

static inline struct iommu_pmu *dev_to_iommu_pmu(struct device *dev)
{
        /*
         * The perf_event creates its own dev for each PMU.
         * See pmu_dev_alloc()
         */
        return container_of(dev_get_drvdata(dev), struct iommu_pmu, pmu);
}

#define IOMMU_PMU_ATTR(_name, _format, _filter)                          \
        PMU_FORMAT_ATTR(_name, _format);                                 \
                                                                         \
static struct attribute *_name##_attr[] = {                              \
        &format_attr_##_name.attr,                                       \
        NULL                                                             \
};                                                                       \
                                                                         \
static umode_t                                                           \
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i)  \
{                                                                        \
        struct device *dev = kobj_to_dev(kobj);                          \
        struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev);             \
                                                                         \
        if (!iommu_pmu)                                                  \
                return 0;                                                \
        return (iommu_pmu->filter & _filter) ? attr->mode : 0;           \
}                                                                        \
                                                                         \
static struct attribute_group _name = {                                  \
        .name = "format",                                                \
        .attrs = _name##_attr,                                           \
        .is_visible = _name##_is_visible,                                \
};

IOMMU_PMU_ATTR(filter_requester_id_en, "config1:0", IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain_en, "config1:1", IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid_en, "config1:2", IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats_en, "config1:3", IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table_en, "config1:4", IOMMU_PMU_FILTER_PAGE_TABLE);
IOMMU_PMU_ATTR(filter_requester_id, "config1:16-31", IOMMU_PMU_FILTER_REQUESTER_ID);
IOMMU_PMU_ATTR(filter_domain, "config1:32-47", IOMMU_PMU_FILTER_DOMAIN);
IOMMU_PMU_ATTR(filter_pasid, "config2:0-21", IOMMU_PMU_FILTER_PASID);
IOMMU_PMU_ATTR(filter_ats, "config2:24-28", IOMMU_PMU_FILTER_ATS);
IOMMU_PMU_ATTR(filter_page_table, "config2:32-36", IOMMU_PMU_FILTER_PAGE_TABLE);

#define iommu_pmu_en_requester_id(e)		((e) & 0x1)
#define iommu_pmu_en_domain(e)			(((e) >> 1) & 0x1)
#define iommu_pmu_en_pasid(e)			(((e) >> 2) & 0x1)
#define iommu_pmu_en_ats(e)			(((e) >> 3) & 0x1)
#define iommu_pmu_en_page_table(e)		(((e) >> 4) & 0x1)
#define iommu_pmu_get_requester_id(filter)	(((filter) >> 16) & 0xffff)
#define iommu_pmu_get_domain(filter)		(((filter) >> 32) & 0xffff)
#define iommu_pmu_get_pasid(filter)		((filter) & 0x3fffff)
#define iommu_pmu_get_ats(filter)		(((filter) >> 24) & 0x1f)
#define iommu_pmu_get_page_table(filter)	(((filter) >> 32) & 0x1f)

#define iommu_pmu_set_filter(_name, _config, _filter, _idx, _econfig)           \
{                                                                               \
        if ((iommu_pmu->filter & _filter) && iommu_pmu_en_##_name(_econfig)) {  \
                dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET +  \
                            IOMMU_PMU_CFG_SIZE +                                \
                            (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET,  \
                            iommu_pmu_get_##_name(_config) | IOMMU_PMU_FILTER_EN); \
        }                                                                       \
}

#define iommu_pmu_clear_filter(_filter, _idx)                                   \
{                                                                               \
        if (iommu_pmu->filter & _filter) {                                      \
                dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET +  \
                            IOMMU_PMU_CFG_SIZE +                                \
                            (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET,  \
                            0);                                                 \
        }                                                                       \
}
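
/*
 * Illustrative note on the two macros above (derived from the code here,
 * not quoted from the spec): each filter gets its own 32-bit register in a
 * counter's configuration space, located at
 *
 *   cfg_reg + idx * IOMMU_PMU_CFG_OFFSET           (counter idx's block)
 *           + IOMMU_PMU_CFG_SIZE                   (skip the event config)
 *           + (ffs(filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET
 *
 * i.e. the bit position of the filter flag selects the per-filter slot.
 * Writing the extracted filter value OR'ed with IOMMU_PMU_FILTER_EN arms
 * the filter; writing 0 disarms it.
 */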

/*
 * Define the event attr related functions
 * Input: _name: event attr name
 *        _string: string of the event in sysfs
 *        _g_idx: event group encoding
 *        _event: event encoding
 */
#define IOMMU_PMU_EVENT_ATTR(_name, _string, _g_idx, _event)            \
        PMU_EVENT_ATTR_STRING(_name, event_attr_##_name, _string)       \
                                                                         \
static struct attribute *_name##_attr[] = {                              \
        &event_attr_##_name.attr.attr,                                   \
        NULL                                                             \
};                                                                       \
                                                                         \
static umode_t                                                           \
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i)  \
{                                                                        \
        struct device *dev = kobj_to_dev(kobj);                          \
        struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev);             \
                                                                         \
        if (!iommu_pmu)                                                  \
                return 0;                                                \
        return (iommu_pmu->evcap[_g_idx] & _event) ? attr->mode : 0;     \
}                                                                        \
                                                                         \
static struct attribute_group _name = {                                  \
        .name = "events",                                                \
        .attrs = _name##_attr,                                           \
        .is_visible = _name##_is_visible,                                \
};

IOMMU_PMU_EVENT_ATTR(iommu_clocks, "event_group=0x0,event=0x001", 0x0, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_requests, "event_group=0x0,event=0x002", 0x0, 0x002)
IOMMU_PMU_EVENT_ATTR(pw_occupancy, "event_group=0x0,event=0x004", 0x0, 0x004)
IOMMU_PMU_EVENT_ATTR(ats_blocked, "event_group=0x0,event=0x008", 0x0, 0x008)
IOMMU_PMU_EVENT_ATTR(iommu_mrds, "event_group=0x1,event=0x001", 0x1, 0x001)
IOMMU_PMU_EVENT_ATTR(iommu_mem_blocked, "event_group=0x1,event=0x020", 0x1, 0x020)
IOMMU_PMU_EVENT_ATTR(pg_req_posted, "event_group=0x1,event=0x040", 0x1, 0x040)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_lookup, "event_group=0x2,event=0x001", 0x2, 0x001)
IOMMU_PMU_EVENT_ATTR(ctxt_cache_hit, "event_group=0x2,event=0x002", 0x2, 0x002)
IOMMU_PMU_EVENT_ATTR(pasid_cache_lookup, "event_group=0x2,event=0x004", 0x2, 0x004)
IOMMU_PMU_EVENT_ATTR(pasid_cache_hit, "event_group=0x2,event=0x008", 0x2, 0x008)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_lookup, "event_group=0x2,event=0x010", 0x2, 0x010)
IOMMU_PMU_EVENT_ATTR(ss_nonleaf_hit, "event_group=0x2,event=0x020", 0x2, 0x020)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_lookup, "event_group=0x2,event=0x040", 0x2, 0x040)
IOMMU_PMU_EVENT_ATTR(fs_nonleaf_hit, "event_group=0x2,event=0x080", 0x2, 0x080)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_lookup, "event_group=0x2,event=0x100", 0x2, 0x100)
IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_hit, "event_group=0x2,event=0x200", 0x2, 0x200)
IOMMU_PMU_EVENT_ATTR(iotlb_lookup, "event_group=0x3,event=0x001", 0x3, 0x001)
IOMMU_PMU_EVENT_ATTR(iotlb_hit, "event_group=0x3,event=0x002", 0x3, 0x002)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_lookup, "event_group=0x3,event=0x004", 0x3, 0x004)
IOMMU_PMU_EVENT_ATTR(hpt_leaf_hit, "event_group=0x3,event=0x008", 0x3, 0x008)
IOMMU_PMU_EVENT_ATTR(int_cache_lookup, "event_group=0x4,event=0x001", 0x4, 0x001)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_nonposted, "event_group=0x4,event=0x002", 0x4, 0x002)
IOMMU_PMU_EVENT_ATTR(int_cache_hit_posted, "event_group=0x4,event=0x004", 0x4, 0x004)

static const struct attribute_group *iommu_pmu_attr_update[] = {
        &filter_requester_id_en,
        &filter_domain_en,
        &filter_pasid_en,
        &filter_ats_en,
        &filter_page_table_en,
        &filter_requester_id,
        &filter_domain,
        &filter_pasid,
        &filter_ats,
        &filter_page_table,
        &iommu_clocks,
        &iommu_requests,
        &pw_occupancy,
        &ats_blocked,
        &iommu_mrds,
        &iommu_mem_blocked,
        &pg_req_posted,
        &ctxt_cache_lookup,
        &ctxt_cache_hit,
        &pasid_cache_lookup,
        &pasid_cache_hit,
        &ss_nonleaf_lookup,
        &ss_nonleaf_hit,
        &fs_nonleaf_lookup,
        &fs_nonleaf_hit,
        &hpt_nonleaf_lookup,
        &hpt_nonleaf_hit,
        &iotlb_lookup,
        &iotlb_hit,
        &hpt_leaf_lookup,
        &hpt_leaf_hit,
        &int_cache_lookup,
        &int_cache_hit_nonposted,
        &int_cache_hit_posted,
        NULL
};
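
/*
 * Illustrative usage of the attributes exported above (device name and the
 * requester ID value are examples only and depend on the platform; this is
 * not part of the driver interface itself):
 *
 *   # count IOMMU clock cycles on dmar0, system-wide, for one second
 *   perf stat -e dmar0/iommu_clocks/ -a sleep 1
 *
 *   # count only requests from requester ID 0x100, using the optional
 *   # filter attributes (visible only when the HW reports the capability)
 *   perf stat -e dmar0/iommu_requests,filter_requester_id_en=1,filter_requester_id=0x100/ -a sleep 1
 */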

static inline void __iomem *
iommu_event_base(struct iommu_pmu *iommu_pmu, int idx)
{
        return iommu_pmu->cntr_reg + idx * iommu_pmu->cntr_stride;
}

static inline void __iomem *
iommu_config_base(struct iommu_pmu *iommu_pmu, int idx)
{
        return iommu_pmu->cfg_reg + idx * IOMMU_PMU_CFG_OFFSET;
}

static inline struct iommu_pmu *iommu_event_to_pmu(struct perf_event *event)
{
        return container_of(event->pmu, struct iommu_pmu, pmu);
}

static inline u64 iommu_event_config(struct perf_event *event)
{
        u64 config = event->attr.config;

        return (iommu_event_select(config) << IOMMU_EVENT_CFG_ES_SHIFT) |
               (iommu_event_group(config) << IOMMU_EVENT_CFG_EGI_SHIFT) |
               IOMMU_EVENT_CFG_INT;
}

static inline bool is_iommu_pmu_event(struct iommu_pmu *iommu_pmu,
                                      struct perf_event *event)
{
        return event->pmu == &iommu_pmu->pmu;
}

static int iommu_pmu_validate_event(struct perf_event *event)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        u32 event_group = iommu_event_group(event->attr.config);

        if (event_group >= iommu_pmu->num_eg)
                return -EINVAL;

        return 0;
}

static int iommu_pmu_validate_group(struct perf_event *event)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        struct perf_event *sibling;
        int nr = 0;

        /*
         * All events in a group must be scheduled simultaneously.
         * Check whether there are enough counters for all the events.
         */
        for_each_sibling_event(sibling, event->group_leader) {
                if (!is_iommu_pmu_event(iommu_pmu, sibling) ||
                    sibling->state <= PERF_EVENT_STATE_OFF)
                        continue;

                if (++nr > iommu_pmu->num_cntr)
                        return -EINVAL;
        }

        return 0;
}

static int iommu_pmu_event_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        /* sampling not supported */
        if (event->attr.sample_period)
                return -EINVAL;

        if (event->cpu < 0)
                return -EINVAL;

        if (iommu_pmu_validate_event(event))
                return -EINVAL;

        hwc->config = iommu_event_config(event);

        return iommu_pmu_validate_group(event);
}

static void iommu_pmu_event_update(struct perf_event *event)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        struct hw_perf_event *hwc = &event->hw;
        u64 prev_count, new_count, delta;
        int shift = 64 - iommu_pmu->cntr_width;

again:
        prev_count = local64_read(&hwc->prev_count);
        new_count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
        if (local64_xchg(&hwc->prev_count, new_count) != prev_count)
                goto again;

        /*
         * The counter width is enumerated. Always shift the counter
         * before using it.
         */
        delta = (new_count << shift) - (prev_count << shift);
        delta >>= shift;

        local64_add(delta, &event->count);
}

static void iommu_pmu_start(struct perf_event *event, int flags)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        struct intel_iommu *iommu = iommu_pmu->iommu;
        struct hw_perf_event *hwc = &event->hw;
        u64 count;

        if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
                return;

        if (WARN_ON_ONCE(hwc->idx < 0 || hwc->idx >= IOMMU_PMU_IDX_MAX))
                return;

        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

        hwc->state = 0;

        /* Always reprogram the period */
        count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx));
        local64_set((&hwc->prev_count), count);

        /*
         * An ecmd error is ignored here:
         * - The existing perf_event subsystem doesn't handle the error.
         *   Only the IOMMU PMU returns a runtime HW error, and we don't
         *   want to change the existing generic interfaces for this
         *   specific case.
         * - It's a corner case caused by HW, which is very unlikely to
         *   happen. There is nothing SW can do about it.
         * - In the worst case the user sees <not counted> in the perf
         *   output, which is a hint by itself.
         */
        ecmd_submit_sync(iommu, DMA_ECMD_ENABLE, hwc->idx, 0);

        perf_event_update_userpage(event);
}

static void iommu_pmu_stop(struct perf_event *event, int flags)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        struct intel_iommu *iommu = iommu_pmu->iommu;
        struct hw_perf_event *hwc = &event->hw;

        if (!(hwc->state & PERF_HES_STOPPED)) {
                ecmd_submit_sync(iommu, DMA_ECMD_DISABLE, hwc->idx, 0);

                iommu_pmu_event_update(event);

                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        }
}

static inline int
iommu_pmu_validate_per_cntr_event(struct iommu_pmu *iommu_pmu,
                                  int idx, struct perf_event *event)
{
        u32 event_group = iommu_event_group(event->attr.config);
        u32 select = iommu_event_select(event->attr.config);

        if (!(iommu_pmu->cntr_evcap[idx][event_group] & select))
                return -EINVAL;

        return 0;
}

static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
                                  struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        int idx;

        /*
         * The counters which support only a limited set of events are
         * usually at the end. Schedule them first to accommodate more
         * events.
         */
        for (idx = iommu_pmu->num_cntr - 1; idx >= 0; idx--) {
                if (test_and_set_bit(idx, iommu_pmu->used_mask))
                        continue;
                /* Check per-counter event capabilities */
                if (!iommu_pmu_validate_per_cntr_event(iommu_pmu, idx, event))
                        break;
                clear_bit(idx, iommu_pmu->used_mask);
        }
        if (idx < 0)
                return -EINVAL;

        iommu_pmu->event_list[idx] = event;
        hwc->idx = idx;

        /* config events */
        dmar_writeq(iommu_config_base(iommu_pmu, idx), hwc->config);

        iommu_pmu_set_filter(requester_id, event->attr.config1,
                             IOMMU_PMU_FILTER_REQUESTER_ID, idx,
                             event->attr.config1);
        iommu_pmu_set_filter(domain, event->attr.config1,
                             IOMMU_PMU_FILTER_DOMAIN, idx,
                             event->attr.config1);
        iommu_pmu_set_filter(pasid, event->attr.config1,
                             IOMMU_PMU_FILTER_PASID, idx,
                             event->attr.config1);
        iommu_pmu_set_filter(ats, event->attr.config2,
                             IOMMU_PMU_FILTER_ATS, idx,
                             event->attr.config1);
        iommu_pmu_set_filter(page_table, event->attr.config2,
                             IOMMU_PMU_FILTER_PAGE_TABLE, idx,
                             event->attr.config1);

        return 0;
}

static int iommu_pmu_add(struct perf_event *event, int flags)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        struct hw_perf_event *hwc = &event->hw;
        int ret;

        ret = iommu_pmu_assign_event(iommu_pmu, event);
        if (ret < 0)
                return ret;

        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (flags & PERF_EF_START)
                iommu_pmu_start(event, 0);

        return 0;
}

static void iommu_pmu_del(struct perf_event *event, int flags)
{
        struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event);
        int idx = event->hw.idx;

        iommu_pmu_stop(event, PERF_EF_UPDATE);

        iommu_pmu_clear_filter(IOMMU_PMU_FILTER_REQUESTER_ID, idx);
        iommu_pmu_clear_filter(IOMMU_PMU_FILTER_DOMAIN, idx);
        iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PASID, idx);
        iommu_pmu_clear_filter(IOMMU_PMU_FILTER_ATS, idx);
        iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PAGE_TABLE, idx);

        iommu_pmu->event_list[idx] = NULL;
        event->hw.idx = -1;
        clear_bit(idx, iommu_pmu->used_mask);

        perf_event_update_userpage(event);
}

static void iommu_pmu_enable(struct pmu *pmu)
{
        struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
        struct intel_iommu *iommu = iommu_pmu->iommu;

        ecmd_submit_sync(iommu, DMA_ECMD_UNFREEZE, 0, 0);
}

static void iommu_pmu_disable(struct pmu *pmu)
{
        struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu);
        struct intel_iommu *iommu = iommu_pmu->iommu;

        ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, 0);
}

static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu)
{
        struct perf_event *event;
        u64 status;
        int i;

        /*
         * Two counters may overflow very close to each other. Always
         * check whether there are more overflows to handle.
         */
        while ((status = dmar_readq(iommu_pmu->overflow))) {
                for_each_set_bit(i, (unsigned long *)&status, iommu_pmu->num_cntr) {
                        /*
                         * Find the assigned event of the counter.
                         * Accumulate the value into the event->count.
                         */
                        event = iommu_pmu->event_list[i];
                        if (!event) {
                                pr_warn_once("Cannot find the assigned event for counter %d\n", i);
                                continue;
                        }
                        iommu_pmu_event_update(event);
                }

                dmar_writeq(iommu_pmu->overflow, status);
        }
}

static irqreturn_t iommu_pmu_irq_handler(int irq, void *dev_id)
{
        struct intel_iommu *iommu = dev_id;

        if (!dmar_readl(iommu->reg + DMAR_PERFINTRSTS_REG))
                return IRQ_NONE;

        iommu_pmu_counter_overflow(iommu->pmu);

        /* Clear the status bit */
        dmar_writel(iommu->reg + DMAR_PERFINTRSTS_REG, DMA_PERFINTRSTS_PIS);

        return IRQ_HANDLED;
}
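
/*
 * Note on iommu_pmu_counter_overflow() above (an observation derived from
 * the code, not a statement quoted from the spec): writing the captured
 * status value back to the overflow register assumes the usual
 * write-1-to-clear behavior, so only the bits that were just serviced are
 * cleared. An overflow that lands between the read and the write sets a
 * new bit and is picked up by the next iteration of the while loop.
 */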

static int __iommu_pmu_register(struct intel_iommu *iommu)
{
        struct iommu_pmu *iommu_pmu = iommu->pmu;

        iommu_pmu->pmu.name = iommu->name;
        iommu_pmu->pmu.task_ctx_nr = perf_invalid_context;
        iommu_pmu->pmu.event_init = iommu_pmu_event_init;
        iommu_pmu->pmu.pmu_enable = iommu_pmu_enable;
        iommu_pmu->pmu.pmu_disable = iommu_pmu_disable;
        iommu_pmu->pmu.add = iommu_pmu_add;
        iommu_pmu->pmu.del = iommu_pmu_del;
        iommu_pmu->pmu.start = iommu_pmu_start;
        iommu_pmu->pmu.stop = iommu_pmu_stop;
        iommu_pmu->pmu.read = iommu_pmu_event_update;
        iommu_pmu->pmu.attr_groups = iommu_pmu_attr_groups;
        iommu_pmu->pmu.attr_update = iommu_pmu_attr_update;
        iommu_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
        iommu_pmu->pmu.module = THIS_MODULE;

        return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1);
}

static inline void __iomem *
get_perf_reg_address(struct intel_iommu *iommu, u32 offset)
{
        u32 off = dmar_readl(iommu->reg + offset);

        return iommu->reg + off;
}

int alloc_iommu_pmu(struct intel_iommu *iommu)
{
        struct iommu_pmu *iommu_pmu;
        int i, j, ret;
        u64 perfcap;
        u32 cap;

        if (!ecap_pms(iommu->ecap))
                return 0;

        /* The IOMMU PMU requires the ECMD support as well */
        if (!cap_ecmds(iommu->cap))
                return -ENODEV;

        perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG);
        /* The performance monitoring is not supported. */
        if (!perfcap)
                return -ENODEV;

        /* Sanity check the number of counters and event groups */
        if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap))
                return -ENODEV;

        /* The interrupt on overflow is required */
        if (!pcap_interrupt(perfcap))
                return -ENODEV;

        /* Check required Enhanced Command Capability */
        if (!ecmd_has_pmu_essential(iommu))
                return -ENODEV;

        iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL);
        if (!iommu_pmu)
                return -ENOMEM;

        iommu_pmu->num_cntr = pcap_num_cntr(perfcap);
        if (iommu_pmu->num_cntr > IOMMU_PMU_IDX_MAX) {
                pr_warn_once("The number of IOMMU counters %d > max(%d), clipping!",
                             iommu_pmu->num_cntr, IOMMU_PMU_IDX_MAX);
                iommu_pmu->num_cntr = IOMMU_PMU_IDX_MAX;
        }

        iommu_pmu->cntr_width = pcap_cntr_width(perfcap);
        iommu_pmu->filter = pcap_filters_mask(perfcap);
        iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap);
        iommu_pmu->num_eg = pcap_num_event_group(perfcap);

        iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL);
        if (!iommu_pmu->evcap) {
                ret = -ENOMEM;
                goto free_pmu;
        }

        /* Parse event group capabilities */
        for (i = 0; i < iommu_pmu->num_eg; i++) {
                u64 pcap;

                pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG +
                                  i * IOMMU_PMU_CAP_REGS_STEP);
                iommu_pmu->evcap[i] = pecap_es(pcap);
        }

        iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL);
        if (!iommu_pmu->cntr_evcap) {
                ret = -ENOMEM;
                goto free_pmu_evcap;
        }
        for (i = 0; i < iommu_pmu->num_cntr; i++) {
                iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL);
                if (!iommu_pmu->cntr_evcap[i]) {
                        ret = -ENOMEM;
                        goto free_pmu_cntr_evcap;
                }
                /*
                 * Set to the global capabilities; adjust according to the
                 * per-counter capabilities later.
                 */
                for (j = 0; j < iommu_pmu->num_eg; j++)
                        iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j];
        }

        iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG);
        iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG);
        iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG);

        /*
         * Check per-counter capabilities. All counters should have the
         * same capabilities on Interrupt on Overflow Support and Counter
         * Width.
         */
        for (i = 0; i < iommu_pmu->num_cntr; i++) {
                cap = dmar_readl(iommu_pmu->cfg_reg +
                                 i * IOMMU_PMU_CFG_OFFSET +
                                 IOMMU_PMU_CFG_CNTRCAP_OFFSET);
                if (!iommu_cntrcap_pcc(cap))
                        continue;

                /*
                 * It's possible that some counters have a different
                 * capability because of, e.g., a HW bug. Check the corner
                 * case here and simply drop those counters.
                 */
                if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) ||
                    !iommu_cntrcap_ios(cap)) {
                        iommu_pmu->num_cntr = i;
                        pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n",
                                iommu_pmu->num_cntr);
                }

                /* Clear the pre-defined events group */
                for (j = 0; j < iommu_pmu->num_eg; j++)
                        iommu_pmu->cntr_evcap[i][j] = 0;

                /* Override with per-counter event capabilities */
                for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) {
                        cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET +
                                         IOMMU_PMU_CFG_CNTREVCAP_OFFSET +
                                         (j * IOMMU_PMU_OFF_REGS_STEP));
                        iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap);
                        /*
                         * Some events may only be supported by a specific
                         * counter. Track them in the evcap as well.
                         */
                        iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap);
                }
        }

        iommu_pmu->iommu = iommu;
        iommu->pmu = iommu_pmu;

        return 0;

free_pmu_cntr_evcap:
        for (i = 0; i < iommu_pmu->num_cntr; i++)
                kfree(iommu_pmu->cntr_evcap[i]);
        kfree(iommu_pmu->cntr_evcap);
free_pmu_evcap:
        kfree(iommu_pmu->evcap);
free_pmu:
        kfree(iommu_pmu);

        return ret;
}

void free_iommu_pmu(struct intel_iommu *iommu)
{
        struct iommu_pmu *iommu_pmu = iommu->pmu;

        if (!iommu_pmu)
                return;

        if (iommu_pmu->evcap) {
                int i;

                for (i = 0; i < iommu_pmu->num_cntr; i++)
                        kfree(iommu_pmu->cntr_evcap[i]);
                kfree(iommu_pmu->cntr_evcap);
        }
        kfree(iommu_pmu->evcap);
        kfree(iommu_pmu);
        iommu->pmu = NULL;
}

static int iommu_pmu_set_interrupt(struct intel_iommu *iommu)
{
        struct iommu_pmu *iommu_pmu = iommu->pmu;
        int irq, ret;

        irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PERF + iommu->seq_id, iommu->node, iommu);
        if (irq <= 0)
                return -EINVAL;

        snprintf(iommu_pmu->irq_name, sizeof(iommu_pmu->irq_name), "dmar%d-perf", iommu->seq_id);

        iommu->perf_irq = irq;
        ret = request_threaded_irq(irq, NULL, iommu_pmu_irq_handler,
                                   IRQF_ONESHOT, iommu_pmu->irq_name, iommu);
        if (ret) {
                dmar_free_hwirq(irq);
                iommu->perf_irq = 0;
                return ret;
        }
        return 0;
}

static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu)
{
        if (!iommu->perf_irq)
                return;

        free_irq(iommu->perf_irq, iommu);
        dmar_free_hwirq(iommu->perf_irq);
        iommu->perf_irq = 0;
}

static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
        struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);

        if (cpumask_empty(&iommu_pmu_cpu_mask))
                cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);

        if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask))
                iommu_pmu->cpu = cpu;

        return 0;
}

static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
        struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node);
        int target = cpumask_first(&iommu_pmu_cpu_mask);

        /*
         * The iommu_pmu_cpu_mask has already been updated when offlining
         * the CPU for the first iommu_pmu. Migrate the other iommu_pmus
         * to the new target.
         */
        if (target < nr_cpu_ids && target != iommu_pmu->cpu) {
                perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
                iommu_pmu->cpu = target;
                return 0;
        }

        if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
                return 0;

        target = cpumask_any_but(cpu_online_mask, cpu);

        if (target < nr_cpu_ids)
                cpumask_set_cpu(target, &iommu_pmu_cpu_mask);
        else
                return 0;

        perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target);
        iommu_pmu->cpu = target;

        return 0;
}

static int nr_iommu_pmu;
static enum cpuhp_state iommu_cpuhp_slot;

static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
{
        int ret;

        if (!nr_iommu_pmu) {
                ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
                                              "driver/iommu/intel/perfmon:online",
                                              iommu_pmu_cpu_online,
                                              iommu_pmu_cpu_offline);
                if (ret < 0)
                        return ret;
                iommu_cpuhp_slot = ret;
        }

        ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);
        if (ret) {
                if (!nr_iommu_pmu)
                        cpuhp_remove_multi_state(iommu_cpuhp_slot);
                return ret;
        }
        nr_iommu_pmu++;

        return 0;
}

static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
{
        cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node);

        if (--nr_iommu_pmu)
                return;

        cpuhp_remove_multi_state(iommu_cpuhp_slot);
}

void iommu_pmu_register(struct intel_iommu *iommu)
{
        struct iommu_pmu *iommu_pmu = iommu->pmu;

        if (!iommu_pmu)
                return;

        if (__iommu_pmu_register(iommu))
                goto err;

        if (iommu_pmu_cpuhp_setup(iommu_pmu))
                goto unregister;

        /* Set interrupt for overflow */
        if (iommu_pmu_set_interrupt(iommu))
                goto cpuhp_free;

        return;

cpuhp_free:
        iommu_pmu_cpuhp_free(iommu_pmu);
unregister:
        perf_pmu_unregister(&iommu_pmu->pmu);
err:
        pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id);
        free_iommu_pmu(iommu);
}

void iommu_pmu_unregister(struct intel_iommu *iommu)
{
        struct iommu_pmu *iommu_pmu = iommu->pmu;

        if (!iommu_pmu)
                return;

        iommu_pmu_unset_interrupt(iommu);
        iommu_pmu_cpuhp_free(iommu_pmu);
        perf_pmu_unregister(&iommu_pmu->pmu);
}