xref: /linux/drivers/perf/cxl_pmu.c (revision 7f71507851fc7764b36a3221839607d3a45c2025)
1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 /*
4  * Copyright(c) 2023 Huawei
5  *
6  * The CXL 3.0 specification includes a standard Performance Monitoring Unit,
7  * called the CXL PMU, or CPMU. In order to allow a high degree of
8  * implementation flexibility the specification provides a wide range of
9  * options all of which are self describing.
10  *
11  * Details in CXL rev 3.0 section 8.2.7 CPMU Register Interface
12  */
13 
14 #include <linux/io-64-nonatomic-lo-hi.h>
15 #include <linux/perf_event.h>
16 #include <linux/bitops.h>
17 #include <linux/device.h>
18 #include <linux/bits.h>
19 #include <linux/list.h>
20 #include <linux/bug.h>
21 #include <linux/pci.h>
22 
23 #include "../cxl/cxlpci.h"
24 #include "../cxl/cxl.h"
25 #include "../cxl/pmu.h"
26 
27 #define CXL_PMU_CAP_REG			0x0
28 #define   CXL_PMU_CAP_NUM_COUNTERS_MSK			GENMASK_ULL(5, 0)
29 #define   CXL_PMU_CAP_COUNTER_WIDTH_MSK			GENMASK_ULL(15, 8)
30 #define   CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK		GENMASK_ULL(24, 20)
31 #define   CXL_PMU_CAP_FILTERS_SUP_MSK			GENMASK_ULL(39, 32)
32 #define     CXL_PMU_FILTER_HDM				BIT(0)
33 #define     CXL_PMU_FILTER_CHAN_RANK_BANK		BIT(1)
34 #define   CXL_PMU_CAP_MSI_N_MSK				GENMASK_ULL(47, 44)
35 #define   CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN		BIT_ULL(48)
36 #define   CXL_PMU_CAP_FREEZE				BIT_ULL(49)
37 #define   CXL_PMU_CAP_INT				BIT_ULL(50)
38 #define   CXL_PMU_CAP_VERSION_MSK			GENMASK_ULL(63, 60)
39 
40 #define CXL_PMU_OVERFLOW_REG		0x10
41 #define CXL_PMU_FREEZE_REG		0x18
42 #define CXL_PMU_EVENT_CAP_REG(n)	(0x100 + 8 * (n))
43 #define   CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK	GENMASK_ULL(31, 0)
44 #define   CXL_PMU_EVENT_CAP_GROUP_ID_MSK		GENMASK_ULL(47, 32)
45 #define   CXL_PMU_EVENT_CAP_VENDOR_ID_MSK		GENMASK_ULL(63, 48)
46 
47 #define CXL_PMU_COUNTER_CFG_REG(n)	(0x200 + 8 * (n))
48 #define   CXL_PMU_COUNTER_CFG_TYPE_MSK			GENMASK_ULL(1, 0)
49 #define     CXL_PMU_COUNTER_CFG_TYPE_FREE_RUN		0
50 #define     CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN		1
51 #define     CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE	2
52 #define   CXL_PMU_COUNTER_CFG_ENABLE			BIT_ULL(8)
53 #define   CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW		BIT_ULL(9)
54 #define   CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW		BIT_ULL(10)
55 #define   CXL_PMU_COUNTER_CFG_EDGE			BIT_ULL(11)
56 #define   CXL_PMU_COUNTER_CFG_INVERT			BIT_ULL(12)
57 #define   CXL_PMU_COUNTER_CFG_THRESHOLD_MSK		GENMASK_ULL(23, 16)
58 #define   CXL_PMU_COUNTER_CFG_EVENTS_MSK		GENMASK_ULL(55, 24)
59 #define   CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK	GENMASK_ULL(63, 59)
60 
61 #define CXL_PMU_FILTER_CFG_REG(n, f)	(0x400 + 4 * ((f) + (n) * 8))
62 #define   CXL_PMU_FILTER_CFG_VALUE_MSK			GENMASK(31, 0)
63 
64 #define CXL_PMU_COUNTER_REG(n)		(0xc00 + 8 * (n))
65 
66 /* CXL rev 3.0 Table 13-5 Events under CXL Vendor ID */
67 #define CXL_PMU_GID_CLOCK_TICKS		0x00
68 #define CXL_PMU_GID_D2H_REQ		0x0010
69 #define CXL_PMU_GID_D2H_RSP		0x0011
70 #define CXL_PMU_GID_H2D_REQ		0x0012
71 #define CXL_PMU_GID_H2D_RSP		0x0013
72 #define CXL_PMU_GID_CACHE_DATA		0x0014
73 #define CXL_PMU_GID_M2S_REQ		0x0020
74 #define CXL_PMU_GID_M2S_RWD		0x0021
75 #define CXL_PMU_GID_M2S_BIRSP		0x0022
76 #define CXL_PMU_GID_S2M_BISNP		0x0023
77 #define CXL_PMU_GID_S2M_NDR		0x0024
78 #define CXL_PMU_GID_S2M_DRS		0x0025
79 #define CXL_PMU_GID_DDR			0x8000
80 
81 static int cxl_pmu_cpuhp_state_num;
82 
/*
 * One set of events that a counter is able to count, as discovered by
 * cxl_pmu_parse_caps(). Entries live on either the fixed or the configurable
 * event capability list of a struct cxl_pmu_info.
 */
struct cxl_pmu_ev_cap {
	u16 vid;	/* Vendor ID field of the Event Capability register */
	u16 gid;	/* Group ID field of the Event Capability register */
	u32 msk;	/* Bitmask of events within the vid / gid group */
	union {
		int counter_idx; /* fixed counters */
		int event_idx; /* configurable counters */
	};
	struct list_head node;	/* Link on one of the event_caps_* lists */
};
93 
/* Size of the counter bitmaps below */
#define CXL_PMU_MAX_COUNTERS 64
/* Driver state for a single CPMU instance */
struct cxl_pmu_info {
	struct pmu pmu;
	void __iomem *base;	/* CPMU register block */
	/* Event currently occupying each counter, NULL if the counter is free */
	struct perf_event **hw_events;
	struct list_head event_caps_configurable;
	struct list_head event_caps_fixed;
	/* Counters currently holding an event */
	DECLARE_BITMAP(used_counter_bm, CXL_PMU_MAX_COUNTERS);
	/* Counters whose type is reported as configurable */
	DECLARE_BITMAP(conf_counter_bm, CXL_PMU_MAX_COUNTERS);
	u16 counter_width;	/* Counter width in bits, from the capability register */
	u8 num_counters;
	u8 num_event_capabilities;
	int on_cpu;		/* CPU events are bound to, -1 when none is assigned */
	struct hlist_node node;	/* CPU hotplug multi-state instance */
	bool filter_hdm;	/* HDM filter reported as supported */
	/*
	 * Set from the MSI number field in cxl_pmu_parse_caps() (-1 if the
	 * CPMU has no interrupt support), then replaced by the Linux IRQ
	 * number in cxl_pmu_probe().
	 */
	int irq;
};
111 
112 #define pmu_to_cxl_pmu_info(_pmu) container_of(_pmu, struct cxl_pmu_info, pmu)
113 
114 /*
115  * All CPMU counters are discoverable via the Event Capabilities Registers.
116  * Each Event Capability register contains a a VID / GroupID.
117  * A counter may then count any combination (by summing) of events in
118  * that group which are in the Supported Events Bitmask.
119  * However, there are some complexities to the scheme.
120  *  - Fixed function counters refer to an Event Capabilities register.
121  *    That event capability register is not then used for Configurable
122  *    counters.
123  */
124 static int cxl_pmu_parse_caps(struct device *dev, struct cxl_pmu_info *info)
125 {
126 	unsigned long fixed_counter_event_cap_bm = 0;
127 	void __iomem *base = info->base;
128 	bool freeze_for_enable;
129 	u64 val, eval;
130 	int i;
131 
132 	val = readq(base + CXL_PMU_CAP_REG);
133 	freeze_for_enable = FIELD_GET(CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN, val) &&
134 		FIELD_GET(CXL_PMU_CAP_FREEZE, val);
135 	if (!freeze_for_enable) {
136 		dev_err(dev, "Counters not writable while frozen\n");
137 		return -ENODEV;
138 	}
139 
140 	info->num_counters = FIELD_GET(CXL_PMU_CAP_NUM_COUNTERS_MSK, val) + 1;
141 	info->counter_width = FIELD_GET(CXL_PMU_CAP_COUNTER_WIDTH_MSK, val);
142 	info->num_event_capabilities = FIELD_GET(CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK, val) + 1;
143 
144 	info->filter_hdm = FIELD_GET(CXL_PMU_CAP_FILTERS_SUP_MSK, val) & CXL_PMU_FILTER_HDM;
145 	if (FIELD_GET(CXL_PMU_CAP_INT, val))
146 		info->irq = FIELD_GET(CXL_PMU_CAP_MSI_N_MSK, val);
147 	else
148 		info->irq = -1;
149 
150 	/* First handle fixed function counters; note if configurable counters found */
151 	for (i = 0; i < info->num_counters; i++) {
152 		struct cxl_pmu_ev_cap *pmu_ev;
153 		u32 events_msk;
154 		u8 group_idx;
155 
156 		val = readq(base + CXL_PMU_COUNTER_CFG_REG(i));
157 
158 		if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) ==
159 			CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE) {
160 			set_bit(i, info->conf_counter_bm);
161 		}
162 
163 		if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) !=
164 		    CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN)
165 			continue;
166 
167 		/* In this case we know which fields are const */
168 		group_idx = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK, val);
169 		events_msk = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENTS_MSK, val);
170 		eval = readq(base + CXL_PMU_EVENT_CAP_REG(group_idx));
171 		pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
172 		if (!pmu_ev)
173 			return -ENOMEM;
174 
175 		pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
176 		pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
177 		/* For a fixed purpose counter use the events mask from the counter CFG */
178 		pmu_ev->msk = events_msk;
179 		pmu_ev->counter_idx = i;
180 		/* This list add is never unwound as all entries deleted on remove */
181 		list_add(&pmu_ev->node, &info->event_caps_fixed);
182 		/*
183 		 * Configurable counters must not use an Event Capability registers that
184 		 * is in use for a Fixed counter
185 		 */
186 		set_bit(group_idx, &fixed_counter_event_cap_bm);
187 	}
188 
189 	if (!bitmap_empty(info->conf_counter_bm, CXL_PMU_MAX_COUNTERS)) {
190 		struct cxl_pmu_ev_cap *pmu_ev;
191 		int j;
192 		/* Walk event capabilities unused by fixed counters */
193 		for_each_clear_bit(j, &fixed_counter_event_cap_bm,
194 				   info->num_event_capabilities) {
195 			pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
196 			if (!pmu_ev)
197 				return -ENOMEM;
198 
199 			eval = readq(base + CXL_PMU_EVENT_CAP_REG(j));
200 			pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
201 			pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
202 			pmu_ev->msk = FIELD_GET(CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK, eval);
203 			pmu_ev->event_idx = j;
204 			list_add(&pmu_ev->node, &info->event_caps_configurable);
205 		}
206 	}
207 
208 	return 0;
209 }
210 
211 #define CXL_PMU_FORMAT_ATTR(_name, _format)\
212 	(&((struct dev_ext_attribute[]) {					\
213 		{								\
214 			.attr = __ATTR(_name, 0444, device_show_string, NULL),	\
215 			.var = (void *)_format					\
216 		}								\
217 		})[0].attr.attr)
218 
/* Indices into cxl_pmu_format_attr[], one per perf format field */
enum {
	cxl_pmu_mask_attr,
	cxl_pmu_gid_attr,
	cxl_pmu_vid_attr,
	cxl_pmu_threshold_attr,
	cxl_pmu_invert_attr,
	cxl_pmu_edge_attr,
	cxl_pmu_hdm_filter_en_attr,
	cxl_pmu_hdm_attr,
};
229 
/*
 * Attributes of the "format" group. Each string states which bits of
 * perf_event_attr config / config1 / config2 carry the named field; the
 * CXL_PMU_ATTR_CONFIG* masks use the same bit positions.
 */
static struct attribute *cxl_pmu_format_attr[] = {
	[cxl_pmu_mask_attr] = CXL_PMU_FORMAT_ATTR(mask, "config:0-31"),
	[cxl_pmu_gid_attr] = CXL_PMU_FORMAT_ATTR(gid, "config:32-47"),
	[cxl_pmu_vid_attr] = CXL_PMU_FORMAT_ATTR(vid, "config:48-63"),
	[cxl_pmu_threshold_attr] = CXL_PMU_FORMAT_ATTR(threshold, "config1:0-15"),
	[cxl_pmu_invert_attr] = CXL_PMU_FORMAT_ATTR(invert, "config1:16"),
	[cxl_pmu_edge_attr] = CXL_PMU_FORMAT_ATTR(edge, "config1:17"),
	[cxl_pmu_hdm_filter_en_attr] = CXL_PMU_FORMAT_ATTR(hdm_filter_en, "config1:18"),
	[cxl_pmu_hdm_attr] = CXL_PMU_FORMAT_ATTR(hdm, "config2:0-15"),
	NULL
};
241 
242 #define CXL_PMU_ATTR_CONFIG_MASK_MSK		GENMASK_ULL(31, 0)
243 #define CXL_PMU_ATTR_CONFIG_GID_MSK		GENMASK_ULL(47, 32)
244 #define CXL_PMU_ATTR_CONFIG_VID_MSK		GENMASK_ULL(63, 48)
245 #define CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK	GENMASK_ULL(15, 0)
246 #define CXL_PMU_ATTR_CONFIG1_INVERT_MSK		BIT(16)
247 #define CXL_PMU_ATTR_CONFIG1_EDGE_MSK		BIT(17)
248 #define CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK	BIT(18)
249 #define CXL_PMU_ATTR_CONFIG2_HDM_MSK		GENMASK(15, 0)
250 
251 static umode_t cxl_pmu_format_is_visible(struct kobject *kobj,
252 					 struct attribute *attr, int a)
253 {
254 	struct device *dev = kobj_to_dev(kobj);
255 	struct cxl_pmu_info *info = dev_get_drvdata(dev);
256 
257 	/*
258 	 * Filter capability at the CPMU level, so hide the attributes if the particular
259 	 * filter is not supported.
260 	 */
261 	if (!info->filter_hdm &&
262 	    (attr == cxl_pmu_format_attr[cxl_pmu_hdm_filter_en_attr] ||
263 	     attr == cxl_pmu_format_attr[cxl_pmu_hdm_attr]))
264 		return 0;
265 
266 	return attr->mode;
267 }
268 
/* "format" attribute group; HDM entries are hidden by cxl_pmu_format_is_visible() */
static const struct attribute_group cxl_pmu_format_group = {
	.name = "format",
	.attrs = cxl_pmu_format_attr,
	.is_visible = cxl_pmu_format_is_visible,
};
274 
275 static u32 cxl_pmu_config_get_mask(struct perf_event *event)
276 {
277 	return FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, event->attr.config);
278 }
279 
280 static u16 cxl_pmu_config_get_gid(struct perf_event *event)
281 {
282 	return FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, event->attr.config);
283 }
284 
285 static u16 cxl_pmu_config_get_vid(struct perf_event *event)
286 {
287 	return FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, event->attr.config);
288 }
289 
290 static u8 cxl_pmu_config1_get_threshold(struct perf_event *event)
291 {
292 	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK, event->attr.config1);
293 }
294 
295 static bool cxl_pmu_config1_get_invert(struct perf_event *event)
296 {
297 	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_INVERT_MSK, event->attr.config1);
298 }
299 
300 static bool cxl_pmu_config1_get_edge(struct perf_event *event)
301 {
302 	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_EDGE_MSK, event->attr.config1);
303 }
304 
305 /*
306  * CPMU specification allows for 8 filters, each with a 32 bit value...
307  * So we need to find 8x32bits to store it in.
308  * As the value used for disable is 0xffff_ffff, a separate enable switch
309  * is needed.
310  */
311 
312 static bool cxl_pmu_config1_hdm_filter_en(struct perf_event *event)
313 {
314 	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK, event->attr.config1);
315 }
316 
317 static u16 cxl_pmu_config2_get_hdm_decoder(struct perf_event *event)
318 {
319 	return FIELD_GET(CXL_PMU_ATTR_CONFIG2_HDM_MSK, event->attr.config2);
320 }
321 
322 static ssize_t cxl_pmu_event_sysfs_show(struct device *dev,
323 					struct device_attribute *attr, char *buf)
324 {
325 	struct perf_pmu_events_attr *pmu_attr =
326 		container_of(attr, struct perf_pmu_events_attr, attr);
327 
328 	return sysfs_emit(buf, "config=%#llx\n", pmu_attr->id);
329 }
330 
331 #define CXL_PMU_EVENT_ATTR(_name, _vid, _gid, _msk)			\
332 	PMU_EVENT_ATTR_ID(_name, cxl_pmu_event_sysfs_show,		\
333 			  ((u64)(_vid) << 48) | ((u64)(_gid) << 32) | (u64)(_msk))
334 
335 /* For CXL spec defined events */
336 #define CXL_PMU_EVENT_CXL_ATTR(_name, _gid, _msk)			\
337 	CXL_PMU_EVENT_ATTR(_name, PCI_VENDOR_ID_CXL, _gid, _msk)
338 
339 static struct attribute *cxl_pmu_event_attrs[] = {
340 	CXL_PMU_EVENT_CXL_ATTR(clock_ticks,			CXL_PMU_GID_CLOCK_TICKS, BIT(0)),
341 	/* CXL rev 3.0 Table 3-17 - Device to Host Requests */
342 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdcurr,			CXL_PMU_GID_D2H_REQ, BIT(1)),
343 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdown,			CXL_PMU_GID_D2H_REQ, BIT(2)),
344 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdshared,		CXL_PMU_GID_D2H_REQ, BIT(3)),
345 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdany,			CXL_PMU_GID_D2H_REQ, BIT(4)),
346 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdownnodata,		CXL_PMU_GID_D2H_REQ, BIT(5)),
347 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_itomwr,			CXL_PMU_GID_D2H_REQ, BIT(6)),
348 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrcurr,			CXL_PMU_GID_D2H_REQ, BIT(7)),
349 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_clflush,			CXL_PMU_GID_D2H_REQ, BIT(8)),
350 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevict,		CXL_PMU_GID_D2H_REQ, BIT(9)),
351 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_dirtyevict,		CXL_PMU_GID_D2H_REQ, BIT(10)),
352 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevictnodata,	CXL_PMU_GID_D2H_REQ, BIT(11)),
353 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinv,			CXL_PMU_GID_D2H_REQ, BIT(12)),
354 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinvf,		CXL_PMU_GID_D2H_REQ, BIT(13)),
355 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrinv,			CXL_PMU_GID_D2H_REQ, BIT(14)),
356 	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cacheflushed,		CXL_PMU_GID_D2H_REQ, BIT(16)),
357 	/* CXL rev 3.0 Table 3-20 - D2H Response Encodings */
358 	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihiti,		CXL_PMU_GID_D2H_RSP, BIT(4)),
359 	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvhitv,		CXL_PMU_GID_D2H_RSP, BIT(6)),
360 	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihitse,		CXL_PMU_GID_D2H_RSP, BIT(5)),
361 	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspshitse,		CXL_PMU_GID_D2H_RSP, BIT(1)),
362 	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspsfwdm,		CXL_PMU_GID_D2H_RSP, BIT(7)),
363 	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspifwdm,		CXL_PMU_GID_D2H_RSP, BIT(15)),
364 	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvfwdv,		CXL_PMU_GID_D2H_RSP, BIT(22)),
365 	/* CXL rev 3.0 Table 3-21 - CXL.cache - Mapping of H2D Requests to D2H Responses */
366 	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpdata,			CXL_PMU_GID_H2D_REQ, BIT(1)),
367 	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpinv,			CXL_PMU_GID_H2D_REQ, BIT(2)),
368 	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpcur,			CXL_PMU_GID_H2D_REQ, BIT(3)),
369 	/* CXL rev 3.0 Table 3-22 - H2D Response Opcode Encodings */
370 	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_writepull,		CXL_PMU_GID_H2D_RSP, BIT(1)),
371 	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_go,			CXL_PMU_GID_H2D_RSP, BIT(4)),
372 	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepull,		CXL_PMU_GID_H2D_RSP, BIT(5)),
373 	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_extcmp,			CXL_PMU_GID_H2D_RSP, BIT(6)),
374 	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepulldrop,		CXL_PMU_GID_H2D_RSP, BIT(8)),
375 	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_fastgowritepull,		CXL_PMU_GID_H2D_RSP, BIT(13)),
376 	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_goerrwritepull,		CXL_PMU_GID_H2D_RSP, BIT(15)),
377 	/* CXL rev 3.0 Table 13-5 directly lists these */
378 	CXL_PMU_EVENT_CXL_ATTR(cachedata_d2h_data,		CXL_PMU_GID_CACHE_DATA, BIT(0)),
379 	CXL_PMU_EVENT_CXL_ATTR(cachedata_h2d_data,		CXL_PMU_GID_CACHE_DATA, BIT(1)),
380 	/* CXL rev 3.1 Table 3-35 M2S Req Memory Opcodes */
381 	CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminv,			CXL_PMU_GID_M2S_REQ, BIT(0)),
382 	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrd,			CXL_PMU_GID_M2S_REQ, BIT(1)),
383 	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddata,		CXL_PMU_GID_M2S_REQ, BIT(2)),
384 	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdfwd,		CXL_PMU_GID_M2S_REQ, BIT(3)),
385 	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memwrfwd,		CXL_PMU_GID_M2S_REQ, BIT(4)),
386 	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdtee,		CXL_PMU_GID_M2S_REQ, BIT(5)),
387 	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddatatee,		CXL_PMU_GID_M2S_REQ, BIT(6)),
388 	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memspecrd,		CXL_PMU_GID_M2S_REQ, BIT(8)),
389 	CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminvnt,		CXL_PMU_GID_M2S_REQ, BIT(9)),
390 	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memcleanevict,		CXL_PMU_GID_M2S_REQ, BIT(10)),
391 	/* CXL rev 3.0 Table 3-35 M2S RwD Memory Opcodes */
392 	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwr,			CXL_PMU_GID_M2S_RWD, BIT(1)),
393 	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwrptl,		CXL_PMU_GID_M2S_RWD, BIT(2)),
394 	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_biconflict,		CXL_PMU_GID_M2S_RWD, BIT(4)),
395 	/* CXL rev 3.0 Table 3-38 M2S BIRsp Memory Opcodes */
396 	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_i,			CXL_PMU_GID_M2S_BIRSP, BIT(0)),
397 	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_s,			CXL_PMU_GID_M2S_BIRSP, BIT(1)),
398 	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_e,			CXL_PMU_GID_M2S_BIRSP, BIT(2)),
399 	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_iblk,			CXL_PMU_GID_M2S_BIRSP, BIT(4)),
400 	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_sblk,			CXL_PMU_GID_M2S_BIRSP, BIT(5)),
401 	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_eblk,			CXL_PMU_GID_M2S_BIRSP, BIT(6)),
402 	/* CXL rev 3.0 Table 3-40 S2M BISnp Opcodes */
403 	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_cur,			CXL_PMU_GID_S2M_BISNP, BIT(0)),
404 	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_data,			CXL_PMU_GID_S2M_BISNP, BIT(1)),
405 	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_inv,			CXL_PMU_GID_S2M_BISNP, BIT(2)),
406 	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_curblk,		CXL_PMU_GID_S2M_BISNP, BIT(4)),
407 	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_datblk,		CXL_PMU_GID_S2M_BISNP, BIT(5)),
408 	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_invblk,		CXL_PMU_GID_S2M_BISNP, BIT(6)),
	/* CXL rev 3.1 Table 3-50 S2M NDR Opcodes */
410 	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp,			CXL_PMU_GID_S2M_NDR, BIT(0)),
411 	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps,			CXL_PMU_GID_S2M_NDR, BIT(1)),
412 	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe,			CXL_PMU_GID_S2M_NDR, BIT(2)),
413 	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpm,			CXL_PMU_GID_S2M_NDR, BIT(3)),
414 	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack,		CXL_PMU_GID_S2M_NDR, BIT(4)),
415 	/* CXL rev 3.0 Table 3-46 S2M DRS opcodes */
416 	CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata,			CXL_PMU_GID_S2M_DRS, BIT(0)),
417 	CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdatanxm,		CXL_PMU_GID_S2M_DRS, BIT(1)),
418 	/* CXL rev 3.0 Table 13-5 directly lists these */
419 	CXL_PMU_EVENT_CXL_ATTR(ddr_act,				CXL_PMU_GID_DDR, BIT(0)),
420 	CXL_PMU_EVENT_CXL_ATTR(ddr_pre,				CXL_PMU_GID_DDR, BIT(1)),
421 	CXL_PMU_EVENT_CXL_ATTR(ddr_casrd,			CXL_PMU_GID_DDR, BIT(2)),
422 	CXL_PMU_EVENT_CXL_ATTR(ddr_caswr,			CXL_PMU_GID_DDR, BIT(3)),
423 	CXL_PMU_EVENT_CXL_ATTR(ddr_refresh,			CXL_PMU_GID_DDR, BIT(4)),
424 	CXL_PMU_EVENT_CXL_ATTR(ddr_selfrefreshent,		CXL_PMU_GID_DDR, BIT(5)),
425 	CXL_PMU_EVENT_CXL_ATTR(ddr_rfm,				CXL_PMU_GID_DDR, BIT(6)),
426 	NULL
427 };
428 
429 static struct cxl_pmu_ev_cap *cxl_pmu_find_fixed_counter_ev_cap(struct cxl_pmu_info *info,
430 								int vid, int gid, int msk)
431 {
432 	struct cxl_pmu_ev_cap *pmu_ev;
433 
434 	list_for_each_entry(pmu_ev, &info->event_caps_fixed, node) {
435 		if (vid != pmu_ev->vid || gid != pmu_ev->gid)
436 			continue;
437 
438 		/* Precise match for fixed counter */
439 		if (msk == pmu_ev->msk)
440 			return pmu_ev;
441 	}
442 
443 	return ERR_PTR(-EINVAL);
444 }
445 
446 static struct cxl_pmu_ev_cap *cxl_pmu_find_config_counter_ev_cap(struct cxl_pmu_info *info,
447 								 int vid, int gid, int msk)
448 {
449 	struct cxl_pmu_ev_cap *pmu_ev;
450 
451 	list_for_each_entry(pmu_ev, &info->event_caps_configurable, node) {
452 		if (vid != pmu_ev->vid || gid != pmu_ev->gid)
453 			continue;
454 
455 		/* Request mask must be subset of supported */
456 		if (msk & ~pmu_ev->msk)
457 			continue;
458 
459 		return pmu_ev;
460 	}
461 
462 	return ERR_PTR(-EINVAL);
463 }
464 
465 static umode_t cxl_pmu_event_is_visible(struct kobject *kobj, struct attribute *attr, int a)
466 {
467 	struct device_attribute *dev_attr = container_of(attr, struct device_attribute, attr);
468 	struct perf_pmu_events_attr *pmu_attr =
469 		container_of(dev_attr, struct perf_pmu_events_attr, attr);
470 	struct device *dev = kobj_to_dev(kobj);
471 	struct cxl_pmu_info *info = dev_get_drvdata(dev);
472 	int vid = FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, pmu_attr->id);
473 	int gid = FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, pmu_attr->id);
474 	int msk = FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, pmu_attr->id);
475 
476 	if (!IS_ERR(cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, msk)))
477 		return attr->mode;
478 
479 	if (!IS_ERR(cxl_pmu_find_config_counter_ev_cap(info, vid, gid, msk)))
480 		return attr->mode;
481 
482 	return 0;
483 }
484 
/* "events" attribute group; entries are filtered by cxl_pmu_event_is_visible() */
static const struct attribute_group cxl_pmu_events = {
	.name = "events",
	.attrs = cxl_pmu_event_attrs,
	.is_visible = cxl_pmu_event_is_visible,
};
490 
491 static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
492 			    char *buf)
493 {
494 	struct cxl_pmu_info *info = dev_get_drvdata(dev);
495 
496 	return cpumap_print_to_pagebuf(true, buf, cpumask_of(info->on_cpu));
497 }
498 static DEVICE_ATTR_RO(cpumask);
499 
/* Attributes of the unnamed cpumask group: just the read-only "cpumask" file */
static struct attribute *cxl_pmu_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL
};
504 
/* Group without a .name, holding the cpumask attribute */
static const struct attribute_group cxl_pmu_cpumask_group = {
	.attrs = cxl_pmu_cpumask_attrs,
};
508 
/* All attribute groups handed to perf through pmu.attr_groups in cxl_pmu_probe() */
static const struct attribute_group *cxl_pmu_attr_groups[] = {
	&cxl_pmu_events,
	&cxl_pmu_format_group,
	&cxl_pmu_cpumask_group,
	NULL
};
515 
516 /* If counter_idx == NULL, don't try to allocate a counter. */
517 static int cxl_pmu_get_event_idx(struct perf_event *event, int *counter_idx,
518 				 int *event_idx)
519 {
520 	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
521 	DECLARE_BITMAP(configurable_and_free, CXL_PMU_MAX_COUNTERS);
522 	struct cxl_pmu_ev_cap *pmu_ev;
523 	u32 mask;
524 	u16 gid, vid;
525 	int i;
526 
527 	vid = cxl_pmu_config_get_vid(event);
528 	gid = cxl_pmu_config_get_gid(event);
529 	mask = cxl_pmu_config_get_mask(event);
530 
531 	pmu_ev = cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, mask);
532 	if (!IS_ERR(pmu_ev)) {
533 		if (!counter_idx)
534 			return 0;
535 		if (!test_bit(pmu_ev->counter_idx, info->used_counter_bm)) {
536 			*counter_idx = pmu_ev->counter_idx;
537 			return 0;
538 		}
539 		/* Fixed counter is in use, but maybe a configurable one? */
540 	}
541 
542 	pmu_ev = cxl_pmu_find_config_counter_ev_cap(info, vid, gid, mask);
543 	if (!IS_ERR(pmu_ev)) {
544 		if (!counter_idx)
545 			return 0;
546 
547 		bitmap_andnot(configurable_and_free, info->conf_counter_bm,
548 			info->used_counter_bm, CXL_PMU_MAX_COUNTERS);
549 
550 		i = find_first_bit(configurable_and_free, CXL_PMU_MAX_COUNTERS);
551 		if (i == CXL_PMU_MAX_COUNTERS)
552 			return -EINVAL;
553 
554 		*counter_idx = i;
555 		return 0;
556 	}
557 
558 	return -EINVAL;
559 }
560 
561 static int cxl_pmu_event_init(struct perf_event *event)
562 {
563 	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
564 	int rc;
565 
566 	/* Top level type sanity check - is this a Hardware Event being requested */
567 	if (event->attr.type != event->pmu->type)
568 		return -ENOENT;
569 
570 	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
571 		return -EOPNOTSUPP;
572 	/* TODO: Validation of any filter */
573 
574 	/*
575 	 * Verify that it is possible to count what was requested. Either must
576 	 * be a fixed counter that is a precise match or a configurable counter
577 	 * where this is a subset.
578 	 */
579 	rc = cxl_pmu_get_event_idx(event, NULL, NULL);
580 	if (rc < 0)
581 		return rc;
582 
583 	event->cpu = info->on_cpu;
584 
585 	return 0;
586 }
587 
588 static void cxl_pmu_enable(struct pmu *pmu)
589 {
590 	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu);
591 	void __iomem *base = info->base;
592 
593 	/* Can assume frozen at this stage */
594 	writeq(0, base + CXL_PMU_FREEZE_REG);
595 }
596 
597 static void cxl_pmu_disable(struct pmu *pmu)
598 {
599 	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu);
600 	void __iomem *base = info->base;
601 
602 	/*
603 	 * Whilst bits above number of counters are RsvdZ
604 	 * they are unlikely to be repurposed given
605 	 * number of counters is allowed to be 64 leaving
606 	 * no reserved bits.  Hence this is only slightly
607 	 * naughty.
608 	 */
609 	writeq(GENMASK_ULL(63, 0), base + CXL_PMU_FREEZE_REG);
610 }
611 
612 static void cxl_pmu_event_start(struct perf_event *event, int flags)
613 {
614 	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
615 	struct hw_perf_event *hwc = &event->hw;
616 	void __iomem *base = info->base;
617 	u64 cfg;
618 
619 	/*
620 	 * All paths to here should either set these flags directly or
621 	 * call cxl_pmu_event_stop() which will ensure the correct state.
622 	 */
623 	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
624 		return;
625 
626 	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
627 	hwc->state = 0;
628 
629 	/*
630 	 * Currently only hdm filter control is implemnted, this code will
631 	 * want generalizing when more filters are added.
632 	 */
633 	if (info->filter_hdm) {
634 		if (cxl_pmu_config1_hdm_filter_en(event))
635 			cfg = cxl_pmu_config2_get_hdm_decoder(event);
636 		else
637 			cfg = GENMASK(31, 0); /* No filtering if 0xFFFF_FFFF */
638 		writeq(cfg, base + CXL_PMU_FILTER_CFG_REG(hwc->idx, 0));
639 	}
640 
641 	cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
642 	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1);
643 	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW, 1);
644 	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1);
645 	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EDGE,
646 			  cxl_pmu_config1_get_edge(event) ? 1 : 0);
647 	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INVERT,
648 			  cxl_pmu_config1_get_invert(event) ? 1 : 0);
649 
650 	/* Fixed purpose counters have next two fields RO */
651 	if (test_bit(hwc->idx, info->conf_counter_bm)) {
652 		cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK,
653 				  hwc->event_base);
654 		cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENTS_MSK,
655 				  cxl_pmu_config_get_mask(event));
656 	}
657 	cfg &= ~CXL_PMU_COUNTER_CFG_THRESHOLD_MSK;
658 	/*
659 	 * For events that generate only 1 count per clock the CXL 3.0 spec
660 	 * states the threshold shall be set to 1 but if set to 0 it will
661 	 * count the raw value anwyay?
662 	 * There is no definition of what events will count multiple per cycle
663 	 * and hence to which non 1 values of threshold can apply.
664 	 * (CXL 3.0 8.2.7.2.1 Counter Configuration - threshold field definition)
665 	 */
666 	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_THRESHOLD_MSK,
667 			  cxl_pmu_config1_get_threshold(event));
668 	writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
669 
670 	local64_set(&hwc->prev_count, 0);
671 	writeq(0, base + CXL_PMU_COUNTER_REG(hwc->idx));
672 
673 	perf_event_update_userpage(event);
674 }
675 
676 static u64 cxl_pmu_read_counter(struct perf_event *event)
677 {
678 	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
679 	void __iomem *base = info->base;
680 
681 	return readq(base + CXL_PMU_COUNTER_REG(event->hw.idx));
682 }
683 
684 static void __cxl_pmu_read(struct perf_event *event, bool overflow)
685 {
686 	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
687 	struct hw_perf_event *hwc = &event->hw;
688 	u64 new_cnt, prev_cnt, delta;
689 
690 	do {
691 		prev_cnt = local64_read(&hwc->prev_count);
692 		new_cnt = cxl_pmu_read_counter(event);
693 	} while (local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) != prev_cnt);
694 
695 	/*
696 	 * If we know an overflow occur then take that into account.
697 	 * Note counter is not reset as that would lose events
698 	 */
699 	delta = (new_cnt - prev_cnt) & GENMASK_ULL(info->counter_width - 1, 0);
700 	if (overflow && delta < GENMASK_ULL(info->counter_width - 1, 0))
701 		delta += (1UL << info->counter_width);
702 
703 	local64_add(delta, &event->count);
704 }
705 
/* perf read callback: update the event count, assuming no overflow occurred */
static void cxl_pmu_read(struct perf_event *event)
{
	__cxl_pmu_read(event, false);
}
710 
711 static void cxl_pmu_event_stop(struct perf_event *event, int flags)
712 {
713 	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
714 	void __iomem *base = info->base;
715 	struct hw_perf_event *hwc = &event->hw;
716 	u64 cfg;
717 
718 	cxl_pmu_read(event);
719 	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
720 	hwc->state |= PERF_HES_STOPPED;
721 
722 	cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
723 	cfg &= ~(FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1) |
724 		 FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1));
725 	writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
726 
727 	hwc->state |= PERF_HES_UPTODATE;
728 }
729 
730 static int cxl_pmu_event_add(struct perf_event *event, int flags)
731 {
732 	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
733 	struct hw_perf_event *hwc = &event->hw;
734 	int idx, rc;
735 	int event_idx = 0;
736 
737 	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
738 
739 	rc = cxl_pmu_get_event_idx(event, &idx, &event_idx);
740 	if (rc < 0)
741 		return rc;
742 
743 	hwc->idx = idx;
744 
745 	/* Only set for configurable counters */
746 	hwc->event_base = event_idx;
747 	info->hw_events[idx] = event;
748 	set_bit(idx, info->used_counter_bm);
749 
750 	if (flags & PERF_EF_START)
751 		cxl_pmu_event_start(event, PERF_EF_RELOAD);
752 
753 	return 0;
754 }
755 
756 static void cxl_pmu_event_del(struct perf_event *event, int flags)
757 {
758 	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
759 	struct hw_perf_event *hwc = &event->hw;
760 
761 	cxl_pmu_event_stop(event, PERF_EF_UPDATE);
762 	clear_bit(hwc->idx, info->used_counter_bm);
763 	info->hw_events[hwc->idx] = NULL;
764 	perf_event_update_userpage(event);
765 }
766 
767 static irqreturn_t cxl_pmu_irq(int irq, void *data)
768 {
769 	struct cxl_pmu_info *info = data;
770 	void __iomem *base = info->base;
771 	u64 overflowed;
772 	DECLARE_BITMAP(overflowedbm, 64);
773 	int i;
774 
775 	overflowed = readq(base + CXL_PMU_OVERFLOW_REG);
776 
777 	/* Interrupt may be shared, so maybe it isn't ours */
778 	if (!overflowed)
779 		return IRQ_NONE;
780 
781 	bitmap_from_arr64(overflowedbm, &overflowed, 64);
782 	for_each_set_bit(i, overflowedbm, info->num_counters) {
783 		struct perf_event *event = info->hw_events[i];
784 
785 		if (!event) {
786 			dev_dbg(info->pmu.dev,
787 				"overflow but on non enabled counter %d\n", i);
788 			continue;
789 		}
790 
791 		__cxl_pmu_read(event, true);
792 	}
793 
794 	writeq(overflowed, base + CXL_PMU_OVERFLOW_REG);
795 
796 	return IRQ_HANDLED;
797 }
798 
799 static void cxl_pmu_perf_unregister(void *_info)
800 {
801 	struct cxl_pmu_info *info = _info;
802 
803 	perf_pmu_unregister(&info->pmu);
804 }
805 
806 static void cxl_pmu_cpuhp_remove(void *_info)
807 {
808 	struct cxl_pmu_info *info = _info;
809 
810 	cpuhp_state_remove_instance_nocalls(cxl_pmu_cpuhp_state_num, &info->node);
811 }
812 
813 static int cxl_pmu_probe(struct device *dev)
814 {
815 	struct cxl_pmu *pmu = to_cxl_pmu(dev);
816 	struct pci_dev *pdev = to_pci_dev(dev->parent);
817 	struct cxl_pmu_info *info;
818 	char *irq_name;
819 	char *dev_name;
820 	int rc, irq;
821 
822 	info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
823 	if (!info)
824 		return -ENOMEM;
825 
826 	dev_set_drvdata(dev, info);
827 	INIT_LIST_HEAD(&info->event_caps_fixed);
828 	INIT_LIST_HEAD(&info->event_caps_configurable);
829 
830 	info->base = pmu->base;
831 
832 	info->on_cpu = -1;
833 	rc = cxl_pmu_parse_caps(dev, info);
834 	if (rc)
835 		return rc;
836 
837 	info->hw_events = devm_kcalloc(dev, sizeof(*info->hw_events),
838 				       info->num_counters, GFP_KERNEL);
839 	if (!info->hw_events)
840 		return -ENOMEM;
841 
842 	switch (pmu->type) {
843 	case CXL_PMU_MEMDEV:
844 		dev_name = devm_kasprintf(dev, GFP_KERNEL, "cxl_pmu_mem%d.%d",
845 					  pmu->assoc_id, pmu->index);
846 		break;
847 	}
848 	if (!dev_name)
849 		return -ENOMEM;
850 
851 	info->pmu = (struct pmu) {
852 		.name = dev_name,
853 		.parent = dev,
854 		.module = THIS_MODULE,
855 		.event_init = cxl_pmu_event_init,
856 		.pmu_enable = cxl_pmu_enable,
857 		.pmu_disable = cxl_pmu_disable,
858 		.add = cxl_pmu_event_add,
859 		.del = cxl_pmu_event_del,
860 		.start = cxl_pmu_event_start,
861 		.stop = cxl_pmu_event_stop,
862 		.read = cxl_pmu_read,
863 		.task_ctx_nr = perf_invalid_context,
864 		.attr_groups = cxl_pmu_attr_groups,
865 		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
866 	};
867 
868 	if (info->irq <= 0)
869 		return -EINVAL;
870 
871 	rc = pci_irq_vector(pdev, info->irq);
872 	if (rc < 0)
873 		return rc;
874 	irq = rc;
875 
876 	irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_overflow\n", dev_name);
877 	if (!irq_name)
878 		return -ENOMEM;
879 
880 	rc = devm_request_irq(dev, irq, cxl_pmu_irq, IRQF_SHARED | IRQF_ONESHOT,
881 			      irq_name, info);
882 	if (rc)
883 		return rc;
884 	info->irq = irq;
885 
886 	rc = cpuhp_state_add_instance(cxl_pmu_cpuhp_state_num, &info->node);
887 	if (rc)
888 		return rc;
889 
890 	rc = devm_add_action_or_reset(dev, cxl_pmu_cpuhp_remove, info);
891 	if (rc)
892 		return rc;
893 
894 	rc = perf_pmu_register(&info->pmu, info->pmu.name, -1);
895 	if (rc)
896 		return rc;
897 
898 	rc = devm_add_action_or_reset(dev, cxl_pmu_perf_unregister, info);
899 	if (rc)
900 		return rc;
901 
902 	return 0;
903 }
904 
/* CXL bus driver for CXL_DEVICE_PMU devices; registered in cxl_pmu_init() */
static struct cxl_driver cxl_pmu_driver = {
	.name = "cxl_pmu",
	.probe = cxl_pmu_probe,
	.id = CXL_DEVICE_PMU,
};
910 
911 static int cxl_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
912 {
913 	struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node);
914 
915 	if (info->on_cpu != -1)
916 		return 0;
917 
918 	info->on_cpu = cpu;
919 	/*
920 	 * CPU HP lock is held so we should be guaranteed that the CPU hasn't yet
921 	 * gone away again.
922 	 */
923 	WARN_ON(irq_set_affinity(info->irq, cpumask_of(cpu)));
924 
925 	return 0;
926 }
927 
928 static int cxl_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
929 {
930 	struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node);
931 	unsigned int target;
932 
933 	if (info->on_cpu != cpu)
934 		return 0;
935 
936 	info->on_cpu = -1;
937 	target = cpumask_any_but(cpu_online_mask, cpu);
938 	if (target >= nr_cpu_ids) {
939 		dev_err(info->pmu.dev, "Unable to find a suitable CPU\n");
940 		return 0;
941 	}
942 
943 	perf_pmu_migrate_context(&info->pmu, cpu, target);
944 	info->on_cpu = target;
945 	/*
946 	 * CPU HP lock is held so we should be guaranteed that this CPU hasn't yet
947 	 * gone away.
948 	 */
949 	WARN_ON(irq_set_affinity(info->irq, cpumask_of(target)));
950 
951 	return 0;
952 }
953 
954 static __init int cxl_pmu_init(void)
955 {
956 	int rc;
957 
958 	rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
959 				     "AP_PERF_CXL_PMU_ONLINE",
960 				     cxl_pmu_online_cpu, cxl_pmu_offline_cpu);
961 	if (rc < 0)
962 		return rc;
963 	cxl_pmu_cpuhp_state_num = rc;
964 
965 	rc = cxl_driver_register(&cxl_pmu_driver);
966 	if (rc)
967 		cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
968 
969 	return rc;
970 }
971 
/* Module exit: undo cxl_pmu_init() in reverse order */
static __exit void cxl_pmu_exit(void)
{
	/* Unregister the driver first, then remove the hotplug state */
	cxl_driver_unregister(&cxl_pmu_driver);
	cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
}
977 
978 MODULE_DESCRIPTION("CXL Performance Monitor Driver");
979 MODULE_LICENSE("GPL");
980 MODULE_IMPORT_NS(CXL);
981 module_init(cxl_pmu_init);
982 module_exit(cxl_pmu_exit);
983 MODULE_ALIAS_CXL(CXL_DEVICE_PMU);
984