// SPDX-License-Identifier: GPL-2.0
/*
 * Marvell PEM (PCIe RC) Performance Monitor Driver
 *
 * Copyright (C) 2024 Marvell.
 */

#include <linux/acpi.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>

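/*
 * Usage sketch (hypothetical base address; the PMU name suffix comes from
 * the MMIO resource start discovered at probe time, see pem_perf_probe()):
 *
 *   perf list | grep mrvl_pcie_rc_pmu
 *   perf stat -e mrvl_pcie_rc_pmu_<base>/ib_tlp_pr/ -a -- sleep 1
 */
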
/*
 * Each of these events maps to a free-running 64-bit counter
 * that has no event control but can be reset.
 */
enum pem_events {
	IB_TLP_NPR,
	IB_TLP_PR,
	IB_TLP_CPL,
	IB_TLP_DWORDS_NPR,
	IB_TLP_DWORDS_PR,
	IB_TLP_DWORDS_CPL,
	IB_INFLIGHT,
	IB_READS,
	IB_REQ_NO_RO_NCB,
	IB_REQ_NO_RO_EBUS,
	OB_TLP_NPR,
	OB_TLP_PR,
	OB_TLP_CPL,
	OB_TLP_DWORDS_NPR,
	OB_TLP_DWORDS_PR,
	OB_TLP_DWORDS_CPL,
	OB_INFLIGHT,
	OB_READS,
	OB_MERGES_NPR,
	OB_MERGES_PR,
	OB_MERGES_CPL,
	ATS_TRANS,
	ATS_TRANS_LATENCY,
	ATS_PRI,
	ATS_PRI_LATENCY,
	ATS_INV,
	ATS_INV_LATENCY,
	PEM_EVENTIDS_MAX
};

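/*
 * MMIO offset of each free-running counter relative to pmu->base;
 * read via readq_relaxed() in pem_perf_read_counter().
 */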
static const u64 eventid_to_offset_table[] = {
	[IB_TLP_NPR]         = 0x0,
	[IB_TLP_PR]          = 0x8,
	[IB_TLP_CPL]         = 0x10,
	[IB_TLP_DWORDS_NPR]  = 0x100,
	[IB_TLP_DWORDS_PR]   = 0x108,
	[IB_TLP_DWORDS_CPL]  = 0x110,
	[IB_INFLIGHT]        = 0x200,
	[IB_READS]           = 0x300,
	[IB_REQ_NO_RO_NCB]   = 0x400,
	[IB_REQ_NO_RO_EBUS]  = 0x408,
	[OB_TLP_NPR]         = 0x500,
	[OB_TLP_PR]          = 0x508,
	[OB_TLP_CPL]         = 0x510,
	[OB_TLP_DWORDS_NPR]  = 0x600,
	[OB_TLP_DWORDS_PR]   = 0x608,
	[OB_TLP_DWORDS_CPL]  = 0x610,
	[OB_INFLIGHT]        = 0x700,
	[OB_READS]           = 0x800,
	[OB_MERGES_NPR]      = 0x900,
	[OB_MERGES_PR]       = 0x908,
	[OB_MERGES_CPL]      = 0x910,
	[ATS_TRANS]          = 0x2D18,
	[ATS_TRANS_LATENCY]  = 0x2D20,
	[ATS_PRI]            = 0x2D28,
	[ATS_PRI_LATENCY]    = 0x2D30,
	[ATS_INV]            = 0x2D38,
	[ATS_INV_LATENCY]    = 0x2D40,
};

struct pem_pmu {
	struct pmu pmu;
	void __iomem *base;
	unsigned int cpu;
	struct device *dev;
	struct hlist_node node;
};

#define to_pem_pmu(p)	container_of(p, struct pem_pmu, pmu)

static int eventid_to_offset(int eventid)
{
	return eventid_to_offset_table[eventid];
}

/* Events */
static ssize_t pem_pmu_event_show(struct device *dev,
				  struct device_attribute *attr,
				  char *page)
{
	struct perf_pmu_events_attr *pmu_attr;

	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
}

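/*
 * Build a perf_pmu_events_attr from an anonymous compound literal so each
 * event takes a single line in pem_perf_events_attrs[] below.
 */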
#define PEM_EVENT_ATTR(_name, _id)					\
	(&((struct perf_pmu_events_attr[]) {				\
	{ .attr = __ATTR(_name, 0444, pem_pmu_event_show, NULL),	\
		.id = _id, }						\
	})[0].attr.attr)

static struct attribute *pem_perf_events_attrs[] = {
	PEM_EVENT_ATTR(ib_tlp_npr, IB_TLP_NPR),
	PEM_EVENT_ATTR(ib_tlp_pr, IB_TLP_PR),
	PEM_EVENT_ATTR(ib_tlp_cpl_partid, IB_TLP_CPL),
	PEM_EVENT_ATTR(ib_tlp_dwords_npr, IB_TLP_DWORDS_NPR),
	PEM_EVENT_ATTR(ib_tlp_dwords_pr, IB_TLP_DWORDS_PR),
	PEM_EVENT_ATTR(ib_tlp_dwords_cpl_partid, IB_TLP_DWORDS_CPL),
	PEM_EVENT_ATTR(ib_inflight, IB_INFLIGHT),
	PEM_EVENT_ATTR(ib_reads, IB_READS),
	PEM_EVENT_ATTR(ib_req_no_ro_ncb, IB_REQ_NO_RO_NCB),
	PEM_EVENT_ATTR(ib_req_no_ro_ebus, IB_REQ_NO_RO_EBUS),
	PEM_EVENT_ATTR(ob_tlp_npr_partid, OB_TLP_NPR),
	PEM_EVENT_ATTR(ob_tlp_pr_partid, OB_TLP_PR),
	PEM_EVENT_ATTR(ob_tlp_cpl_partid, OB_TLP_CPL),
	PEM_EVENT_ATTR(ob_tlp_dwords_npr_partid, OB_TLP_DWORDS_NPR),
	PEM_EVENT_ATTR(ob_tlp_dwords_pr_partid, OB_TLP_DWORDS_PR),
	PEM_EVENT_ATTR(ob_tlp_dwords_cpl_partid, OB_TLP_DWORDS_CPL),
	PEM_EVENT_ATTR(ob_inflight_partid, OB_INFLIGHT),
	PEM_EVENT_ATTR(ob_reads_partid, OB_READS),
	PEM_EVENT_ATTR(ob_merges_npr_partid, OB_MERGES_NPR),
	PEM_EVENT_ATTR(ob_merges_pr_partid, OB_MERGES_PR),
	PEM_EVENT_ATTR(ob_merges_cpl_partid, OB_MERGES_CPL),
	PEM_EVENT_ATTR(ats_trans, ATS_TRANS),
	PEM_EVENT_ATTR(ats_trans_latency, ATS_TRANS_LATENCY),
	PEM_EVENT_ATTR(ats_pri, ATS_PRI),
	PEM_EVENT_ATTR(ats_pri_latency, ATS_PRI_LATENCY),
	PEM_EVENT_ATTR(ats_inv, ATS_INV),
	PEM_EVENT_ATTR(ats_inv_latency, ATS_INV_LATENCY),
	NULL
};

static struct attribute_group pem_perf_events_attr_group = {
	.name = "events",
	.attrs = pem_perf_events_attrs,
};

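/*
 * A single "event" config field selects the counter; 6 bits are more than
 * enough for event IDs 0..PEM_EVENTIDS_MAX-1.
 */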
PMU_FORMAT_ATTR(event, "config:0-5");

static struct attribute *pem_perf_format_attrs[] = {
	&format_attr_event.attr,
	NULL
};

static struct attribute_group pem_perf_format_attr_group = {
	.name = "format",
	.attrs = pem_perf_format_attrs,
};

/*
 * cpumask: all events are counted on a single CPU (pmu->cpu), so
 * advertise that CPU for userspace tools to open events on.
 */
static ssize_t pem_perf_cpumask_show(struct device *dev,
				     struct device_attribute *attr,
				     char *buf)
{
	struct pem_pmu *pmu = dev_get_drvdata(dev);

	return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
}

static struct device_attribute pem_perf_cpumask_attr =
	__ATTR(cpumask, 0444, pem_perf_cpumask_show, NULL);

static struct attribute *pem_perf_cpumask_attrs[] = {
	&pem_perf_cpumask_attr.attr,
	NULL
};

static struct attribute_group pem_perf_cpumask_attr_group = {
	.attrs = pem_perf_cpumask_attrs,
};

static const struct attribute_group *pem_perf_attr_groups[] = {
	&pem_perf_events_attr_group,
	&pem_perf_cpumask_attr_group,
	&pem_perf_format_attr_group,
	NULL
};

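/*
 * Validate an event: only per-CPU counting is supported (no sampling, no
 * per-task attach), and event groups must not mix hardware PMUs.
 */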
static int pem_perf_event_init(struct perf_event *event)
{
	struct pem_pmu *pmu = to_pem_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	struct perf_event *sibling;

	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	if (event->attr.config >= PEM_EVENTIDS_MAX)
		return -EINVAL;

	if (is_sampling_event(event) ||
	    event->attach_state & PERF_ATTACH_TASK)
		return -EOPNOTSUPP;

	if (event->cpu < 0)
		return -EOPNOTSUPP;

	/* We must NOT create groups containing mixed PMUs */
	if (event->group_leader->pmu != event->pmu &&
	    !is_software_event(event->group_leader))
		return -EINVAL;

	for_each_sibling_event(sibling, event->group_leader) {
		if (sibling->pmu != event->pmu &&
		    !is_software_event(sibling))
			return -EINVAL;
	}
	/*
	 * Assign the event to one CPU; the same event cannot be observed
	 * on multiple CPUs at the same time.
	 */
	event->cpu = pmu->cpu;
	hwc->idx = -1;
	return 0;
}

static u64 pem_perf_read_counter(struct pem_pmu *pmu,
				 struct perf_event *event, int eventid)
{
	return readq_relaxed(pmu->base + eventid_to_offset(eventid));
}

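/*
 * Lock-free update: retry until prev_count is swapped for the new hardware
 * value without racing another updater, then accumulate the delta.
 */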
static void pem_perf_event_update(struct perf_event *event)
{
	struct pem_pmu *pmu = to_pem_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 prev_count, new_count;

	do {
		prev_count = local64_read(&hwc->prev_count);
		new_count = pem_perf_read_counter(pmu, event, hwc->idx);
	} while (local64_xchg(&hwc->prev_count, new_count) != prev_count);

	local64_add((new_count - prev_count), &event->count);
}

static void pem_perf_event_start(struct perf_event *event, int flags)
{
	struct pem_pmu *pmu = to_pem_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int eventid = hwc->idx;

	/*
	 * All counters are free-running and each is associated with
	 * a fixed event to track in hardware.
	 */
	local64_set(&hwc->prev_count,
		    pem_perf_read_counter(pmu, event, eventid));

	hwc->state = 0;
}

static int pem_perf_event_add(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	hwc->idx = event->attr.config;
	if (WARN_ON_ONCE(hwc->idx >= PEM_EVENTIDS_MAX))
		return -EINVAL;
	hwc->state |= PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		pem_perf_event_start(event, flags);

	return 0;
}

static void pem_perf_event_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (flags & PERF_EF_UPDATE)
		pem_perf_event_update(event);

	hwc->state |= PERF_HES_STOPPED;
}

static void pem_perf_event_del(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	pem_perf_event_stop(event, PERF_EF_UPDATE);
	hwc->idx = -1;
}

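/*
 * If the CPU that owns the counters goes offline, migrate the perf context
 * (and counter ownership) to any other online CPU.
 */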
static int pem_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
{
	struct pem_pmu *pmu = hlist_entry_safe(node, struct pem_pmu, node);
	unsigned int target;

	if (cpu != pmu->cpu)
		return 0;

	target = cpumask_any_but(cpu_online_mask, cpu);
	if (target >= nr_cpu_ids)
		return 0;

	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
	pmu->cpu = target;
	return 0;
}

static int pem_perf_probe(struct platform_device *pdev)
{
	struct pem_pmu *pem_pmu;
	struct resource *res;
	void __iomem *base;
	char *name;
	int ret;

	pem_pmu = devm_kzalloc(&pdev->dev, sizeof(*pem_pmu), GFP_KERNEL);
	if (!pem_pmu)
		return -ENOMEM;

	pem_pmu->dev = &pdev->dev;
	platform_set_drvdata(pdev, pem_pmu);

	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
	if (IS_ERR(base))
		return PTR_ERR(base);

	pem_pmu->base = base;

	pem_pmu->pmu = (struct pmu) {
		.module	      = THIS_MODULE,
		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
		.task_ctx_nr  = perf_invalid_context,
		.attr_groups  = pem_perf_attr_groups,
		.event_init   = pem_perf_event_init,
		.add	      = pem_perf_event_add,
		.del	      = pem_perf_event_del,
		.start	      = pem_perf_event_start,
		.stop	      = pem_perf_event_stop,
		.read	      = pem_perf_event_update,
	};

	/* Choose this CPU to collect perf data */
	pem_pmu->cpu = raw_smp_processor_id();

	name = devm_kasprintf(pem_pmu->dev, GFP_KERNEL, "mrvl_pcie_rc_pmu_%llx",
			      res->start);
	if (!name)
		return -ENOMEM;

	ret = cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
					       &pem_pmu->node);
	if (ret)
		return ret;

	ret = perf_pmu_register(&pem_pmu->pmu, name, -1);
	if (ret)
		goto error;

	return 0;
error:
	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
					    &pem_pmu->node);
	return ret;
}

static void pem_perf_remove(struct platform_device *pdev)
{
	struct pem_pmu *pem_pmu = platform_get_drvdata(pdev);

	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
					    &pem_pmu->node);

	perf_pmu_unregister(&pem_pmu->pmu);
}

#ifdef CONFIG_ACPI
static const struct acpi_device_id pem_pmu_acpi_match[] = {
	{"MRVL000E", 0},
	{}
};
MODULE_DEVICE_TABLE(acpi, pem_pmu_acpi_match);
#endif

static struct platform_driver pem_pmu_driver = {
	.driver	= {
		.name   = "pem-pmu",
		.acpi_match_table = ACPI_PTR(pem_pmu_acpi_match),
		.suppress_bind_attrs = true,
	},
	.probe		= pem_perf_probe,
	.remove		= pem_perf_remove,
};

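/*
 * Set up the hotplug state before registering the driver so that probe()
 * can always add instances to a valid state.
 */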
static int __init pem_pmu_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
				      "perf/marvell/pem:online", NULL,
				      pem_pmu_offline_cpu);
	if (ret)
		return ret;

	ret = platform_driver_register(&pem_pmu_driver);
	if (ret)
		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE);
	return ret;
}

static void __exit pem_pmu_exit(void)
{
	platform_driver_unregister(&pem_pmu_driver);
	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE);
}

module_init(pem_pmu_init);
module_exit(pem_pmu_exit);

MODULE_DESCRIPTION("Marvell PEM Perf driver");
MODULE_AUTHOR("Gowthami Thiagarajan <gthiagarajan@marvell.com>");
MODULE_LICENSE("GPL");