xref: /linux/drivers/perf/arm_cspmu/nvidia_cspmu.c (revision 7fc2cd2e4b398c57c9cf961cfea05eadbf34c05c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4  *
5  */
6 
7 /* Support for NVIDIA specific attributes. */
8 
9 #include <linux/io.h>
10 #include <linux/module.h>
11 #include <linux/topology.h>
12 
13 #include "arm_cspmu.h"
14 
/*
 * Filter field layouts.
 *
 * Each *_PORT_COUNT is the width, in bits, of the corresponding filter
 * field; the matching *_FILTER_ID_MASK covers bits [COUNT - 1:0].
 */
#define NV_PCIE_PORT_COUNT           10ULL
#define NV_PCIE_FILTER_ID_MASK       GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)

#define NV_NVL_C2C_PORT_COUNT        2ULL
#define NV_NVL_C2C_FILTER_ID_MASK    GENMASK_ULL(NV_NVL_C2C_PORT_COUNT - 1, 0)

#define NV_CNVL_PORT_COUNT           4ULL
#define NV_CNVL_FILTER_ID_MASK       GENMASK_ULL(NV_CNVL_PORT_COUNT - 1, 0)

/* Filter mask used for PMUs without a product-specific entry: bits [31:0]. */
#define NV_GENERIC_FILTER_ID_MASK    GENMASK_ULL(31, 0)

/* PMIIDR fields compared when matching a PMU against nv_cspmu_match[]. */
#define NV_PRODID_MASK	(PMIIDR_PRODUCTID | PMIIDR_VARIANT | PMIIDR_REVISION)

/* NOTE(review): not referenced by the code visible in this file. */
#define NV_FORMAT_NAME_GENERIC	0

/*
 * Retrieve the NVIDIA context stored in cspmu->impl.ctx.
 *
 * The argument is parenthesised so that any pointer expression (for
 * example "base + 1") can be passed, not only a plain identifier.
 */
#define to_nv_cspmu_ctx(cspmu)	((struct nv_cspmu_ctx *)((cspmu)->impl.ctx))
31 
/*
 * Build one event attribute whose name is the concatenation of
 * _pref, _num and _suff.
 */
#define NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _num, _suff, _config)	\
	ARM_CSPMU_EVENT_ATTR(_pref##_num##_suff, _config)

/*
 * Expand to four event attributes named <_pref>_0_<_suff> ..
 * <_pref>_3_<_suff> with consecutive event ids _config .. _config + 3.
 *
 * Each id expression is fully parenthesised so the addition cannot
 * re-associate with whatever operator the outer macro applies to its
 * argument, and so that _config itself may be an expression.
 */
#define NV_CSPMU_EVENT_ATTR_4(_pref, _suff, _config)			\
	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _0_, _suff, (_config)),	\
	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _1_, _suff, ((_config) + 1)),	\
	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _2_, _suff, ((_config) + 2)),	\
	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _3_, _suff, ((_config) + 3))
40 
41 struct nv_cspmu_ctx {
42 	const char *name;
43 
44 	struct attribute **event_attr;
45 	struct attribute **format_attr;
46 
47 	u32 filter_mask;
48 	u32 filter_default_val;
49 	u32 filter2_mask;
50 	u32 filter2_default_val;
51 
52 	u32 (*get_filter)(const struct perf_event *event);
53 	u32 (*get_filter2)(const struct perf_event *event);
54 
55 	void *data;
56 
57 	int (*init_data)(struct arm_cspmu *cspmu);
58 };
59 
60 static struct attribute *scf_pmu_event_attrs[] = {
61 	ARM_CSPMU_EVENT_ATTR(bus_cycles,			0x1d),
62 
63 	ARM_CSPMU_EVENT_ATTR(scf_cache_allocate,		0xF0),
64 	ARM_CSPMU_EVENT_ATTR(scf_cache_refill,			0xF1),
65 	ARM_CSPMU_EVENT_ATTR(scf_cache,				0xF2),
66 	ARM_CSPMU_EVENT_ATTR(scf_cache_wb,			0xF3),
67 
68 	NV_CSPMU_EVENT_ATTR_4(socket, rd_data,			0x101),
69 	NV_CSPMU_EVENT_ATTR_4(socket, wb_data,			0x109),
70 
71 	NV_CSPMU_EVENT_ATTR_4(socket, rd_outstanding,		0x115),
72 
73 	NV_CSPMU_EVENT_ATTR_4(socket, rd_access,		0x12d),
74 	NV_CSPMU_EVENT_ATTR_4(socket, wb_access,		0x135),
75 	NV_CSPMU_EVENT_ATTR_4(socket, wr_access,		0x139),
76 
77 	ARM_CSPMU_EVENT_ATTR(gmem_rd_data,			0x16d),
78 	ARM_CSPMU_EVENT_ATTR(gmem_rd_access,			0x16e),
79 	ARM_CSPMU_EVENT_ATTR(gmem_rd_outstanding,		0x16f),
80 	ARM_CSPMU_EVENT_ATTR(gmem_wb_data,			0x173),
81 	ARM_CSPMU_EVENT_ATTR(gmem_wb_access,			0x174),
82 	ARM_CSPMU_EVENT_ATTR(gmem_wr_data,			0x179),
83 	ARM_CSPMU_EVENT_ATTR(gmem_wr_access,			0x17b),
84 
85 	NV_CSPMU_EVENT_ATTR_4(socket, wr_data,			0x17c),
86 
87 	ARM_CSPMU_EVENT_ATTR(gmem_wr_total_bytes,		0x1a0),
88 	ARM_CSPMU_EVENT_ATTR(remote_socket_wr_total_bytes,	0x1a1),
89 	ARM_CSPMU_EVENT_ATTR(remote_socket_rd_data,		0x1a2),
90 	ARM_CSPMU_EVENT_ATTR(remote_socket_rd_outstanding,	0x1a3),
91 	ARM_CSPMU_EVENT_ATTR(remote_socket_rd_access,		0x1a4),
92 
93 	ARM_CSPMU_EVENT_ATTR(cmem_rd_data,			0x1a5),
94 	ARM_CSPMU_EVENT_ATTR(cmem_rd_access,			0x1a6),
95 	ARM_CSPMU_EVENT_ATTR(cmem_rd_outstanding,		0x1a7),
96 	ARM_CSPMU_EVENT_ATTR(cmem_wb_data,			0x1ab),
97 	ARM_CSPMU_EVENT_ATTR(cmem_wb_access,			0x1ac),
98 	ARM_CSPMU_EVENT_ATTR(cmem_wr_data,			0x1b1),
99 
100 	ARM_CSPMU_EVENT_ATTR(cmem_wr_access,			0x1ca),
101 
102 	ARM_CSPMU_EVENT_ATTR(cmem_wr_total_bytes,		0x1db),
103 
104 	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
105 	NULL,
106 };
107 
108 static struct attribute *mcf_pmu_event_attrs[] = {
109 	ARM_CSPMU_EVENT_ATTR(rd_bytes_loc,			0x0),
110 	ARM_CSPMU_EVENT_ATTR(rd_bytes_rem,			0x1),
111 	ARM_CSPMU_EVENT_ATTR(wr_bytes_loc,			0x2),
112 	ARM_CSPMU_EVENT_ATTR(wr_bytes_rem,			0x3),
113 	ARM_CSPMU_EVENT_ATTR(total_bytes_loc,			0x4),
114 	ARM_CSPMU_EVENT_ATTR(total_bytes_rem,			0x5),
115 	ARM_CSPMU_EVENT_ATTR(rd_req_loc,			0x6),
116 	ARM_CSPMU_EVENT_ATTR(rd_req_rem,			0x7),
117 	ARM_CSPMU_EVENT_ATTR(wr_req_loc,			0x8),
118 	ARM_CSPMU_EVENT_ATTR(wr_req_rem,			0x9),
119 	ARM_CSPMU_EVENT_ATTR(total_req_loc,			0xa),
120 	ARM_CSPMU_EVENT_ATTR(total_req_rem,			0xb),
121 	ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc,			0xc),
122 	ARM_CSPMU_EVENT_ATTR(rd_cum_outs_rem,			0xd),
123 	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
124 	NULL,
125 };
126 
127 static struct attribute *generic_pmu_event_attrs[] = {
128 	ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
129 	NULL,
130 };
131 
132 static struct attribute *scf_pmu_format_attrs[] = {
133 	ARM_CSPMU_FORMAT_EVENT_ATTR,
134 	NULL,
135 };
136 
137 static struct attribute *pcie_pmu_format_attrs[] = {
138 	ARM_CSPMU_FORMAT_EVENT_ATTR,
139 	ARM_CSPMU_FORMAT_ATTR(root_port, "config1:0-9"),
140 	NULL,
141 };
142 
143 static struct attribute *nvlink_c2c_pmu_format_attrs[] = {
144 	ARM_CSPMU_FORMAT_EVENT_ATTR,
145 	ARM_CSPMU_FORMAT_ATTR(port, "config1:0-1"),
146 	NULL,
147 };
148 
149 static struct attribute *cnvlink_pmu_format_attrs[] = {
150 	ARM_CSPMU_FORMAT_EVENT_ATTR,
151 	ARM_CSPMU_FORMAT_ATTR(rem_socket, "config1:0-3"),
152 	NULL,
153 };
154 
155 static struct attribute *generic_pmu_format_attrs[] = {
156 	ARM_CSPMU_FORMAT_EVENT_ATTR,
157 	ARM_CSPMU_FORMAT_FILTER_ATTR,
158 	ARM_CSPMU_FORMAT_FILTER2_ATTR,
159 	NULL,
160 };
161 
162 static struct attribute **
163 nv_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
164 {
165 	const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
166 
167 	return ctx->event_attr;
168 }
169 
170 static struct attribute **
171 nv_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
172 {
173 	const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
174 
175 	return ctx->format_attr;
176 }
177 
178 static const char *
179 nv_cspmu_get_name(const struct arm_cspmu *cspmu)
180 {
181 	const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
182 
183 	return ctx->name;
184 }
185 
186 static u32 nv_cspmu_event_filter(const struct perf_event *event)
187 {
188 	const struct nv_cspmu_ctx *ctx =
189 		to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
190 
191 	const u32 filter_val = event->attr.config1 & ctx->filter_mask;
192 
193 	if (filter_val == 0)
194 		return ctx->filter_default_val;
195 
196 	return filter_val;
197 }
198 
199 static u32 nv_cspmu_event_filter2(const struct perf_event *event)
200 {
201 	const struct nv_cspmu_ctx *ctx =
202 		to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
203 
204 	const u32 filter_val = event->attr.config2 & ctx->filter2_mask;
205 
206 	if (filter_val == 0)
207 		return ctx->filter2_default_val;
208 
209 	return filter_val;
210 }
211 
212 static void nv_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
213 				   const struct perf_event *event)
214 {
215 	u32 filter, offset;
216 	const struct nv_cspmu_ctx *ctx =
217 		to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
218 	offset = 4 * event->hw.idx;
219 
220 	if (ctx->get_filter) {
221 		filter = ctx->get_filter(event);
222 		writel(filter, cspmu->base0 + PMEVFILTR + offset);
223 	}
224 
225 	if (ctx->get_filter2) {
226 		filter = ctx->get_filter2(event);
227 		writel(filter, cspmu->base0 + PMEVFILT2R + offset);
228 	}
229 }
230 
231 static void nv_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
232 				   const struct perf_event *event)
233 {
234 	u32 filter = nv_cspmu_event_filter(event);
235 
236 	writel(filter, cspmu->base0 + PMCCFILTR);
237 }
238 
239 
/* How nv_cspmu_format_name() fills the "%u" of a match's name pattern. */
enum nv_cspmu_name_fmt {
	/* Value of a counter incremented for each name generated this way. */
	NAME_FMT_GENERIC,
	/* Node id of the first CPU associated with the PMU. */
	NAME_FMT_SOCKET,
};
244 
245 struct nv_cspmu_match {
246 	u32 prodid;
247 	u32 prodid_mask;
248 	const char *name_pattern;
249 	enum nv_cspmu_name_fmt name_fmt;
250 	struct nv_cspmu_ctx template_ctx;
251 	struct arm_cspmu_impl_ops ops;
252 };
253 
254 static const struct nv_cspmu_match nv_cspmu_match[] = {
255 	{
256 	  .prodid = 0x10300000,
257 	  .prodid_mask = NV_PRODID_MASK,
258 	  .name_pattern = "nvidia_pcie_pmu_%u",
259 	  .name_fmt = NAME_FMT_SOCKET,
260 	  .template_ctx = {
261 		.event_attr = mcf_pmu_event_attrs,
262 		.format_attr = pcie_pmu_format_attrs,
263 		.filter_mask = NV_PCIE_FILTER_ID_MASK,
264 		.filter_default_val = NV_PCIE_FILTER_ID_MASK,
265 		.filter2_mask = 0x0,
266 		.filter2_default_val = 0x0,
267 		.get_filter = nv_cspmu_event_filter,
268 		.get_filter2 = NULL,
269 		.data = NULL,
270 		.init_data = NULL
271 	  },
272 	},
273 	{
274 	  .prodid = 0x10400000,
275 	  .prodid_mask = NV_PRODID_MASK,
276 	  .name_pattern = "nvidia_nvlink_c2c1_pmu_%u",
277 	  .name_fmt = NAME_FMT_SOCKET,
278 	  .template_ctx = {
279 		.event_attr = mcf_pmu_event_attrs,
280 		.format_attr = nvlink_c2c_pmu_format_attrs,
281 		.filter_mask = NV_NVL_C2C_FILTER_ID_MASK,
282 		.filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
283 		.filter2_mask = 0x0,
284 		.filter2_default_val = 0x0,
285 		.get_filter = nv_cspmu_event_filter,
286 		.get_filter2 = NULL,
287 		.data = NULL,
288 		.init_data = NULL
289 	  },
290 	},
291 	{
292 	  .prodid = 0x10500000,
293 	  .prodid_mask = NV_PRODID_MASK,
294 	  .name_pattern = "nvidia_nvlink_c2c0_pmu_%u",
295 	  .name_fmt = NAME_FMT_SOCKET,
296 	  .template_ctx = {
297 		.event_attr = mcf_pmu_event_attrs,
298 		.format_attr = nvlink_c2c_pmu_format_attrs,
299 		.filter_mask = NV_NVL_C2C_FILTER_ID_MASK,
300 		.filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
301 		.filter2_mask = 0x0,
302 		.filter2_default_val = 0x0,
303 		.get_filter = nv_cspmu_event_filter,
304 		.get_filter2 = NULL,
305 		.data = NULL,
306 		.init_data = NULL
307 	  },
308 	},
309 	{
310 	  .prodid = 0x10600000,
311 	  .prodid_mask = NV_PRODID_MASK,
312 	  .name_pattern = "nvidia_cnvlink_pmu_%u",
313 	  .name_fmt = NAME_FMT_SOCKET,
314 	  .template_ctx = {
315 		.event_attr = mcf_pmu_event_attrs,
316 		.format_attr = cnvlink_pmu_format_attrs,
317 		.filter_mask = NV_CNVL_FILTER_ID_MASK,
318 		.filter_default_val = NV_CNVL_FILTER_ID_MASK,
319 		.filter2_mask = 0x0,
320 		.filter2_default_val = 0x0,
321 		.get_filter = nv_cspmu_event_filter,
322 		.get_filter2 = NULL,
323 		.data = NULL,
324 		.init_data = NULL
325 	  },
326 	},
327 	{
328 	  .prodid = 0x2CF00000,
329 	  .prodid_mask = NV_PRODID_MASK,
330 	  .name_pattern = "nvidia_scf_pmu_%u",
331 	  .name_fmt = NAME_FMT_SOCKET,
332 	  .template_ctx = {
333 		.event_attr = scf_pmu_event_attrs,
334 		.format_attr = scf_pmu_format_attrs,
335 		.filter_mask = 0x0,
336 		.filter_default_val = 0x0,
337 		.filter2_mask = 0x0,
338 		.filter2_default_val = 0x0,
339 		.get_filter = nv_cspmu_event_filter,
340 		.get_filter2 = NULL,
341 		.data = NULL,
342 		.init_data = NULL
343 	  },
344 	},
345 	{
346 	  .prodid = 0,
347 	  .prodid_mask = 0,
348 	  .name_pattern = "nvidia_uncore_pmu_%u",
349 	  .name_fmt = NAME_FMT_GENERIC,
350 	  .template_ctx = {
351 		.event_attr = generic_pmu_event_attrs,
352 		.format_attr = generic_pmu_format_attrs,
353 		.filter_mask = NV_GENERIC_FILTER_ID_MASK,
354 		.filter_default_val = NV_GENERIC_FILTER_ID_MASK,
355 		.filter2_mask = NV_GENERIC_FILTER_ID_MASK,
356 		.filter2_default_val = NV_GENERIC_FILTER_ID_MASK,
357 		.get_filter = nv_cspmu_event_filter,
358 		.get_filter2 = nv_cspmu_event_filter2,
359 		.data = NULL,
360 		.init_data = NULL
361 	  },
362 	},
363 };
364 
365 static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
366 				  const struct nv_cspmu_match *match)
367 {
368 	char *name;
369 	struct device *dev = cspmu->dev;
370 
371 	static atomic_t pmu_generic_idx = {0};
372 
373 	switch (match->name_fmt) {
374 	case NAME_FMT_SOCKET: {
375 		const int cpu = cpumask_first(&cspmu->associated_cpus);
376 		const int socket = cpu_to_node(cpu);
377 
378 		name = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
379 				       socket);
380 		break;
381 	}
382 	case NAME_FMT_GENERIC:
383 		name = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
384 				       atomic_fetch_inc(&pmu_generic_idx));
385 		break;
386 	default:
387 		name = NULL;
388 		break;
389 	}
390 
391 	return name;
392 }
393 
/*
 * Install callback @name into @impl.
 *
 * The override from @match->ops is used when it is set; otherwise
 * @default_op is installed, unless it is NULL, in which case @impl->name
 * is left untouched.
 *
 * @impl, @match and @default_op are parenthesised so that arbitrary
 * pointer expressions can be passed. @name is a member name and is
 * pasted as-is.
 */
#define SET_OP(name, impl, match, default_op) \
	do { \
		if ((match)->ops.name) \
			(impl)->name = (match)->ops.name; \
		else if ((default_op) != NULL) \
			(impl)->name = (default_op); \
	} while (0)
401 
402 static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
403 {
404 	struct nv_cspmu_ctx *ctx;
405 	struct device *dev = cspmu->dev;
406 	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
407 	const struct nv_cspmu_match *match = nv_cspmu_match;
408 
409 	ctx = devm_kzalloc(dev, sizeof(struct nv_cspmu_ctx), GFP_KERNEL);
410 	if (!ctx)
411 		return -ENOMEM;
412 
413 	/* Find matching PMU. */
414 	for (; match->prodid; match++) {
415 		const u32 prodid_mask = match->prodid_mask;
416 
417 		if ((match->prodid & prodid_mask) ==
418 		    (cspmu->impl.pmiidr & prodid_mask))
419 			break;
420 	}
421 
422 	/* Initialize the context with the matched template. */
423 	memcpy(ctx, &match->template_ctx, sizeof(struct nv_cspmu_ctx));
424 	ctx->name = nv_cspmu_format_name(cspmu, match);
425 
426 	cspmu->impl.ctx = ctx;
427 
428 	/* NVIDIA specific callbacks. */
429 	SET_OP(set_cc_filter, impl_ops, match, nv_cspmu_set_cc_filter);
430 	SET_OP(set_ev_filter, impl_ops, match, nv_cspmu_set_ev_filter);
431 	SET_OP(get_event_attrs, impl_ops, match, nv_cspmu_get_event_attrs);
432 	SET_OP(get_format_attrs, impl_ops, match, nv_cspmu_get_format_attrs);
433 	SET_OP(get_name, impl_ops, match, nv_cspmu_get_name);
434 
435 	if (ctx->init_data)
436 		return ctx->init_data(cspmu);
437 
438 	return 0;
439 }
440 
441 /* Match all NVIDIA Coresight PMU devices */
442 static const struct arm_cspmu_impl_match nv_cspmu_param = {
443 	.pmiidr_val	= ARM_CSPMU_IMPL_ID_NVIDIA,
444 	.module		= THIS_MODULE,
445 	.impl_init_ops	= nv_cspmu_init_ops
446 };
447 
448 static int __init nvidia_cspmu_init(void)
449 {
450 	int ret;
451 
452 	ret = arm_cspmu_impl_register(&nv_cspmu_param);
453 	if (ret)
454 		pr_err("nvidia_cspmu backend registration error: %d\n", ret);
455 
456 	return ret;
457 }
458 
459 static void __exit nvidia_cspmu_exit(void)
460 {
461 	arm_cspmu_impl_unregister(&nv_cspmu_param);
462 }
463 
464 module_init(nvidia_cspmu_init);
465 module_exit(nvidia_cspmu_exit);
466 
467 MODULE_DESCRIPTION("NVIDIA Coresight Architecture Performance Monitor Driver");
468 MODULE_LICENSE("GPL v2");
469