xref: /linux/drivers/perf/nvidia_t410_cmem_latency_pmu.c (revision c43267e6794a36013fd495a4d81bf7f748fe4615)
1*429b7638SBesar Wicaksono // SPDX-License-Identifier: GPL-2.0
2*429b7638SBesar Wicaksono /*
3*429b7638SBesar Wicaksono  * NVIDIA Tegra410 CPU Memory (CMEM) Latency PMU driver.
4*429b7638SBesar Wicaksono  *
5*429b7638SBesar Wicaksono  * Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
6*429b7638SBesar Wicaksono  */
7*429b7638SBesar Wicaksono 
8*429b7638SBesar Wicaksono #include <linux/acpi.h>
9*429b7638SBesar Wicaksono #include <linux/bitops.h>
10*429b7638SBesar Wicaksono #include <linux/cpumask.h>
11*429b7638SBesar Wicaksono #include <linux/device.h>
12*429b7638SBesar Wicaksono #include <linux/interrupt.h>
13*429b7638SBesar Wicaksono #include <linux/io.h>
14*429b7638SBesar Wicaksono #include <linux/module.h>
15*429b7638SBesar Wicaksono #include <linux/perf_event.h>
16*429b7638SBesar Wicaksono #include <linux/platform_device.h>
17*429b7638SBesar Wicaksono 
/* Number of per-instance register windows (size of cmem_lat_pmu::base[]). */
#define NUM_INSTANCES    14

/* Register offsets. */
#define CMEM_LAT_CG_CTRL         0x800
#define CMEM_LAT_CTRL            0x808
#define CMEM_LAT_STATUS          0x810
#define CMEM_LAT_CYCLE_CNTR      0x818
#define CMEM_LAT_MC0_REQ_CNTR    0x820
#define CMEM_LAT_MC0_AOR_CNTR    0x830
#define CMEM_LAT_MC1_REQ_CNTR    0x838
#define CMEM_LAT_MC1_AOR_CNTR    0x848
#define CMEM_LAT_MC2_REQ_CNTR    0x850
#define CMEM_LAT_MC2_AOR_CNTR    0x860

/* CMEM_LAT_CTRL values. */
#define CMEM_LAT_CTRL_DISABLE    0x0ULL
#define CMEM_LAT_CTRL_ENABLE     0x1ULL
#define CMEM_LAT_CTRL_CLR        0x2ULL

/* CMEM_LAT_CG_CTRL values. */
#define CMEM_LAT_CG_CTRL_DISABLE    0x0ULL
#define CMEM_LAT_CG_CTRL_ENABLE     0x1ULL

/*
 * CMEM_LAT_STATUS register field.
 * These overflow flags are tested by the counter read helpers, which only
 * log a warning when one is set.
 */
#define CMEM_LAT_STATUS_CYCLE_OVF      BIT(0)
#define CMEM_LAT_STATUS_MC0_AOR_OVF    BIT(1)
#define CMEM_LAT_STATUS_MC0_REQ_OVF    BIT(3)
#define CMEM_LAT_STATUS_MC1_AOR_OVF    BIT(4)
#define CMEM_LAT_STATUS_MC1_REQ_OVF    BIT(6)
#define CMEM_LAT_STATUS_MC2_AOR_OVF    BIT(7)
#define CMEM_LAT_STATUS_MC2_REQ_OVF    BIT(9)

/* Events. The IDs double as indices into read_counter_fn[]. */
#define CMEM_LAT_EVENT_CYCLES    0x0
#define CMEM_LAT_EVENT_REQ       0x1
#define CMEM_LAT_EVENT_AOR       0x2

/*
 * NUM_EVENTS bounds the valid event IDs, MASK_EVENT extracts the event ID
 * from attr.config, and MAX_ACTIVE_EVENTS sizes the events[] array and the
 * used_ctrs bitmap.
 */
#define CMEM_LAT_NUM_EVENTS           0x3
#define CMEM_LAT_MASK_EVENT           0x3
#define CMEM_LAT_MAX_ACTIVE_EVENTS    32

/* Selectors telling the cpumask sysfs show routine which mask to print. */
#define CMEM_LAT_ACTIVE_CPU_MASK        0x0
#define CMEM_LAT_ASSOCIATED_CPU_MASK    0x1

/*
 * NOTE(review): not referenced in the visible part of the file; presumably
 * holds the CPU hotplug state registered for this driver - confirm.
 */
static unsigned long cmem_lat_pmu_cpuhp_state;
63*429b7638SBesar Wicaksono 
/*
 * Bookkeeping for the events currently added to the PMU.
 * @events:    perf event occupying each slot, or NULL once deleted.
 * @used_ctrs: bitmap of occupied slots; bit N corresponds to events[N].
 */
struct cmem_lat_pmu_hw_events {
	struct perf_event *events[CMEM_LAT_MAX_ACTIVE_EVENTS];
	DECLARE_BITMAP(used_ctrs, CMEM_LAT_MAX_ACTIVE_EVENTS);
};
68*429b7638SBesar Wicaksono 
/*
 * Per-device driver state.
 * @pmu:             embedded perf PMU; to_cmem_lat_pmu() maps back from it.
 * @dev:             device used for the overflow warnings and debug output.
 * @name:            NOTE(review): not used in the visible code; presumably
 *                   the registered PMU name - confirm.
 * @identifier:      string exposed through the "identifier" sysfs attribute.
 * @base_broadcast:  MMIO window used for the control register writes.
 * @base:            per-instance MMIO windows used for status/counter reads.
 * @associated_cpus: CPUs allowed to request events on this PMU.
 * @active_cpu:      CPU currently chosen to handle events for this PMU.
 * @node:            list node passed to the CPU hotplug callbacks.
 * @hw_events:       slots tracking the events added to the PMU.
 */
struct cmem_lat_pmu {
	struct pmu pmu;
	struct device *dev;
	const char *name;
	const char *identifier;
	void __iomem *base_broadcast;
	void __iomem *base[NUM_INSTANCES];
	cpumask_t associated_cpus;
	cpumask_t active_cpu;
	struct hlist_node node;
	struct cmem_lat_pmu_hw_events hw_events;
};

/* Convert a struct pmu pointer to its enclosing struct cmem_lat_pmu. */
#define to_cmem_lat_pmu(p) \
	container_of(p, struct cmem_lat_pmu, pmu)
84*429b7638SBesar Wicaksono 
85*429b7638SBesar Wicaksono 
86*429b7638SBesar Wicaksono /* Get event type from perf_event. */
87*429b7638SBesar Wicaksono static inline u32 get_event_type(struct perf_event *event)
88*429b7638SBesar Wicaksono {
89*429b7638SBesar Wicaksono 	return (event->attr.config) & CMEM_LAT_MASK_EVENT;
90*429b7638SBesar Wicaksono }
91*429b7638SBesar Wicaksono 
92*429b7638SBesar Wicaksono /* PMU operations. */
93*429b7638SBesar Wicaksono static int cmem_lat_pmu_get_event_idx(struct cmem_lat_pmu_hw_events *hw_events,
94*429b7638SBesar Wicaksono 				struct perf_event *event)
95*429b7638SBesar Wicaksono {
96*429b7638SBesar Wicaksono 	unsigned int idx;
97*429b7638SBesar Wicaksono 
98*429b7638SBesar Wicaksono 	idx = find_first_zero_bit(hw_events->used_ctrs, CMEM_LAT_MAX_ACTIVE_EVENTS);
99*429b7638SBesar Wicaksono 	if (idx >= CMEM_LAT_MAX_ACTIVE_EVENTS)
100*429b7638SBesar Wicaksono 		return -EAGAIN;
101*429b7638SBesar Wicaksono 
102*429b7638SBesar Wicaksono 	set_bit(idx, hw_events->used_ctrs);
103*429b7638SBesar Wicaksono 
104*429b7638SBesar Wicaksono 	return idx;
105*429b7638SBesar Wicaksono }
106*429b7638SBesar Wicaksono 
107*429b7638SBesar Wicaksono static bool cmem_lat_pmu_validate_event(struct pmu *pmu,
108*429b7638SBesar Wicaksono 				 struct cmem_lat_pmu_hw_events *hw_events,
109*429b7638SBesar Wicaksono 				 struct perf_event *event)
110*429b7638SBesar Wicaksono {
111*429b7638SBesar Wicaksono 	int ret;
112*429b7638SBesar Wicaksono 
113*429b7638SBesar Wicaksono 	if (is_software_event(event))
114*429b7638SBesar Wicaksono 		return true;
115*429b7638SBesar Wicaksono 
116*429b7638SBesar Wicaksono 	/* Reject groups spanning multiple HW PMUs. */
117*429b7638SBesar Wicaksono 	if (event->pmu != pmu)
118*429b7638SBesar Wicaksono 		return false;
119*429b7638SBesar Wicaksono 
120*429b7638SBesar Wicaksono 	ret = cmem_lat_pmu_get_event_idx(hw_events, event);
121*429b7638SBesar Wicaksono 	if (ret < 0)
122*429b7638SBesar Wicaksono 		return false;
123*429b7638SBesar Wicaksono 
124*429b7638SBesar Wicaksono 	return true;
125*429b7638SBesar Wicaksono }
126*429b7638SBesar Wicaksono 
127*429b7638SBesar Wicaksono /* Make sure the group of events can be scheduled at once on the PMU. */
128*429b7638SBesar Wicaksono static bool cmem_lat_pmu_validate_group(struct perf_event *event)
129*429b7638SBesar Wicaksono {
130*429b7638SBesar Wicaksono 	struct perf_event *sibling, *leader = event->group_leader;
131*429b7638SBesar Wicaksono 	struct cmem_lat_pmu_hw_events fake_hw_events;
132*429b7638SBesar Wicaksono 
133*429b7638SBesar Wicaksono 	if (event->group_leader == event)
134*429b7638SBesar Wicaksono 		return true;
135*429b7638SBesar Wicaksono 
136*429b7638SBesar Wicaksono 	memset(&fake_hw_events, 0, sizeof(fake_hw_events));
137*429b7638SBesar Wicaksono 
138*429b7638SBesar Wicaksono 	if (!cmem_lat_pmu_validate_event(event->pmu, &fake_hw_events, leader))
139*429b7638SBesar Wicaksono 		return false;
140*429b7638SBesar Wicaksono 
141*429b7638SBesar Wicaksono 	for_each_sibling_event(sibling, leader) {
142*429b7638SBesar Wicaksono 		if (!cmem_lat_pmu_validate_event(event->pmu, &fake_hw_events, sibling))
143*429b7638SBesar Wicaksono 			return false;
144*429b7638SBesar Wicaksono 	}
145*429b7638SBesar Wicaksono 
146*429b7638SBesar Wicaksono 	return cmem_lat_pmu_validate_event(event->pmu, &fake_hw_events, event);
147*429b7638SBesar Wicaksono }
148*429b7638SBesar Wicaksono 
149*429b7638SBesar Wicaksono static int cmem_lat_pmu_event_init(struct perf_event *event)
150*429b7638SBesar Wicaksono {
151*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
152*429b7638SBesar Wicaksono 	struct hw_perf_event *hwc = &event->hw;
153*429b7638SBesar Wicaksono 	u32 event_type = get_event_type(event);
154*429b7638SBesar Wicaksono 
155*429b7638SBesar Wicaksono 	if (event->attr.type != event->pmu->type ||
156*429b7638SBesar Wicaksono 	    event_type >= CMEM_LAT_NUM_EVENTS)
157*429b7638SBesar Wicaksono 		return -ENOENT;
158*429b7638SBesar Wicaksono 
159*429b7638SBesar Wicaksono 	/*
160*429b7638SBesar Wicaksono 	 * Sampling, per-process mode, and per-task counters are not supported
161*429b7638SBesar Wicaksono 	 * since this PMU is shared across all CPUs.
162*429b7638SBesar Wicaksono 	 */
163*429b7638SBesar Wicaksono 	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) {
164*429b7638SBesar Wicaksono 		dev_dbg(cmem_lat_pmu->pmu.dev,
165*429b7638SBesar Wicaksono 				"Can't support sampling and per-process mode\n");
166*429b7638SBesar Wicaksono 		return -EOPNOTSUPP;
167*429b7638SBesar Wicaksono 	}
168*429b7638SBesar Wicaksono 
169*429b7638SBesar Wicaksono 	if (event->cpu < 0) {
170*429b7638SBesar Wicaksono 		dev_dbg(cmem_lat_pmu->pmu.dev, "Can't support per-task counters\n");
171*429b7638SBesar Wicaksono 		return -EINVAL;
172*429b7638SBesar Wicaksono 	}
173*429b7638SBesar Wicaksono 
174*429b7638SBesar Wicaksono 	/*
175*429b7638SBesar Wicaksono 	 * Make sure the CPU assignment is on one of the CPUs associated with
176*429b7638SBesar Wicaksono 	 * this PMU.
177*429b7638SBesar Wicaksono 	 */
178*429b7638SBesar Wicaksono 	if (!cpumask_test_cpu(event->cpu, &cmem_lat_pmu->associated_cpus)) {
179*429b7638SBesar Wicaksono 		dev_dbg(cmem_lat_pmu->pmu.dev,
180*429b7638SBesar Wicaksono 				"Requested cpu is not associated with the PMU\n");
181*429b7638SBesar Wicaksono 		return -EINVAL;
182*429b7638SBesar Wicaksono 	}
183*429b7638SBesar Wicaksono 
184*429b7638SBesar Wicaksono 	/* Enforce the current active CPU to handle the events in this PMU. */
185*429b7638SBesar Wicaksono 	event->cpu = cpumask_first(&cmem_lat_pmu->active_cpu);
186*429b7638SBesar Wicaksono 	if (event->cpu >= nr_cpu_ids)
187*429b7638SBesar Wicaksono 		return -EINVAL;
188*429b7638SBesar Wicaksono 
189*429b7638SBesar Wicaksono 	if (!cmem_lat_pmu_validate_group(event))
190*429b7638SBesar Wicaksono 		return -EINVAL;
191*429b7638SBesar Wicaksono 
192*429b7638SBesar Wicaksono 	hwc->idx = -1;
193*429b7638SBesar Wicaksono 	hwc->config = event_type;
194*429b7638SBesar Wicaksono 
195*429b7638SBesar Wicaksono 	return 0;
196*429b7638SBesar Wicaksono }
197*429b7638SBesar Wicaksono 
198*429b7638SBesar Wicaksono static u64 cmem_lat_pmu_read_status(struct cmem_lat_pmu *cmem_lat_pmu,
199*429b7638SBesar Wicaksono 				   unsigned int inst)
200*429b7638SBesar Wicaksono {
201*429b7638SBesar Wicaksono 	return readq(cmem_lat_pmu->base[inst] + CMEM_LAT_STATUS);
202*429b7638SBesar Wicaksono }
203*429b7638SBesar Wicaksono 
204*429b7638SBesar Wicaksono static u64 cmem_lat_pmu_read_cycle_counter(struct perf_event *event)
205*429b7638SBesar Wicaksono {
206*429b7638SBesar Wicaksono 	const unsigned int instance = 0;
207*429b7638SBesar Wicaksono 	u64 status;
208*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
209*429b7638SBesar Wicaksono 	struct device *dev = cmem_lat_pmu->dev;
210*429b7638SBesar Wicaksono 
211*429b7638SBesar Wicaksono 	/*
212*429b7638SBesar Wicaksono 	 * Use the reading from first instance since all instances are
213*429b7638SBesar Wicaksono 	 * identical.
214*429b7638SBesar Wicaksono 	 */
215*429b7638SBesar Wicaksono 	status = cmem_lat_pmu_read_status(cmem_lat_pmu, instance);
216*429b7638SBesar Wicaksono 	if (status & CMEM_LAT_STATUS_CYCLE_OVF)
217*429b7638SBesar Wicaksono 		dev_warn(dev, "Cycle counter overflow\n");
218*429b7638SBesar Wicaksono 
219*429b7638SBesar Wicaksono 	return readq(cmem_lat_pmu->base[instance] + CMEM_LAT_CYCLE_CNTR);
220*429b7638SBesar Wicaksono }
221*429b7638SBesar Wicaksono 
222*429b7638SBesar Wicaksono static u64 cmem_lat_pmu_read_req_counter(struct perf_event *event)
223*429b7638SBesar Wicaksono {
224*429b7638SBesar Wicaksono 	unsigned int i;
225*429b7638SBesar Wicaksono 	u64 status, val = 0;
226*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
227*429b7638SBesar Wicaksono 	struct device *dev = cmem_lat_pmu->dev;
228*429b7638SBesar Wicaksono 
229*429b7638SBesar Wicaksono 	/* Sum up the counts from all instances. */
230*429b7638SBesar Wicaksono 	for (i = 0; i < NUM_INSTANCES; i++) {
231*429b7638SBesar Wicaksono 		status = cmem_lat_pmu_read_status(cmem_lat_pmu, i);
232*429b7638SBesar Wicaksono 		if (status & CMEM_LAT_STATUS_MC0_REQ_OVF)
233*429b7638SBesar Wicaksono 			dev_warn(dev, "MC0 request counter overflow\n");
234*429b7638SBesar Wicaksono 		if (status & CMEM_LAT_STATUS_MC1_REQ_OVF)
235*429b7638SBesar Wicaksono 			dev_warn(dev, "MC1 request counter overflow\n");
236*429b7638SBesar Wicaksono 		if (status & CMEM_LAT_STATUS_MC2_REQ_OVF)
237*429b7638SBesar Wicaksono 			dev_warn(dev, "MC2 request counter overflow\n");
238*429b7638SBesar Wicaksono 
239*429b7638SBesar Wicaksono 		val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC0_REQ_CNTR);
240*429b7638SBesar Wicaksono 		val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC1_REQ_CNTR);
241*429b7638SBesar Wicaksono 		val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC2_REQ_CNTR);
242*429b7638SBesar Wicaksono 	}
243*429b7638SBesar Wicaksono 
244*429b7638SBesar Wicaksono 	return val;
245*429b7638SBesar Wicaksono }
246*429b7638SBesar Wicaksono 
247*429b7638SBesar Wicaksono static u64 cmem_lat_pmu_read_aor_counter(struct perf_event *event)
248*429b7638SBesar Wicaksono {
249*429b7638SBesar Wicaksono 	unsigned int i;
250*429b7638SBesar Wicaksono 	u64 status, val = 0;
251*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
252*429b7638SBesar Wicaksono 	struct device *dev = cmem_lat_pmu->dev;
253*429b7638SBesar Wicaksono 
254*429b7638SBesar Wicaksono 	/* Sum up the counts from all instances. */
255*429b7638SBesar Wicaksono 	for (i = 0; i < NUM_INSTANCES; i++) {
256*429b7638SBesar Wicaksono 		status = cmem_lat_pmu_read_status(cmem_lat_pmu, i);
257*429b7638SBesar Wicaksono 		if (status & CMEM_LAT_STATUS_MC0_AOR_OVF)
258*429b7638SBesar Wicaksono 			dev_warn(dev, "MC0 AOR counter overflow\n");
259*429b7638SBesar Wicaksono 		if (status & CMEM_LAT_STATUS_MC1_AOR_OVF)
260*429b7638SBesar Wicaksono 			dev_warn(dev, "MC1 AOR counter overflow\n");
261*429b7638SBesar Wicaksono 		if (status & CMEM_LAT_STATUS_MC2_AOR_OVF)
262*429b7638SBesar Wicaksono 			dev_warn(dev, "MC2 AOR counter overflow\n");
263*429b7638SBesar Wicaksono 
264*429b7638SBesar Wicaksono 		val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC0_AOR_CNTR);
265*429b7638SBesar Wicaksono 		val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC1_AOR_CNTR);
266*429b7638SBesar Wicaksono 		val += readq(cmem_lat_pmu->base[i] + CMEM_LAT_MC2_AOR_CNTR);
267*429b7638SBesar Wicaksono 	}
268*429b7638SBesar Wicaksono 
269*429b7638SBesar Wicaksono 	return val;
270*429b7638SBesar Wicaksono }
271*429b7638SBesar Wicaksono 
272*429b7638SBesar Wicaksono static u64 (*read_counter_fn[CMEM_LAT_NUM_EVENTS])(struct perf_event *) = {
273*429b7638SBesar Wicaksono 	[CMEM_LAT_EVENT_CYCLES] = cmem_lat_pmu_read_cycle_counter,
274*429b7638SBesar Wicaksono 	[CMEM_LAT_EVENT_REQ] = cmem_lat_pmu_read_req_counter,
275*429b7638SBesar Wicaksono 	[CMEM_LAT_EVENT_AOR] = cmem_lat_pmu_read_aor_counter,
276*429b7638SBesar Wicaksono };
277*429b7638SBesar Wicaksono 
278*429b7638SBesar Wicaksono static void cmem_lat_pmu_event_update(struct perf_event *event)
279*429b7638SBesar Wicaksono {
280*429b7638SBesar Wicaksono 	u32 event_type;
281*429b7638SBesar Wicaksono 	u64 prev, now;
282*429b7638SBesar Wicaksono 	struct hw_perf_event *hwc = &event->hw;
283*429b7638SBesar Wicaksono 
284*429b7638SBesar Wicaksono 	if (hwc->state & PERF_HES_STOPPED)
285*429b7638SBesar Wicaksono 		return;
286*429b7638SBesar Wicaksono 
287*429b7638SBesar Wicaksono 	event_type = hwc->config;
288*429b7638SBesar Wicaksono 
289*429b7638SBesar Wicaksono 	do {
290*429b7638SBesar Wicaksono 		prev = local64_read(&hwc->prev_count);
291*429b7638SBesar Wicaksono 		now = read_counter_fn[event_type](event);
292*429b7638SBesar Wicaksono 	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
293*429b7638SBesar Wicaksono 
294*429b7638SBesar Wicaksono 	local64_add(now - prev, &event->count);
295*429b7638SBesar Wicaksono 
296*429b7638SBesar Wicaksono 	hwc->state |= PERF_HES_UPTODATE;
297*429b7638SBesar Wicaksono }
298*429b7638SBesar Wicaksono 
299*429b7638SBesar Wicaksono static void cmem_lat_pmu_start(struct perf_event *event, int pmu_flags)
300*429b7638SBesar Wicaksono {
301*429b7638SBesar Wicaksono 	event->hw.state = 0;
302*429b7638SBesar Wicaksono }
303*429b7638SBesar Wicaksono 
304*429b7638SBesar Wicaksono static void cmem_lat_pmu_stop(struct perf_event *event, int pmu_flags)
305*429b7638SBesar Wicaksono {
306*429b7638SBesar Wicaksono 	event->hw.state |= PERF_HES_STOPPED;
307*429b7638SBesar Wicaksono }
308*429b7638SBesar Wicaksono 
309*429b7638SBesar Wicaksono static int cmem_lat_pmu_add(struct perf_event *event, int flags)
310*429b7638SBesar Wicaksono {
311*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
312*429b7638SBesar Wicaksono 	struct cmem_lat_pmu_hw_events *hw_events = &cmem_lat_pmu->hw_events;
313*429b7638SBesar Wicaksono 	struct hw_perf_event *hwc = &event->hw;
314*429b7638SBesar Wicaksono 	int idx;
315*429b7638SBesar Wicaksono 
316*429b7638SBesar Wicaksono 	if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
317*429b7638SBesar Wicaksono 					   &cmem_lat_pmu->associated_cpus)))
318*429b7638SBesar Wicaksono 		return -ENOENT;
319*429b7638SBesar Wicaksono 
320*429b7638SBesar Wicaksono 	idx = cmem_lat_pmu_get_event_idx(hw_events, event);
321*429b7638SBesar Wicaksono 	if (idx < 0)
322*429b7638SBesar Wicaksono 		return idx;
323*429b7638SBesar Wicaksono 
324*429b7638SBesar Wicaksono 	hw_events->events[idx] = event;
325*429b7638SBesar Wicaksono 	hwc->idx = idx;
326*429b7638SBesar Wicaksono 	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
327*429b7638SBesar Wicaksono 
328*429b7638SBesar Wicaksono 	if (flags & PERF_EF_START)
329*429b7638SBesar Wicaksono 		cmem_lat_pmu_start(event, PERF_EF_RELOAD);
330*429b7638SBesar Wicaksono 
331*429b7638SBesar Wicaksono 	/* Propagate changes to the userspace mapping. */
332*429b7638SBesar Wicaksono 	perf_event_update_userpage(event);
333*429b7638SBesar Wicaksono 
334*429b7638SBesar Wicaksono 	return 0;
335*429b7638SBesar Wicaksono }
336*429b7638SBesar Wicaksono 
337*429b7638SBesar Wicaksono static void cmem_lat_pmu_del(struct perf_event *event, int flags)
338*429b7638SBesar Wicaksono {
339*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(event->pmu);
340*429b7638SBesar Wicaksono 	struct cmem_lat_pmu_hw_events *hw_events = &cmem_lat_pmu->hw_events;
341*429b7638SBesar Wicaksono 	struct hw_perf_event *hwc = &event->hw;
342*429b7638SBesar Wicaksono 	int idx = hwc->idx;
343*429b7638SBesar Wicaksono 
344*429b7638SBesar Wicaksono 	cmem_lat_pmu_stop(event, PERF_EF_UPDATE);
345*429b7638SBesar Wicaksono 
346*429b7638SBesar Wicaksono 	hw_events->events[idx] = NULL;
347*429b7638SBesar Wicaksono 
348*429b7638SBesar Wicaksono 	clear_bit(idx, hw_events->used_ctrs);
349*429b7638SBesar Wicaksono 
350*429b7638SBesar Wicaksono 	perf_event_update_userpage(event);
351*429b7638SBesar Wicaksono }
352*429b7638SBesar Wicaksono 
/* Refresh the event count from the hardware counters. */
static void cmem_lat_pmu_read(struct perf_event *event)
{
	cmem_lat_pmu_event_update(event);
}
357*429b7638SBesar Wicaksono 
358*429b7638SBesar Wicaksono static inline void cmem_lat_pmu_cg_ctrl(struct cmem_lat_pmu *cmem_lat_pmu,
359*429b7638SBesar Wicaksono 										u64 val)
360*429b7638SBesar Wicaksono {
361*429b7638SBesar Wicaksono 	writeq(val, cmem_lat_pmu->base_broadcast + CMEM_LAT_CG_CTRL);
362*429b7638SBesar Wicaksono }
363*429b7638SBesar Wicaksono 
364*429b7638SBesar Wicaksono static inline void cmem_lat_pmu_ctrl(struct cmem_lat_pmu *cmem_lat_pmu, u64 val)
365*429b7638SBesar Wicaksono {
366*429b7638SBesar Wicaksono 	writeq(val, cmem_lat_pmu->base_broadcast + CMEM_LAT_CTRL);
367*429b7638SBesar Wicaksono }
368*429b7638SBesar Wicaksono 
369*429b7638SBesar Wicaksono static void cmem_lat_pmu_enable(struct pmu *pmu)
370*429b7638SBesar Wicaksono {
371*429b7638SBesar Wicaksono 	bool disabled;
372*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(pmu);
373*429b7638SBesar Wicaksono 
374*429b7638SBesar Wicaksono 	disabled = bitmap_empty(cmem_lat_pmu->hw_events.used_ctrs,
375*429b7638SBesar Wicaksono 							CMEM_LAT_MAX_ACTIVE_EVENTS);
376*429b7638SBesar Wicaksono 
377*429b7638SBesar Wicaksono 	if (disabled)
378*429b7638SBesar Wicaksono 		return;
379*429b7638SBesar Wicaksono 
380*429b7638SBesar Wicaksono 	/* Enable all the counters. */
381*429b7638SBesar Wicaksono 	cmem_lat_pmu_cg_ctrl(cmem_lat_pmu, CMEM_LAT_CG_CTRL_ENABLE);
382*429b7638SBesar Wicaksono 	cmem_lat_pmu_ctrl(cmem_lat_pmu, CMEM_LAT_CTRL_ENABLE);
383*429b7638SBesar Wicaksono }
384*429b7638SBesar Wicaksono 
385*429b7638SBesar Wicaksono static void cmem_lat_pmu_disable(struct pmu *pmu)
386*429b7638SBesar Wicaksono {
387*429b7638SBesar Wicaksono 	int idx;
388*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(pmu);
389*429b7638SBesar Wicaksono 
390*429b7638SBesar Wicaksono 	/* Disable all the counters. */
391*429b7638SBesar Wicaksono 	cmem_lat_pmu_ctrl(cmem_lat_pmu, CMEM_LAT_CTRL_DISABLE);
392*429b7638SBesar Wicaksono 
393*429b7638SBesar Wicaksono 	/*
394*429b7638SBesar Wicaksono 	 * The counters will start from 0 again on restart.
395*429b7638SBesar Wicaksono 	 * Update the events immediately to avoid losing the counts.
396*429b7638SBesar Wicaksono 	 */
397*429b7638SBesar Wicaksono 	for_each_set_bit(idx, cmem_lat_pmu->hw_events.used_ctrs,
398*429b7638SBesar Wicaksono 						CMEM_LAT_MAX_ACTIVE_EVENTS) {
399*429b7638SBesar Wicaksono 		struct perf_event *event = cmem_lat_pmu->hw_events.events[idx];
400*429b7638SBesar Wicaksono 
401*429b7638SBesar Wicaksono 		if (!event)
402*429b7638SBesar Wicaksono 			continue;
403*429b7638SBesar Wicaksono 
404*429b7638SBesar Wicaksono 		cmem_lat_pmu_event_update(event);
405*429b7638SBesar Wicaksono 
406*429b7638SBesar Wicaksono 		local64_set(&event->hw.prev_count, 0ULL);
407*429b7638SBesar Wicaksono 	}
408*429b7638SBesar Wicaksono 
409*429b7638SBesar Wicaksono 	cmem_lat_pmu_ctrl(cmem_lat_pmu, CMEM_LAT_CTRL_CLR);
410*429b7638SBesar Wicaksono 	cmem_lat_pmu_cg_ctrl(cmem_lat_pmu, CMEM_LAT_CG_CTRL_DISABLE);
411*429b7638SBesar Wicaksono }
412*429b7638SBesar Wicaksono 
413*429b7638SBesar Wicaksono /* PMU identifier attribute. */
414*429b7638SBesar Wicaksono 
415*429b7638SBesar Wicaksono static ssize_t cmem_lat_pmu_identifier_show(struct device *dev,
416*429b7638SBesar Wicaksono 					 struct device_attribute *attr,
417*429b7638SBesar Wicaksono 					 char *page)
418*429b7638SBesar Wicaksono {
419*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(dev_get_drvdata(dev));
420*429b7638SBesar Wicaksono 
421*429b7638SBesar Wicaksono 	return sysfs_emit(page, "%s\n", cmem_lat_pmu->identifier);
422*429b7638SBesar Wicaksono }
423*429b7638SBesar Wicaksono 
424*429b7638SBesar Wicaksono static struct device_attribute cmem_lat_pmu_identifier_attr =
425*429b7638SBesar Wicaksono 	__ATTR(identifier, 0444, cmem_lat_pmu_identifier_show, NULL);
426*429b7638SBesar Wicaksono 
427*429b7638SBesar Wicaksono static struct attribute *cmem_lat_pmu_identifier_attrs[] = {
428*429b7638SBesar Wicaksono 	&cmem_lat_pmu_identifier_attr.attr,
429*429b7638SBesar Wicaksono 	NULL
430*429b7638SBesar Wicaksono };
431*429b7638SBesar Wicaksono 
432*429b7638SBesar Wicaksono static struct attribute_group cmem_lat_pmu_identifier_attr_group = {
433*429b7638SBesar Wicaksono 	.attrs = cmem_lat_pmu_identifier_attrs,
434*429b7638SBesar Wicaksono };
435*429b7638SBesar Wicaksono 
436*429b7638SBesar Wicaksono /* Format attributes. */
437*429b7638SBesar Wicaksono 
/*
 * Build an anonymous read-only dev_ext_attribute named @_name whose show
 * routine is @_func and whose private value (.var) is @_config, and yield a
 * pointer to its embedded struct attribute.
 */
#define NV_PMU_EXT_ATTR(_name, _func, _config)			\
	(&((struct dev_ext_attribute[]){				\
		{							\
			.attr = __ATTR(_name, 0444, _func, NULL),	\
			.var = (void *)_config				\
		}							\
	})[0].attr.attr)

/*
 * Format description: the event ID occupies config bits 0-1.
 * NOTE(review): device_show_string presumably prints the string kept in
 * .var - confirm against its definition.
 */
static struct attribute *cmem_lat_pmu_formats[] = {
	NV_PMU_EXT_ATTR(event, device_show_string, "config:0-1"),
	NULL
};

/* Attributes published under the "format" sysfs directory. */
static const struct attribute_group cmem_lat_pmu_format_group = {
	.name = "format",
	.attrs = cmem_lat_pmu_formats,
};
455*429b7638SBesar Wicaksono 
456*429b7638SBesar Wicaksono /* Event attributes. */
457*429b7638SBesar Wicaksono 
458*429b7638SBesar Wicaksono static ssize_t cmem_lat_pmu_sysfs_event_show(struct device *dev,
459*429b7638SBesar Wicaksono 				struct device_attribute *attr, char *buf)
460*429b7638SBesar Wicaksono {
461*429b7638SBesar Wicaksono 	struct perf_pmu_events_attr *pmu_attr;
462*429b7638SBesar Wicaksono 
463*429b7638SBesar Wicaksono 	pmu_attr = container_of(attr, typeof(*pmu_attr), attr);
464*429b7638SBesar Wicaksono 	return sysfs_emit(buf, "event=0x%llx\n", pmu_attr->id);
465*429b7638SBesar Wicaksono }
466*429b7638SBesar Wicaksono 
/* Declare an event attribute @_name with ID @_config, shown by the routine above. */
#define NV_PMU_EVENT_ATTR(_name, _config)	\
	PMU_EVENT_ATTR_ID(_name, cmem_lat_pmu_sysfs_event_show, _config)

/* Named events and the event IDs they map to. */
static struct attribute *cmem_lat_pmu_events[] = {
	NV_PMU_EVENT_ATTR(cycles, CMEM_LAT_EVENT_CYCLES),
	NV_PMU_EVENT_ATTR(rd_req, CMEM_LAT_EVENT_REQ),
	NV_PMU_EVENT_ATTR(rd_cum_outs, CMEM_LAT_EVENT_AOR),
	NULL
};

/* Attributes published under the "events" sysfs directory. */
static const struct attribute_group cmem_lat_pmu_events_group = {
	.name = "events",
	.attrs = cmem_lat_pmu_events,
};
481*429b7638SBesar Wicaksono 
482*429b7638SBesar Wicaksono /* Cpumask attributes. */
483*429b7638SBesar Wicaksono 
484*429b7638SBesar Wicaksono static ssize_t cmem_lat_pmu_cpumask_show(struct device *dev,
485*429b7638SBesar Wicaksono 			    struct device_attribute *attr, char *buf)
486*429b7638SBesar Wicaksono {
487*429b7638SBesar Wicaksono 	struct pmu *pmu = dev_get_drvdata(dev);
488*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu = to_cmem_lat_pmu(pmu);
489*429b7638SBesar Wicaksono 	struct dev_ext_attribute *eattr =
490*429b7638SBesar Wicaksono 		container_of(attr, struct dev_ext_attribute, attr);
491*429b7638SBesar Wicaksono 	unsigned long mask_id = (unsigned long)eattr->var;
492*429b7638SBesar Wicaksono 	const cpumask_t *cpumask;
493*429b7638SBesar Wicaksono 
494*429b7638SBesar Wicaksono 	switch (mask_id) {
495*429b7638SBesar Wicaksono 	case CMEM_LAT_ACTIVE_CPU_MASK:
496*429b7638SBesar Wicaksono 		cpumask = &cmem_lat_pmu->active_cpu;
497*429b7638SBesar Wicaksono 		break;
498*429b7638SBesar Wicaksono 	case CMEM_LAT_ASSOCIATED_CPU_MASK:
499*429b7638SBesar Wicaksono 		cpumask = &cmem_lat_pmu->associated_cpus;
500*429b7638SBesar Wicaksono 		break;
501*429b7638SBesar Wicaksono 	default:
502*429b7638SBesar Wicaksono 		return 0;
503*429b7638SBesar Wicaksono 	}
504*429b7638SBesar Wicaksono 	return cpumap_print_to_pagebuf(true, buf, cpumask);
505*429b7638SBesar Wicaksono }
506*429b7638SBesar Wicaksono 
/*
 * Declare a cpumask attribute @_name; @_config is the mask selector that
 * cmem_lat_pmu_cpumask_show() reads back from the attribute.
 */
#define NV_PMU_CPUMASK_ATTR(_name, _config)			\
	NV_PMU_EXT_ATTR(_name, cmem_lat_pmu_cpumask_show,	\
				(unsigned long)_config)

/* "cpumask" shows the active CPU; "associated_cpus" shows every associated CPU. */
static struct attribute *cmem_lat_pmu_cpumask_attrs[] = {
	NV_PMU_CPUMASK_ATTR(cpumask, CMEM_LAT_ACTIVE_CPU_MASK),
	NV_PMU_CPUMASK_ATTR(associated_cpus, CMEM_LAT_ASSOCIATED_CPU_MASK),
	NULL
};

/* Unnamed group holding the cpumask attributes. */
static const struct attribute_group cmem_lat_pmu_cpumask_attr_group = {
	.attrs = cmem_lat_pmu_cpumask_attrs,
};
520*429b7638SBesar Wicaksono 
521*429b7638SBesar Wicaksono /* Per PMU device attribute groups. */
522*429b7638SBesar Wicaksono 
/* NULL-terminated list of every sysfs attribute group defined by this driver. */
static const struct attribute_group *cmem_lat_pmu_attr_groups[] = {
	&cmem_lat_pmu_identifier_attr_group,
	&cmem_lat_pmu_format_group,
	&cmem_lat_pmu_events_group,
	&cmem_lat_pmu_cpumask_attr_group,
	NULL
};
530*429b7638SBesar Wicaksono 
531*429b7638SBesar Wicaksono static int cmem_lat_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
532*429b7638SBesar Wicaksono {
533*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu =
534*429b7638SBesar Wicaksono 		hlist_entry_safe(node, struct cmem_lat_pmu, node);
535*429b7638SBesar Wicaksono 
536*429b7638SBesar Wicaksono 	if (!cpumask_test_cpu(cpu, &cmem_lat_pmu->associated_cpus))
537*429b7638SBesar Wicaksono 		return 0;
538*429b7638SBesar Wicaksono 
539*429b7638SBesar Wicaksono 	/* If the PMU is already managed, there is nothing to do */
540*429b7638SBesar Wicaksono 	if (!cpumask_empty(&cmem_lat_pmu->active_cpu))
541*429b7638SBesar Wicaksono 		return 0;
542*429b7638SBesar Wicaksono 
543*429b7638SBesar Wicaksono 	/* Use this CPU for event counting */
544*429b7638SBesar Wicaksono 	cpumask_set_cpu(cpu, &cmem_lat_pmu->active_cpu);
545*429b7638SBesar Wicaksono 
546*429b7638SBesar Wicaksono 	return 0;
547*429b7638SBesar Wicaksono }
548*429b7638SBesar Wicaksono 
549*429b7638SBesar Wicaksono static int cmem_lat_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
550*429b7638SBesar Wicaksono {
551*429b7638SBesar Wicaksono 	unsigned int dst;
552*429b7638SBesar Wicaksono 
553*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu =
554*429b7638SBesar Wicaksono 		hlist_entry_safe(node, struct cmem_lat_pmu, node);
555*429b7638SBesar Wicaksono 
556*429b7638SBesar Wicaksono 	/* Nothing to do if this CPU doesn't own the PMU */
557*429b7638SBesar Wicaksono 	if (!cpumask_test_and_clear_cpu(cpu, &cmem_lat_pmu->active_cpu))
558*429b7638SBesar Wicaksono 		return 0;
559*429b7638SBesar Wicaksono 
560*429b7638SBesar Wicaksono 	/* Choose a new CPU to migrate ownership of the PMU to */
561*429b7638SBesar Wicaksono 	dst = cpumask_any_and_but(&cmem_lat_pmu->associated_cpus,
562*429b7638SBesar Wicaksono 				  cpu_online_mask, cpu);
563*429b7638SBesar Wicaksono 	if (dst >= nr_cpu_ids)
564*429b7638SBesar Wicaksono 		return 0;
565*429b7638SBesar Wicaksono 
566*429b7638SBesar Wicaksono 	/* Use this CPU for event counting */
567*429b7638SBesar Wicaksono 	perf_pmu_migrate_context(&cmem_lat_pmu->pmu, cpu, dst);
568*429b7638SBesar Wicaksono 	cpumask_set_cpu(dst, &cmem_lat_pmu->active_cpu);
569*429b7638SBesar Wicaksono 
570*429b7638SBesar Wicaksono 	return 0;
571*429b7638SBesar Wicaksono }
572*429b7638SBesar Wicaksono 
573*429b7638SBesar Wicaksono static int cmem_lat_pmu_get_cpus(struct cmem_lat_pmu *cmem_lat_pmu,
574*429b7638SBesar Wicaksono 				unsigned int socket)
575*429b7638SBesar Wicaksono {
576*429b7638SBesar Wicaksono 	int cpu;
577*429b7638SBesar Wicaksono 
578*429b7638SBesar Wicaksono 	for_each_possible_cpu(cpu) {
579*429b7638SBesar Wicaksono 		if (cpu_to_node(cpu) == socket)
580*429b7638SBesar Wicaksono 			cpumask_set_cpu(cpu, &cmem_lat_pmu->associated_cpus);
581*429b7638SBesar Wicaksono 	}
582*429b7638SBesar Wicaksono 
583*429b7638SBesar Wicaksono 	if (cpumask_empty(&cmem_lat_pmu->associated_cpus)) {
584*429b7638SBesar Wicaksono 		dev_dbg(cmem_lat_pmu->dev,
585*429b7638SBesar Wicaksono 			"No cpu associated with PMU socket-%u\n", socket);
586*429b7638SBesar Wicaksono 		return -ENODEV;
587*429b7638SBesar Wicaksono 	}
588*429b7638SBesar Wicaksono 
589*429b7638SBesar Wicaksono 	return 0;
590*429b7638SBesar Wicaksono }
591*429b7638SBesar Wicaksono 
592*429b7638SBesar Wicaksono static int cmem_lat_pmu_probe(struct platform_device *pdev)
593*429b7638SBesar Wicaksono {
594*429b7638SBesar Wicaksono 	struct device *dev = &pdev->dev;
595*429b7638SBesar Wicaksono 	struct acpi_device *acpi_dev;
596*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu;
597*429b7638SBesar Wicaksono 	char *name, *uid_str;
598*429b7638SBesar Wicaksono 	int ret, i;
599*429b7638SBesar Wicaksono 	u32 socket;
600*429b7638SBesar Wicaksono 
601*429b7638SBesar Wicaksono 	acpi_dev = ACPI_COMPANION(dev);
602*429b7638SBesar Wicaksono 	if (!acpi_dev)
603*429b7638SBesar Wicaksono 		return -ENODEV;
604*429b7638SBesar Wicaksono 
605*429b7638SBesar Wicaksono 	uid_str = acpi_device_uid(acpi_dev);
606*429b7638SBesar Wicaksono 	if (!uid_str)
607*429b7638SBesar Wicaksono 		return -ENODEV;
608*429b7638SBesar Wicaksono 
609*429b7638SBesar Wicaksono 	ret = kstrtou32(uid_str, 0, &socket);
610*429b7638SBesar Wicaksono 	if (ret)
611*429b7638SBesar Wicaksono 		return ret;
612*429b7638SBesar Wicaksono 
613*429b7638SBesar Wicaksono 	cmem_lat_pmu = devm_kzalloc(dev, sizeof(*cmem_lat_pmu), GFP_KERNEL);
614*429b7638SBesar Wicaksono 	name = devm_kasprintf(dev, GFP_KERNEL, "nvidia_cmem_latency_pmu_%u", socket);
615*429b7638SBesar Wicaksono 	if (!cmem_lat_pmu || !name)
616*429b7638SBesar Wicaksono 		return -ENOMEM;
617*429b7638SBesar Wicaksono 
618*429b7638SBesar Wicaksono 	cmem_lat_pmu->dev = dev;
619*429b7638SBesar Wicaksono 	cmem_lat_pmu->name = name;
620*429b7638SBesar Wicaksono 	cmem_lat_pmu->identifier = acpi_device_hid(acpi_dev);
621*429b7638SBesar Wicaksono 	platform_set_drvdata(pdev, cmem_lat_pmu);
622*429b7638SBesar Wicaksono 
623*429b7638SBesar Wicaksono 	cmem_lat_pmu->pmu = (struct pmu) {
624*429b7638SBesar Wicaksono 		.parent		= &pdev->dev,
625*429b7638SBesar Wicaksono 		.task_ctx_nr	= perf_invalid_context,
626*429b7638SBesar Wicaksono 		.pmu_enable	= cmem_lat_pmu_enable,
627*429b7638SBesar Wicaksono 		.pmu_disable	= cmem_lat_pmu_disable,
628*429b7638SBesar Wicaksono 		.event_init	= cmem_lat_pmu_event_init,
629*429b7638SBesar Wicaksono 		.add		= cmem_lat_pmu_add,
630*429b7638SBesar Wicaksono 		.del		= cmem_lat_pmu_del,
631*429b7638SBesar Wicaksono 		.start		= cmem_lat_pmu_start,
632*429b7638SBesar Wicaksono 		.stop		= cmem_lat_pmu_stop,
633*429b7638SBesar Wicaksono 		.read		= cmem_lat_pmu_read,
634*429b7638SBesar Wicaksono 		.attr_groups	= cmem_lat_pmu_attr_groups,
635*429b7638SBesar Wicaksono 		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE |
636*429b7638SBesar Wicaksono 					PERF_PMU_CAP_NO_INTERRUPT,
637*429b7638SBesar Wicaksono 	};
638*429b7638SBesar Wicaksono 
639*429b7638SBesar Wicaksono 	/* Map the address of all the instances. */
640*429b7638SBesar Wicaksono 	for (i = 0; i < NUM_INSTANCES; i++) {
641*429b7638SBesar Wicaksono 		cmem_lat_pmu->base[i] = devm_platform_ioremap_resource(pdev, i);
642*429b7638SBesar Wicaksono 		if (IS_ERR(cmem_lat_pmu->base[i])) {
643*429b7638SBesar Wicaksono 			dev_err(dev, "Failed map address for instance %d\n", i);
644*429b7638SBesar Wicaksono 			return PTR_ERR(cmem_lat_pmu->base[i]);
645*429b7638SBesar Wicaksono 		}
646*429b7638SBesar Wicaksono 	}
647*429b7638SBesar Wicaksono 
648*429b7638SBesar Wicaksono 	/* Map broadcast address. */
649*429b7638SBesar Wicaksono 	cmem_lat_pmu->base_broadcast = devm_platform_ioremap_resource(pdev,
650*429b7638SBesar Wicaksono 										NUM_INSTANCES);
651*429b7638SBesar Wicaksono 	if (IS_ERR(cmem_lat_pmu->base_broadcast)) {
652*429b7638SBesar Wicaksono 		dev_err(dev, "Failed map broadcast address\n");
653*429b7638SBesar Wicaksono 		return PTR_ERR(cmem_lat_pmu->base_broadcast);
654*429b7638SBesar Wicaksono 	}
655*429b7638SBesar Wicaksono 
656*429b7638SBesar Wicaksono 	ret = cmem_lat_pmu_get_cpus(cmem_lat_pmu, socket);
657*429b7638SBesar Wicaksono 	if (ret)
658*429b7638SBesar Wicaksono 		return ret;
659*429b7638SBesar Wicaksono 
660*429b7638SBesar Wicaksono 	ret = cpuhp_state_add_instance(cmem_lat_pmu_cpuhp_state,
661*429b7638SBesar Wicaksono 				       &cmem_lat_pmu->node);
662*429b7638SBesar Wicaksono 	if (ret) {
663*429b7638SBesar Wicaksono 		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
664*429b7638SBesar Wicaksono 		return ret;
665*429b7638SBesar Wicaksono 	}
666*429b7638SBesar Wicaksono 
667*429b7638SBesar Wicaksono 	cmem_lat_pmu_cg_ctrl(cmem_lat_pmu, CMEM_LAT_CG_CTRL_ENABLE);
668*429b7638SBesar Wicaksono 	cmem_lat_pmu_ctrl(cmem_lat_pmu, CMEM_LAT_CTRL_CLR);
669*429b7638SBesar Wicaksono 	cmem_lat_pmu_cg_ctrl(cmem_lat_pmu, CMEM_LAT_CG_CTRL_DISABLE);
670*429b7638SBesar Wicaksono 
671*429b7638SBesar Wicaksono 	ret = perf_pmu_register(&cmem_lat_pmu->pmu, name, -1);
672*429b7638SBesar Wicaksono 	if (ret) {
673*429b7638SBesar Wicaksono 		dev_err(&pdev->dev, "Failed to register PMU: %d\n", ret);
674*429b7638SBesar Wicaksono 		cpuhp_state_remove_instance(cmem_lat_pmu_cpuhp_state,
675*429b7638SBesar Wicaksono 					    &cmem_lat_pmu->node);
676*429b7638SBesar Wicaksono 		return ret;
677*429b7638SBesar Wicaksono 	}
678*429b7638SBesar Wicaksono 
679*429b7638SBesar Wicaksono 	dev_dbg(&pdev->dev, "Registered %s PMU\n", name);
680*429b7638SBesar Wicaksono 
681*429b7638SBesar Wicaksono 	return 0;
682*429b7638SBesar Wicaksono }
683*429b7638SBesar Wicaksono 
684*429b7638SBesar Wicaksono static void cmem_lat_pmu_device_remove(struct platform_device *pdev)
685*429b7638SBesar Wicaksono {
686*429b7638SBesar Wicaksono 	struct cmem_lat_pmu *cmem_lat_pmu = platform_get_drvdata(pdev);
687*429b7638SBesar Wicaksono 
688*429b7638SBesar Wicaksono 	perf_pmu_unregister(&cmem_lat_pmu->pmu);
689*429b7638SBesar Wicaksono 	cpuhp_state_remove_instance(cmem_lat_pmu_cpuhp_state,
690*429b7638SBesar Wicaksono 				    &cmem_lat_pmu->node);
691*429b7638SBesar Wicaksono }
692*429b7638SBesar Wicaksono 
693*429b7638SBesar Wicaksono static const struct acpi_device_id cmem_lat_pmu_acpi_match[] = {
694*429b7638SBesar Wicaksono 	{ "NVDA2021" },
695*429b7638SBesar Wicaksono 	{ }
696*429b7638SBesar Wicaksono };
697*429b7638SBesar Wicaksono MODULE_DEVICE_TABLE(acpi, cmem_lat_pmu_acpi_match);
698*429b7638SBesar Wicaksono 
699*429b7638SBesar Wicaksono static struct platform_driver cmem_lat_pmu_driver = {
700*429b7638SBesar Wicaksono 	.driver = {
701*429b7638SBesar Wicaksono 		.name = "nvidia-t410-cmem-latency-pmu",
702*429b7638SBesar Wicaksono 		.acpi_match_table = ACPI_PTR(cmem_lat_pmu_acpi_match),
703*429b7638SBesar Wicaksono 		.suppress_bind_attrs = true,
704*429b7638SBesar Wicaksono 	},
705*429b7638SBesar Wicaksono 	.probe = cmem_lat_pmu_probe,
706*429b7638SBesar Wicaksono 	.remove = cmem_lat_pmu_device_remove,
707*429b7638SBesar Wicaksono };
708*429b7638SBesar Wicaksono 
709*429b7638SBesar Wicaksono static int __init cmem_lat_pmu_init(void)
710*429b7638SBesar Wicaksono {
711*429b7638SBesar Wicaksono 	int ret;
712*429b7638SBesar Wicaksono 
713*429b7638SBesar Wicaksono 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
714*429b7638SBesar Wicaksono 				      "perf/nvidia/cmem_latency:online",
715*429b7638SBesar Wicaksono 				      cmem_lat_pmu_cpu_online,
716*429b7638SBesar Wicaksono 				      cmem_lat_pmu_cpu_teardown);
717*429b7638SBesar Wicaksono 	if (ret < 0)
718*429b7638SBesar Wicaksono 		return ret;
719*429b7638SBesar Wicaksono 
720*429b7638SBesar Wicaksono 	cmem_lat_pmu_cpuhp_state = ret;
721*429b7638SBesar Wicaksono 
722*429b7638SBesar Wicaksono 	return platform_driver_register(&cmem_lat_pmu_driver);
723*429b7638SBesar Wicaksono }
724*429b7638SBesar Wicaksono 
725*429b7638SBesar Wicaksono static void __exit cmem_lat_pmu_exit(void)
726*429b7638SBesar Wicaksono {
727*429b7638SBesar Wicaksono 	platform_driver_unregister(&cmem_lat_pmu_driver);
728*429b7638SBesar Wicaksono 	cpuhp_remove_multi_state(cmem_lat_pmu_cpuhp_state);
729*429b7638SBesar Wicaksono }
730*429b7638SBesar Wicaksono 
731*429b7638SBesar Wicaksono module_init(cmem_lat_pmu_init);
732*429b7638SBesar Wicaksono module_exit(cmem_lat_pmu_exit);
733*429b7638SBesar Wicaksono 
734*429b7638SBesar Wicaksono MODULE_LICENSE("GPL");
735*429b7638SBesar Wicaksono MODULE_DESCRIPTION("NVIDIA Tegra410 CPU Memory Latency PMU driver");
736*429b7638SBesar Wicaksono MODULE_AUTHOR("Besar Wicaksono <bwicaksono@nvidia.com>");
737