xref: /linux/drivers/hwtracing/ptt/hisi_ptt.c (revision b92dd11725a7c57f55e148c7d3ce58a86f480575)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Driver for HiSilicon PCIe tune and trace device
4  *
5  * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
6  * Author: Yicong Yang <yangyicong@hisilicon.com>
7  */
8 
9 #include <linux/bitfield.h>
10 #include <linux/bitops.h>
11 #include <linux/cpuhotplug.h>
12 #include <linux/delay.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/interrupt.h>
15 #include <linux/io.h>
16 #include <linux/iommu.h>
17 #include <linux/iopoll.h>
18 #include <linux/module.h>
19 #include <linux/sysfs.h>
20 #include <linux/vmalloc.h>
21 
22 #include "hisi_ptt.h"
23 
24 /* Dynamic CPU hotplug state used by PTT */
25 static enum cpuhp_state hisi_ptt_pmu_online;
26 
/*
 * Poll until the hardware finishes the tuning operation kicked off by
 * writing HISI_PTT_TUNING_DATA. Returns true on completion, false on
 * timeout.
 */
static bool hisi_ptt_wait_tuning_finish(struct hisi_ptt *hisi_ptt)
{
	u32 val;

	return !readl_poll_timeout(hisi_ptt->iobase + HISI_PTT_TUNING_INT_STAT,
				   val, !(val & HISI_PTT_TUNING_INT_STAT_MASK),
				   HISI_PTT_WAIT_POLL_INTERVAL_US,
				   HISI_PTT_WAIT_TUNE_TIMEOUT_US);
}
36 
37 static ssize_t hisi_ptt_tune_attr_show(struct device *dev,
38 				       struct device_attribute *attr,
39 				       char *buf)
40 {
41 	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
42 	struct dev_ext_attribute *ext_attr;
43 	struct hisi_ptt_tune_desc *desc;
44 	u32 reg;
45 	u16 val;
46 
47 	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
48 	desc = ext_attr->var;
49 
50 	mutex_lock(&hisi_ptt->tune_lock);
51 
52 	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
53 	reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
54 	reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
55 			  desc->event_code);
56 	writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
57 
58 	/* Write all 1 to indicates it's the read process */
59 	writel(~0U, hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
60 
61 	if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
62 		mutex_unlock(&hisi_ptt->tune_lock);
63 		return -ETIMEDOUT;
64 	}
65 
66 	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
67 	reg &= HISI_PTT_TUNING_DATA_VAL_MASK;
68 	val = FIELD_GET(HISI_PTT_TUNING_DATA_VAL_MASK, reg);
69 
70 	mutex_unlock(&hisi_ptt->tune_lock);
71 	return sysfs_emit(buf, "%u\n", val);
72 }
73 
74 static ssize_t hisi_ptt_tune_attr_store(struct device *dev,
75 					struct device_attribute *attr,
76 					const char *buf, size_t count)
77 {
78 	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
79 	struct dev_ext_attribute *ext_attr;
80 	struct hisi_ptt_tune_desc *desc;
81 	u32 reg;
82 	u16 val;
83 
84 	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
85 	desc = ext_attr->var;
86 
87 	if (kstrtou16(buf, 10, &val))
88 		return -EINVAL;
89 
90 	mutex_lock(&hisi_ptt->tune_lock);
91 
92 	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
93 	reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
94 	reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
95 			  desc->event_code);
96 	writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
97 	writel(FIELD_PREP(HISI_PTT_TUNING_DATA_VAL_MASK, val),
98 	       hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
99 
100 	if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
101 		mutex_unlock(&hisi_ptt->tune_lock);
102 		return -ETIMEDOUT;
103 	}
104 
105 	mutex_unlock(&hisi_ptt->tune_lock);
106 	return count;
107 }
108 
/*
 * Declare a tuning event descriptor together with the sysfs attribute
 * (mode 0600) that exposes it. The descriptor is linked through
 * dev_ext_attribute->var so the shared show/store callbacks can
 * retrieve the event code.
 */
#define HISI_PTT_TUNE_ATTR(_name, _val, _show, _store)			\
	static struct hisi_ptt_tune_desc _name##_desc = {		\
		.name = #_name,						\
		.event_code = (_val),					\
	};								\
	static struct dev_ext_attribute hisi_ptt_##_name##_attr = {	\
		.attr	= __ATTR(_name, 0600, _show, _store),		\
		.var	= &_name##_desc,				\
	}

/* Shorthand for tuning attributes using the common show/store callbacks */
#define HISI_PTT_TUNE_ATTR_COMMON(_name, _val)		\
	HISI_PTT_TUNE_ATTR(_name, _val,			\
			   hisi_ptt_tune_attr_show,	\
			   hisi_ptt_tune_attr_store)
123 
124 /*
125  * The value of the tuning event are composed of two parts: main event code
126  * in BIT[0,15] and subevent code in BIT[16,23]. For example, qox_tx_cpl is
127  * a subevent of 'Tx path QoS control' which for tuning the weight of Tx
128  * completion TLPs. See hisi_ptt.rst documentation for more information.
129  */
#define HISI_PTT_TUNE_QOS_TX_CPL		(0x4 | (3 << 16))
#define HISI_PTT_TUNE_QOS_TX_NP			(0x4 | (4 << 16))
#define HISI_PTT_TUNE_QOS_TX_P			(0x4 | (5 << 16))
#define HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL	(0x5 | (6 << 16))
#define HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL	(0x5 | (7 << 16))

HISI_PTT_TUNE_ATTR_COMMON(qos_tx_cpl, HISI_PTT_TUNE_QOS_TX_CPL);
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_np, HISI_PTT_TUNE_QOS_TX_NP);
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_p, HISI_PTT_TUNE_QOS_TX_P);
HISI_PTT_TUNE_ATTR_COMMON(rx_alloc_buf_level, HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL);
HISI_PTT_TUNE_ATTR_COMMON(tx_alloc_buf_level, HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL);

/* All tuning attributes, exposed under the "tune" sysfs directory */
static struct attribute *hisi_ptt_tune_attrs[] = {
	&hisi_ptt_qos_tx_cpl_attr.attr.attr,
	&hisi_ptt_qos_tx_np_attr.attr.attr,
	&hisi_ptt_qos_tx_p_attr.attr.attr,
	&hisi_ptt_rx_alloc_buf_level_attr.attr.attr,
	&hisi_ptt_tx_alloc_buf_level_attr.attr.attr,
	NULL,
};

static struct attribute_group hisi_ptt_tune_group = {
	.name	= "tune",
	.attrs	= hisi_ptt_tune_attrs,
};
155 
156 static u16 hisi_ptt_get_filter_val(u16 devid, bool is_port)
157 {
158 	if (is_port)
159 		return BIT(HISI_PCIE_CORE_PORT_ID(devid & 0xff));
160 
161 	return devid;
162 }
163 
/*
 * Poll (atomically, so callable under spinlock) until the trace hardware
 * reports idle. Returns true if idle was reached, false on timeout.
 */
static bool hisi_ptt_wait_trace_hw_idle(struct hisi_ptt *hisi_ptt)
{
	u32 val;

	return !readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_STS,
					  val, val & HISI_PTT_TRACE_IDLE,
					  HISI_PTT_WAIT_POLL_INTERVAL_US,
					  HISI_PTT_WAIT_TRACE_TIMEOUT_US);
}
173 
/*
 * Poll until the trace DMA write status clears after a reset request.
 * A timeout is silently ignored; the caller proceeds regardless.
 */
static void hisi_ptt_wait_dma_reset_done(struct hisi_ptt *hisi_ptt)
{
	u32 val;

	readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS,
				  val, !val, HISI_PTT_RESET_POLL_INTERVAL_US,
				  HISI_PTT_RESET_TIMEOUT_US);
}
182 
/* Stop tracing by clearing the trace control register and mark it so. */
static void hisi_ptt_trace_end(struct hisi_ptt *hisi_ptt)
{
	writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
	hisi_ptt->trace_ctrl.started = false;
}
188 
/*
 * Configure and start a trace session. The device must be idle. The
 * trace DMA is reset, the trace buffers are zeroed and the trace
 * interrupts cleared/unmasked before the configured type, direction,
 * format and filter are written to the control register together with
 * the enable bit. Returns 0 on success or -EBUSY if the hardware is
 * still busy.
 */
static int hisi_ptt_trace_start(struct hisi_ptt *hisi_ptt)
{
	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
	u32 val;
	int i;

	/* Check device idle before start trace */
	if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt)) {
		pci_err(hisi_ptt->pdev, "Failed to start trace, the device is still busy\n");
		return -EBUSY;
	}

	ctrl->started = true;

	/* Reset the DMA before start tracing */
	val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
	val |= HISI_PTT_TRACE_CTRL_RST;
	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);

	hisi_ptt_wait_dma_reset_done(hisi_ptt);

	val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
	val &= ~HISI_PTT_TRACE_CTRL_RST;
	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);

	/* Reset the index of current buffer */
	hisi_ptt->trace_ctrl.buf_index = 0;

	/* Zero the trace buffers */
	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++)
		memset(ctrl->trace_buf[i].addr, 0, HISI_PTT_TRACE_BUF_SIZE);

	/* Clear the interrupt status */
	writel(HISI_PTT_TRACE_INT_STAT_MASK, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
	writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_INT_MASK);

	/* Set the trace control register */
	val = FIELD_PREP(HISI_PTT_TRACE_CTRL_TYPE_SEL, ctrl->type);
	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_RXTX_SEL, ctrl->direction);
	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_DATA_FORMAT, ctrl->format);
	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_TARGET_SEL, hisi_ptt->trace_ctrl.filter);
	if (!hisi_ptt->trace_ctrl.is_port)
		val |= HISI_PTT_TRACE_CTRL_FILTER_MODE;

	/* Start the Trace */
	val |= HISI_PTT_TRACE_CTRL_EN;
	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);

	return 0;
}
239 
/*
 * Copy the data of the current trace buffer into the perf AUX buffer.
 * If @stop, read the actually-written size from the hardware and commit
 * the AUX handle; otherwise (interrupt path) copy a full buffer and, if
 * the remaining AUX space cannot hold another one, commit the handle
 * and apply for a new one. Returns 0 on success or -EINVAL if the AUX
 * buffer is unusable.
 *
 * NOTE(review): @index is currently unused here -- the cached
 * ctrl->buf_index is read instead; callers keep the two in sync.
 */
static int hisi_ptt_update_aux(struct hisi_ptt *hisi_ptt, int index, bool stop)
{
	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
	struct perf_output_handle *handle = &ctrl->handle;
	struct perf_event *event = handle->event;
	struct hisi_ptt_pmu_buf *buf;
	size_t size;
	void *addr;

	buf = perf_get_aux(handle);
	if (!buf || !handle->size)
		return -EINVAL;

	addr = ctrl->trace_buf[ctrl->buf_index].addr;

	/*
	 * If we're going to stop, read the size of already traced data from
	 * HISI_PTT_TRACE_WR_STS. Otherwise we're coming from the interrupt,
	 * the data size is always HISI_PTT_TRACE_BUF_SIZE.
	 */
	if (stop) {
		u32 reg;

		reg = readl(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS);
		size = FIELD_GET(HISI_PTT_TRACE_WR_STS_WRITE, reg);
	} else {
		size = HISI_PTT_TRACE_BUF_SIZE;
	}

	memcpy(buf->base + buf->pos, addr, size);
	buf->pos += size;

	/*
	 * Just commit the traced data if we're going to stop. Otherwise if the
	 * resident AUX buffer cannot contain the data of next trace buffer,
	 * apply a new one.
	 */
	if (stop) {
		perf_aux_output_end(handle, buf->pos);
	} else if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
		perf_aux_output_end(handle, buf->pos);

		buf = perf_aux_output_begin(handle, event);
		if (!buf)
			return -EINVAL;

		buf->pos = handle->head % buf->length;
		if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
			perf_aux_output_end(handle, 0);
			return -EINVAL;
		}
	}

	return 0;
}
295 
/*
 * Trace DMA interrupt handler (threaded). Each trace buffer raises its
 * own status bit when filled; pick the lowest pending one, clear the
 * status, push the data to the AUX buffer and advance to the next
 * buffer. The trace is ended if the AUX update fails.
 */
static irqreturn_t hisi_ptt_isr(int irq, void *context)
{
	struct hisi_ptt *hisi_ptt = context;
	u32 status, buf_idx;

	status = readl(hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
	if (!(status & HISI_PTT_TRACE_INT_STAT_MASK))
		return IRQ_NONE;

	buf_idx = ffs(status) - 1;

	/* Clear the interrupt status of buffer @buf_idx */
	writel(status, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);

	/*
	 * Update the AUX buffer and cache the current buffer index,
	 * as we need to know this and save the data when the trace
	 * is ended out of the interrupt handler. End the trace
	 * if the updating fails.
	 */
	if (hisi_ptt_update_aux(hisi_ptt, buf_idx, false))
		hisi_ptt_trace_end(hisi_ptt);
	else
		hisi_ptt->trace_ctrl.buf_index = (buf_idx + 1) % HISI_PTT_TRACE_BUF_CNT;

	return IRQ_HANDLED;
}
323 
/* devm action: release the IRQ vectors allocated for the device */
static void hisi_ptt_irq_free_vectors(void *pdev)
{
	pci_free_irq_vectors(pdev);
}
328 
/*
 * Allocate a single MSI vector and request the trace DMA interrupt as a
 * threaded handler (no hardirq handler). Both the vectors and the IRQ
 * are managed by devres. Returns 0 on success or a negative errno.
 */
static int hisi_ptt_register_irq(struct hisi_ptt *hisi_ptt)
{
	struct pci_dev *pdev = hisi_ptt->pdev;
	int ret;

	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
	if (ret < 0) {
		pci_err(pdev, "failed to allocate irq vector, ret = %d\n", ret);
		return ret;
	}

	ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_irq_free_vectors, pdev);
	if (ret < 0)
		return ret;

	ret = devm_request_threaded_irq(&pdev->dev,
					pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ),
					NULL, hisi_ptt_isr, 0,
					DRV_NAME, hisi_ptt);
	if (ret) {
		pci_err(pdev, "failed to request irq %d, ret = %d\n",
			pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ), ret);
		return ret;
	}

	return 0;
}
356 
/*
 * pci_walk_bus() callback: allocate a filter entry for @pdev and add it
 * to the Root Port filter list or the Requester filter list. Root Ports
 * additionally contribute a bit to the available port mask.
 */
static int hisi_ptt_init_filters(struct pci_dev *pdev, void *data)
{
	struct hisi_ptt_filter_desc *filter;
	struct hisi_ptt *hisi_ptt = data;

	/*
	 * We won't fail the probe if filter allocation failed here. The filters
	 * should be partial initialized and users would know which filter fails
	 * through the log. Other functions of PTT device are still available.
	 */
	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
	if (!filter) {
		pci_err(hisi_ptt->pdev, "failed to add filter %s\n", pci_name(pdev));
		return -ENOMEM;
	}

	filter->devid = PCI_DEVID(pdev->bus->number, pdev->devfn);

	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) {
		filter->is_port = true;
		list_add_tail(&filter->list, &hisi_ptt->port_filters);

		/* Update the available port mask */
		hisi_ptt->port_mask |= hisi_ptt_get_filter_val(filter->devid, true);
	} else {
		list_add_tail(&filter->list, &hisi_ptt->req_filters);
	}

	return 0;
}
387 
388 static void hisi_ptt_release_filters(void *data)
389 {
390 	struct hisi_ptt_filter_desc *filter, *tmp;
391 	struct hisi_ptt *hisi_ptt = data;
392 
393 	list_for_each_entry_safe(filter, tmp, &hisi_ptt->req_filters, list) {
394 		list_del(&filter->list);
395 		kfree(filter);
396 	}
397 
398 	list_for_each_entry_safe(filter, tmp, &hisi_ptt->port_filters, list) {
399 		list_del(&filter->list);
400 		kfree(filter);
401 	}
402 }
403 
/*
 * Allocate the coherent DMA trace buffers and program their bus
 * addresses and common size into the hardware. All allocations are
 * device-managed, so no explicit teardown is needed.
 * Returns 0 on success or -ENOMEM.
 */
static int hisi_ptt_config_trace_buf(struct hisi_ptt *hisi_ptt)
{
	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
	struct device *dev = &hisi_ptt->pdev->dev;
	int i;

	ctrl->trace_buf = devm_kcalloc(dev, HISI_PTT_TRACE_BUF_CNT,
				       sizeof(*ctrl->trace_buf), GFP_KERNEL);
	if (!ctrl->trace_buf)
		return -ENOMEM;

	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; ++i) {
		ctrl->trace_buf[i].addr = dmam_alloc_coherent(dev, HISI_PTT_TRACE_BUF_SIZE,
							     &ctrl->trace_buf[i].dma,
							     GFP_KERNEL);
		if (!ctrl->trace_buf[i].addr)
			return -ENOMEM;
	}

	/* Configure the trace DMA buffer */
	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++) {
		writel(lower_32_bits(ctrl->trace_buf[i].dma),
		       hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_LO_0 +
		       i * HISI_PTT_TRACE_ADDR_STRIDE);
		writel(upper_32_bits(ctrl->trace_buf[i].dma),
		       hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_HI_0 +
		       i * HISI_PTT_TRACE_ADDR_STRIDE);
	}
	writel(HISI_PTT_TRACE_BUF_SIZE, hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_SIZE);

	return 0;
}
436 
/*
 * Initialize the trace controls: set up the trace DMA buffers, then
 * discover the Root Ports and Requesters this RCiEP can trace by
 * reading the device range register and walking the matching bus to
 * build the filter lists. Returns 0 on success or a negative errno.
 */
static int hisi_ptt_init_ctrls(struct hisi_ptt *hisi_ptt)
{
	struct pci_dev *pdev = hisi_ptt->pdev;
	struct pci_bus *bus;
	int ret;
	u32 reg;

	INIT_LIST_HEAD(&hisi_ptt->port_filters);
	INIT_LIST_HEAD(&hisi_ptt->req_filters);

	ret = hisi_ptt_config_trace_buf(hisi_ptt);
	if (ret)
		return ret;

	/*
	 * The device range register provides the information about the root
	 * ports which the RCiEP can control and trace. The RCiEP and the root
	 * ports which it supports are on the same PCIe core, with same domain
	 * number but maybe different bus number. The device range register
	 * will tell us which root ports we can support, Bit[31:16] indicates
	 * the upper BDF numbers of the root port, while Bit[15:0] indicates
	 * the lower.
	 */
	reg = readl(hisi_ptt->iobase + HISI_PTT_DEVICE_RANGE);
	hisi_ptt->upper_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_UPPER, reg);
	hisi_ptt->lower_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_LOWER, reg);

	bus = pci_find_bus(pci_domain_nr(pdev->bus), PCI_BUS_NUM(hisi_ptt->upper_bdf));
	if (bus)
		pci_walk_bus(bus, hisi_ptt_init_filters, hisi_ptt);

	ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_release_filters, hisi_ptt);
	if (ret)
		return ret;

	/* No trace CPU chosen yet */
	hisi_ptt->trace_ctrl.on_cpu = -1;
	return 0;
}
475 
/* sysfs: expose the CPUs on the PTT device's NUMA node */
static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
	const cpumask_t *cpumask = cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev));

	return cpumap_print_to_pagebuf(true, buf, cpumask);
}
static DEVICE_ATTR_RO(cpumask);
485 
static struct attribute *hisi_ptt_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL
};

static const struct attribute_group hisi_ptt_cpumask_attr_group = {
	.attrs = hisi_ptt_cpumask_attrs,
};

/*
 * Bit 19 indicates the filter type, 1 for Root Port filter and 0 for Requester
 * filter. Bit[15:0] indicates the filter value, for Root Port filter it's
 * a bit mask of desired ports and for Requester filter it's the Requester ID
 * of the desired PCIe function. Bit[18:16] is reserved for extension.
 *
 * See hisi_ptt.rst documentation for detailed information.
 */
PMU_FORMAT_ATTR(filter,		"config:0-19");
PMU_FORMAT_ATTR(direction,	"config:20-23");
PMU_FORMAT_ATTR(type,		"config:24-31");
PMU_FORMAT_ATTR(format,		"config:32-35");

static struct attribute *hisi_ptt_pmu_format_attrs[] = {
	&format_attr_filter.attr,
	&format_attr_direction.attr,
	&format_attr_type.attr,
	&format_attr_format.attr,
	NULL
};

static struct attribute_group hisi_ptt_pmu_format_group = {
	.name = "format",
	.attrs = hisi_ptt_pmu_format_attrs,
};

/* All sysfs attribute groups exposed by the PTT PMU device */
static const struct attribute_group *hisi_ptt_pmu_groups[] = {
	&hisi_ptt_cpumask_attr_group,
	&hisi_ptt_pmu_format_group,
	&hisi_ptt_tune_group,
	NULL
};
527 
528 static int hisi_ptt_trace_valid_direction(u32 val)
529 {
530 	/*
531 	 * The direction values have different effects according to the data
532 	 * format (specified in the parentheses). TLP set A/B means different
533 	 * set of TLP types. See hisi_ptt.rst documentation for more details.
534 	 */
535 	static const u32 hisi_ptt_trace_available_direction[] = {
536 		0,	/* inbound(4DW) or reserved(8DW) */
537 		1,	/* outbound(4DW) */
538 		2,	/* {in, out}bound(4DW) or inbound(8DW), TLP set A */
539 		3,	/* {in, out}bound(4DW) or inbound(8DW), TLP set B */
540 	};
541 	int i;
542 
543 	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_direction); i++) {
544 		if (val == hisi_ptt_trace_available_direction[i])
545 			return 0;
546 	}
547 
548 	return -EINVAL;
549 }
550 
551 static int hisi_ptt_trace_valid_type(u32 val)
552 {
553 	/* Different types can be set simultaneously */
554 	static const u32 hisi_ptt_trace_available_type[] = {
555 		1,	/* posted_request */
556 		2,	/* non-posted_request */
557 		4,	/* completion */
558 	};
559 	int i;
560 
561 	if (!val)
562 		return -EINVAL;
563 
564 	/*
565 	 * Walk the available list and clear the valid bits of
566 	 * the config. If there is any resident bit after the
567 	 * walk then the config is invalid.
568 	 */
569 	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_type); i++)
570 		val &= ~hisi_ptt_trace_available_type[i];
571 
572 	if (val)
573 		return -EINVAL;
574 
575 	return 0;
576 }
577 
578 static int hisi_ptt_trace_valid_format(u32 val)
579 {
580 	static const u32 hisi_ptt_trace_availble_format[] = {
581 		0,	/* 4DW */
582 		1,	/* 8DW */
583 	};
584 	int i;
585 
586 	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_availble_format); i++) {
587 		if (val == hisi_ptt_trace_availble_format[i])
588 			return 0;
589 	}
590 
591 	return -EINVAL;
592 }
593 
/*
 * Validate the filter settings in @config and cache whether it's a Root
 * Port filter in the trace control. Returns 0 if the filter is valid,
 * -EINVAL otherwise.
 */
static int hisi_ptt_trace_valid_filter(struct hisi_ptt *hisi_ptt, u64 config)
{
	unsigned long val, port_mask = hisi_ptt->port_mask;
	struct hisi_ptt_filter_desc *filter;

	hisi_ptt->trace_ctrl.is_port = FIELD_GET(HISI_PTT_PMU_FILTER_IS_PORT, config);
	val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, config);

	/*
	 * Port filters are defined as bit mask. For port filters, check
	 * the bits in the @val are within the range of hisi_ptt->port_mask
	 * and whether it's empty or not, otherwise user has specified
	 * some unsupported root ports.
	 *
	 * For Requester ID filters, walk the available filter list to see
	 * whether we have one matched.
	 */
	if (!hisi_ptt->trace_ctrl.is_port) {
		list_for_each_entry(filter, &hisi_ptt->req_filters, list) {
			if (val == hisi_ptt_get_filter_val(filter->devid, filter->is_port))
				return 0;
		}
	} else if (bitmap_subset(&val, &port_mask, BITS_PER_LONG)) {
		return 0;
	}

	return -EINVAL;
}
622 
623 static void hisi_ptt_pmu_init_configs(struct hisi_ptt *hisi_ptt, struct perf_event *event)
624 {
625 	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
626 	u32 val;
627 
628 	val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, event->attr.config);
629 	hisi_ptt->trace_ctrl.filter = val;
630 
631 	val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
632 	ctrl->direction = val;
633 
634 	val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
635 	ctrl->type = val;
636 
637 	val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
638 	ctrl->format = val;
639 }
640 
641 static int hisi_ptt_pmu_event_init(struct perf_event *event)
642 {
643 	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
644 	int ret;
645 	u32 val;
646 
647 	if (event->cpu < 0) {
648 		dev_dbg(event->pmu->dev, "Per-task mode not supported\n");
649 		return -EOPNOTSUPP;
650 	}
651 
652 	if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type)
653 		return -ENOENT;
654 
655 	ret = hisi_ptt_trace_valid_filter(hisi_ptt, event->attr.config);
656 	if (ret < 0)
657 		return ret;
658 
659 	val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
660 	ret = hisi_ptt_trace_valid_direction(val);
661 	if (ret < 0)
662 		return ret;
663 
664 	val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
665 	ret = hisi_ptt_trace_valid_type(val);
666 	if (ret < 0)
667 		return ret;
668 
669 	val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
670 	return hisi_ptt_trace_valid_format(val);
671 }
672 
/*
 * Set up the AUX buffer for @event: vmap the perf-provided pages into
 * one contiguous kernel mapping so the trace data can be copied with a
 * single memcpy. Fails (returns NULL) for overwrite mode or if the AUX
 * area is smaller than the total trace buffer size. The temporary page
 * list is freed once the mapping exists.
 */
static void *hisi_ptt_pmu_setup_aux(struct perf_event *event, void **pages,
				    int nr_pages, bool overwrite)
{
	struct hisi_ptt_pmu_buf *buf;
	struct page **pagelist;
	int i;

	if (overwrite) {
		dev_warn(event->pmu->dev, "Overwrite mode is not supported\n");
		return NULL;
	}

	/* If the pages size less than buffers, we cannot start trace */
	if (nr_pages < HISI_PTT_TRACE_TOTAL_BUF_SIZE / PAGE_SIZE)
		return NULL;

	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
	if (!buf)
		return NULL;

	pagelist = kcalloc(nr_pages, sizeof(*pagelist), GFP_KERNEL);
	if (!pagelist)
		goto err;

	for (i = 0; i < nr_pages; i++)
		pagelist[i] = virt_to_page(pages[i]);

	buf->base = vmap(pagelist, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!buf->base) {
		kfree(pagelist);
		goto err;
	}

	buf->nr_pages = nr_pages;
	buf->length = nr_pages * PAGE_SIZE;
	buf->pos = 0;

	kfree(pagelist);
	return buf;
err:
	kfree(buf);
	return NULL;
}
716 
/* Tear down the AUX mapping created by hisi_ptt_pmu_setup_aux() */
static void hisi_ptt_pmu_free_aux(void *aux)
{
	struct hisi_ptt_pmu_buf *buf = aux;

	vunmap(buf->base);
	kfree(buf);
}
724 
/*
 * Start tracing for @event: bind the trace interrupt to the event's
 * CPU, grab an AUX output handle, program the hardware and kick off the
 * trace. Only one trace session may run at a time, serialized by
 * pmu_lock. On any failure the event is marked PERF_HES_STOPPED.
 */
static void hisi_ptt_pmu_start(struct perf_event *event, int flags)
{
	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
	struct perf_output_handle *handle = &hisi_ptt->trace_ctrl.handle;
	struct hw_perf_event *hwc = &event->hw;
	struct device *dev = event->pmu->dev;
	struct hisi_ptt_pmu_buf *buf;
	int cpu = event->cpu;
	int ret;

	hwc->state = 0;

	/* Serialize the perf process if user specified several CPUs */
	spin_lock(&hisi_ptt->pmu_lock);
	if (hisi_ptt->trace_ctrl.started) {
		dev_dbg(dev, "trace has already started\n");
		goto stop;
	}

	/*
	 * Handle the interrupt on the same cpu which starts the trace to avoid
	 * context mismatch. Otherwise we'll trigger the WARN from the perf
	 * core in event_function_local(). If CPU passed is offline we'll fail
	 * here, just log it since we can do nothing here.
	 */
	ret = irq_set_affinity(pci_irq_vector(hisi_ptt->pdev, HISI_PTT_TRACE_DMA_IRQ),
					      cpumask_of(cpu));
	if (ret)
		dev_warn(dev, "failed to set the affinity of trace interrupt\n");

	hisi_ptt->trace_ctrl.on_cpu = cpu;

	buf = perf_aux_output_begin(handle, event);
	if (!buf) {
		dev_dbg(dev, "aux output begin failed\n");
		goto stop;
	}

	buf->pos = handle->head % buf->length;

	hisi_ptt_pmu_init_configs(hisi_ptt, event);

	ret = hisi_ptt_trace_start(hisi_ptt);
	if (ret) {
		dev_dbg(dev, "trace start failed, ret = %d\n", ret);
		perf_aux_output_end(handle, 0);
		goto stop;
	}

	spin_unlock(&hisi_ptt->pmu_lock);
	return;
stop:
	event->hw.state |= PERF_HES_STOPPED;
	spin_unlock(&hisi_ptt->pmu_lock);
}
780 
/*
 * Stop tracing for @event and flush the remaining hardware data to the
 * AUX buffer. Safe to call when the event is already stopped.
 */
static void hisi_ptt_pmu_stop(struct perf_event *event, int flags)
{
	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->state & PERF_HES_STOPPED)
		return;

	spin_lock(&hisi_ptt->pmu_lock);
	if (hisi_ptt->trace_ctrl.started) {
		hisi_ptt_trace_end(hisi_ptt);

		/* Wait the DMA to drain before saving the residue data */
		if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt))
			dev_warn(event->pmu->dev, "Device is still busy\n");

		hisi_ptt_update_aux(hisi_ptt, hisi_ptt->trace_ctrl.buf_index, true);
	}
	spin_unlock(&hisi_ptt->pmu_lock);

	hwc->state |= PERF_HES_STOPPED;
	perf_event_update_userpage(event);
	hwc->state |= PERF_HES_UPTODATE;
}
804 
/*
 * Add @event on this CPU and optionally start it. Events scheduled on
 * CPUs outside the device's NUMA node are accepted (return 0) but never
 * started.
 */
static int hisi_ptt_pmu_add(struct perf_event *event, int flags)
{
	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int cpu = event->cpu;

	/* Only allow the cpus on the device's node to add the event */
	if (!cpumask_test_cpu(cpu, cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev))))
		return 0;

	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

	if (flags & PERF_EF_START) {
		hisi_ptt_pmu_start(event, PERF_EF_RELOAD);
		if (hwc->state & PERF_HES_STOPPED)
			return -EINVAL;
	}

	return 0;
}
825 
/* Remove @event; all the teardown work is done in the stop callback */
static void hisi_ptt_pmu_del(struct perf_event *event, int flags)
{
	hisi_ptt_pmu_stop(event, PERF_EF_UPDATE);
}
830 
/* devm action: remove this device's instance from the CPU hotplug state */
static void hisi_ptt_remove_cpuhp_instance(void *hotplug_node)
{
	cpuhp_state_remove_instance_nocalls(hisi_ptt_pmu_online, hotplug_node);
}
835 
/* devm action: unregister the PMU registered by hisi_ptt_register_pmu() */
static void hisi_ptt_unregister_pmu(void *pmu)
{
	perf_pmu_unregister(pmu);
}
840 
/*
 * Register this device with the CPU hotplug state and the perf core.
 * The PMU is named "hisi_ptt<sicl>_<core>" from the device's location
 * register. All registrations are undone via devm actions.
 * Returns 0 on success or a negative errno.
 */
static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt)
{
	u16 core_id, sicl_id;
	char *pmu_name;
	u32 reg;
	int ret;

	ret = cpuhp_state_add_instance_nocalls(hisi_ptt_pmu_online,
					       &hisi_ptt->hotplug_node);
	if (ret)
		return ret;

	ret = devm_add_action_or_reset(&hisi_ptt->pdev->dev,
				       hisi_ptt_remove_cpuhp_instance,
				       &hisi_ptt->hotplug_node);
	if (ret)
		return ret;

	mutex_init(&hisi_ptt->tune_lock);
	spin_lock_init(&hisi_ptt->pmu_lock);

	hisi_ptt->hisi_ptt_pmu = (struct pmu) {
		.module		= THIS_MODULE,
		.capabilities	= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
		.task_ctx_nr	= perf_sw_context,
		.attr_groups	= hisi_ptt_pmu_groups,
		.event_init	= hisi_ptt_pmu_event_init,
		.setup_aux	= hisi_ptt_pmu_setup_aux,
		.free_aux	= hisi_ptt_pmu_free_aux,
		.start		= hisi_ptt_pmu_start,
		.stop		= hisi_ptt_pmu_stop,
		.add		= hisi_ptt_pmu_add,
		.del		= hisi_ptt_pmu_del,
	};

	reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION);
	core_id = FIELD_GET(HISI_PTT_CORE_ID, reg);
	sicl_id = FIELD_GET(HISI_PTT_SICL_ID, reg);

	pmu_name = devm_kasprintf(&hisi_ptt->pdev->dev, GFP_KERNEL, "hisi_ptt%u_%u",
				  sicl_id, core_id);
	if (!pmu_name)
		return -ENOMEM;

	ret = perf_pmu_register(&hisi_ptt->hisi_ptt_pmu, pmu_name, -1);
	if (ret)
		return ret;

	return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
					hisi_ptt_unregister_pmu,
					&hisi_ptt->hisi_ptt_pmu);
}
893 
894 /*
895  * The DMA of PTT trace can only use direct mappings due to some
896  * hardware restriction. Check whether there is no IOMMU or the
897  * policy of the IOMMU domain is passthrough, otherwise the trace
898  * cannot work.
899  *
900  * The PTT device is supposed to behind an ARM SMMUv3, which
901  * should have passthrough the device by a quirk.
902  */
903 static int hisi_ptt_check_iommu_mapping(struct pci_dev *pdev)
904 {
905 	struct iommu_domain *iommu_domain;
906 
907 	iommu_domain = iommu_get_domain_for_dev(&pdev->dev);
908 	if (!iommu_domain || iommu_domain->type == IOMMU_DOMAIN_IDENTITY)
909 		return 0;
910 
911 	return -EOPNOTSUPP;
912 }
913 
/*
 * Probe the PTT RCiEP: verify direct DMA mapping, map BAR 2, set up the
 * DMA mask, the trace interrupt and the trace controls, then register
 * the PMU. Every resource is device-managed, so the driver needs no
 * remove callback.
 */
static int hisi_ptt_probe(struct pci_dev *pdev,
			  const struct pci_device_id *id)
{
	struct hisi_ptt *hisi_ptt;
	int ret;

	ret = hisi_ptt_check_iommu_mapping(pdev);
	if (ret) {
		pci_err(pdev, "requires direct DMA mappings\n");
		return ret;
	}

	hisi_ptt = devm_kzalloc(&pdev->dev, sizeof(*hisi_ptt), GFP_KERNEL);
	if (!hisi_ptt)
		return -ENOMEM;

	hisi_ptt->pdev = pdev;
	pci_set_drvdata(pdev, hisi_ptt);

	ret = pcim_enable_device(pdev);
	if (ret) {
		pci_err(pdev, "failed to enable device, ret = %d\n", ret);
		return ret;
	}

	/* The device registers live in BAR 2 */
	ret = pcim_iomap_regions(pdev, BIT(2), DRV_NAME);
	if (ret) {
		pci_err(pdev, "failed to remap io memory, ret = %d\n", ret);
		return ret;
	}

	hisi_ptt->iobase = pcim_iomap_table(pdev)[2];

	ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
	if (ret) {
		pci_err(pdev, "failed to set 64 bit dma mask, ret = %d\n", ret);
		return ret;
	}

	pci_set_master(pdev);

	ret = hisi_ptt_register_irq(hisi_ptt);
	if (ret)
		return ret;

	ret = hisi_ptt_init_ctrls(hisi_ptt);
	if (ret) {
		pci_err(pdev, "failed to init controls, ret = %d\n", ret);
		return ret;
	}

	ret = hisi_ptt_register_pmu(hisi_ptt);
	if (ret) {
		pci_err(pdev, "failed to register PMU device, ret = %d", ret);
		return ret;
	}

	return 0;
}
973 
/* Devices handled by this driver: the HiSilicon PTT RCiEP */
static const struct pci_device_id hisi_ptt_id_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa12e) },
	{ }
};
MODULE_DEVICE_TABLE(pci, hisi_ptt_id_tbl);

/* No .remove: all resources are managed by devres in probe */
static struct pci_driver hisi_ptt_driver = {
	.name = DRV_NAME,
	.id_table = hisi_ptt_id_tbl,
	.probe = hisi_ptt_probe,
};
985 
/*
 * CPU hotplug teardown callback: if the CPU going offline is the one
 * the trace is running on, migrate the perf context and the trace
 * interrupt affinity to another CPU on the device's node. Always
 * returns 0; failures are only logged.
 */
static int hisi_ptt_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
	struct hisi_ptt *hisi_ptt;
	struct device *dev;
	int target, src;

	hisi_ptt = hlist_entry_safe(node, struct hisi_ptt, hotplug_node);
	src = hisi_ptt->trace_ctrl.on_cpu;
	dev = hisi_ptt->hisi_ptt_pmu.dev;

	if (!hisi_ptt->trace_ctrl.started || src != cpu)
		return 0;

	target = cpumask_any_but(cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev)), cpu);
	if (target >= nr_cpu_ids) {
		dev_err(dev, "no available cpu for perf context migration\n");
		return 0;
	}

	perf_pmu_migrate_context(&hisi_ptt->hisi_ptt_pmu, src, target);

	/*
	 * Also make sure the interrupt bind to the migrated CPU as well. Warn
	 * the user on failure here.
	 */
	if (irq_set_affinity(pci_irq_vector(hisi_ptt->pdev, HISI_PTT_TRACE_DMA_IRQ),
					    cpumask_of(target)))
		dev_warn(dev, "failed to set the affinity of trace interrupt\n");

	hisi_ptt->trace_ctrl.on_cpu = target;
	return 0;
}
1018 
/*
 * Module init: set up the dynamic CPU hotplug state shared by all PTT
 * devices, then register the PCI driver. The hotplug state is rolled
 * back if the driver registration fails.
 */
static int __init hisi_ptt_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRV_NAME, NULL,
				      hisi_ptt_cpu_teardown);
	if (ret < 0)
		return ret;
	hisi_ptt_pmu_online = ret;

	ret = pci_register_driver(&hisi_ptt_driver);
	if (ret)
		cpuhp_remove_multi_state(hisi_ptt_pmu_online);

	return ret;
}
1035 module_init(hisi_ptt_init);
1036 
/* Module exit: unregister the driver and tear down the hotplug state */
static void __exit hisi_ptt_exit(void)
{
	pci_unregister_driver(&hisi_ptt_driver);
	cpuhp_remove_multi_state(hisi_ptt_pmu_online);
}
1043 
1044 MODULE_LICENSE("GPL");
1045 MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>");
1046 MODULE_DESCRIPTION("Driver for HiSilicon PCIe tune and trace device");
1047