// SPDX-License-Identifier: GPL-2.0
/*
 * Driver for HiSilicon PCIe tune and trace device
 *
 * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
 * Author: Yicong Yang <yangyicong@hisilicon.com>
 */

#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/cpuhotplug.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/vmalloc.h>

#include "hisi_ptt.h"

/* Dynamic CPU hotplug state used by PTT */
static enum cpuhp_state hisi_ptt_pmu_online;

static bool hisi_ptt_wait_tuning_finish(struct hisi_ptt *hisi_ptt)
{
	u32 val;

	return !readl_poll_timeout(hisi_ptt->iobase + HISI_PTT_TUNING_INT_STAT,
				   val, !(val & HISI_PTT_TUNING_INT_STAT_MASK),
				   HISI_PTT_WAIT_POLL_INTERVAL_US,
				   HISI_PTT_WAIT_TUNE_TIMEOUT_US);
}

static ssize_t hisi_ptt_tune_attr_show(struct device *dev,
				       struct device_attribute *attr,
				       char *buf)
{
	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
	struct dev_ext_attribute *ext_attr;
	struct hisi_ptt_tune_desc *desc;
	u32 reg;
	u16 val;

	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
	desc = ext_attr->var;

	mutex_lock(&hisi_ptt->tune_lock);

	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
	reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
	reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
			  desc->event_code);
	writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);

	/* Write all 1s to indicate this is a read operation */
	writel(~0U, hisi_ptt->iobase + HISI_PTT_TUNING_DATA);

	if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
		mutex_unlock(&hisi_ptt->tune_lock);
		return -ETIMEDOUT;
	}

	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
	reg &= HISI_PTT_TUNING_DATA_VAL_MASK;
	val = FIELD_GET(HISI_PTT_TUNING_DATA_VAL_MASK, reg);

	mutex_unlock(&hisi_ptt->tune_lock);
	return sysfs_emit(buf, "%u\n", val);
}

static ssize_t hisi_ptt_tune_attr_store(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
{
	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
	struct dev_ext_attribute *ext_attr;
	struct hisi_ptt_tune_desc *desc;
	u32 reg;
	u16 val;

	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
	desc = ext_attr->var;

	if (kstrtou16(buf, 10, &val))
		return -EINVAL;

	mutex_lock(&hisi_ptt->tune_lock);

	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
	reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
	reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
			  desc->event_code);
	writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
	writel(FIELD_PREP(HISI_PTT_TUNING_DATA_VAL_MASK, val),
	       hisi_ptt->iobase + HISI_PTT_TUNING_DATA);

	if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
		mutex_unlock(&hisi_ptt->tune_lock);
		return -ETIMEDOUT;
	}

	mutex_unlock(&hisi_ptt->tune_lock);
	return count;
}
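/*
 * Illustrative userspace usage of the tune interface above (a sketch
 * only; the PMU name "hisi_ptt0_2" and the value written are examples,
 * the valid range of each knob is described in hisi_ptt.rst):
 *
 *   $ cat /sys/devices/hisi_ptt0_2/tune/qos_tx_cpl
 *   $ echo 2 > /sys/devices/hisi_ptt0_2/tune/qos_tx_cpl
 *
 * A read returns the current hardware value; a write requests a new one
 * and blocks until the hardware acknowledges it or the wait times out.
 */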
#define HISI_PTT_TUNE_ATTR(_name, _val, _show, _store)			\
	static struct hisi_ptt_tune_desc _name##_desc = {		\
		.name = #_name,						\
		.event_code = (_val),					\
	};								\
	static struct dev_ext_attribute hisi_ptt_##_name##_attr = {	\
		.attr	= __ATTR(_name, 0600, _show, _store),		\
		.var	= &_name##_desc,				\
	}

#define HISI_PTT_TUNE_ATTR_COMMON(_name, _val)		\
	HISI_PTT_TUNE_ATTR(_name, _val,			\
			   hisi_ptt_tune_attr_show,	\
			   hisi_ptt_tune_attr_store)

/*
 * The value of a tuning event is composed of two parts: the main event
 * code in BIT[15:0] and the subevent code in BIT[23:16]. For example,
 * qos_tx_cpl is a subevent of 'Tx path QoS control' which tunes the
 * weight of Tx completion TLPs. See the hisi_ptt.rst documentation for
 * more information.
 */
#define HISI_PTT_TUNE_QOS_TX_CPL		(0x4 | (3 << 16))
#define HISI_PTT_TUNE_QOS_TX_NP			(0x4 | (4 << 16))
#define HISI_PTT_TUNE_QOS_TX_P			(0x4 | (5 << 16))
#define HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL	(0x5 | (6 << 16))
#define HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL	(0x5 | (7 << 16))

HISI_PTT_TUNE_ATTR_COMMON(qos_tx_cpl, HISI_PTT_TUNE_QOS_TX_CPL);
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_np, HISI_PTT_TUNE_QOS_TX_NP);
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_p, HISI_PTT_TUNE_QOS_TX_P);
HISI_PTT_TUNE_ATTR_COMMON(rx_alloc_buf_level, HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL);
HISI_PTT_TUNE_ATTR_COMMON(tx_alloc_buf_level, HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL);

static struct attribute *hisi_ptt_tune_attrs[] = {
	&hisi_ptt_qos_tx_cpl_attr.attr.attr,
	&hisi_ptt_qos_tx_np_attr.attr.attr,
	&hisi_ptt_qos_tx_p_attr.attr.attr,
	&hisi_ptt_rx_alloc_buf_level_attr.attr.attr,
	&hisi_ptt_tx_alloc_buf_level_attr.attr.attr,
	NULL,
};

static struct attribute_group hisi_ptt_tune_group = {
	.name	= "tune",
	.attrs	= hisi_ptt_tune_attrs,
};

static u16 hisi_ptt_get_filter_val(u16 devid, bool is_port)
{
	if (is_port)
		return BIT(HISI_PCIE_CORE_PORT_ID(devid & 0xff));

	return devid;
}
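/*
 * Worked example for the helper above (the port ID mapping is
 * hardware-defined; the concrete numbers here are illustrative): for a
 * Root Port whose devfn maps to core port 2, the filter value is
 * BIT(2) = 0x4, and the masks of several ports may be OR'ed together to
 * target them all at once. For a Requester filter the value is simply
 * the BDF of the PCIe function to be traced.
 */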
static bool hisi_ptt_wait_trace_hw_idle(struct hisi_ptt *hisi_ptt)
{
	u32 val;

	return !readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_STS,
					  val, val & HISI_PTT_TRACE_IDLE,
					  HISI_PTT_WAIT_POLL_INTERVAL_US,
					  HISI_PTT_WAIT_TRACE_TIMEOUT_US);
}

static void hisi_ptt_wait_dma_reset_done(struct hisi_ptt *hisi_ptt)
{
	u32 val;

	readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS,
				  val, !val, HISI_PTT_RESET_POLL_INTERVAL_US,
				  HISI_PTT_RESET_TIMEOUT_US);
}

static void hisi_ptt_trace_end(struct hisi_ptt *hisi_ptt)
{
	writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
	hisi_ptt->trace_ctrl.started = false;
}

static int hisi_ptt_trace_start(struct hisi_ptt *hisi_ptt)
{
	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
	u32 val;
	int i;

	/* Check that the device is idle before starting trace */
	if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt)) {
		pci_err(hisi_ptt->pdev, "Failed to start trace, the device is still busy\n");
		return -EBUSY;
	}

	ctrl->started = true;

	/* Reset the DMA before starting trace */
	val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
	val |= HISI_PTT_TRACE_CTRL_RST;
	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);

	hisi_ptt_wait_dma_reset_done(hisi_ptt);

	val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
	val &= ~HISI_PTT_TRACE_CTRL_RST;
	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);

	/* Reset the index of the current buffer */
	hisi_ptt->trace_ctrl.buf_index = 0;

	/* Zero the trace buffers */
	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++)
		memset(ctrl->trace_buf[i].addr, 0, HISI_PTT_TRACE_BUF_SIZE);

	/* Clear the interrupt status */
	writel(HISI_PTT_TRACE_INT_STAT_MASK, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
	writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_INT_MASK);

	/* Set the trace control register */
	val = FIELD_PREP(HISI_PTT_TRACE_CTRL_TYPE_SEL, ctrl->type);
	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_RXTX_SEL, ctrl->direction);
	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_DATA_FORMAT, ctrl->format);
	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_TARGET_SEL, hisi_ptt->trace_ctrl.filter);
	if (!hisi_ptt->trace_ctrl.is_port)
		val |= HISI_PTT_TRACE_CTRL_FILTER_MODE;

	/* Start the trace */
	val |= HISI_PTT_TRACE_CTRL_EN;
	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);

	return 0;
}

static int hisi_ptt_update_aux(struct hisi_ptt *hisi_ptt, int index, bool stop)
{
	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
	struct perf_output_handle *handle = &ctrl->handle;
	struct perf_event *event = handle->event;
	struct hisi_ptt_pmu_buf *buf;
	size_t size;
	void *addr;

	buf = perf_get_aux(handle);
	if (!buf || !handle->size)
		return -EINVAL;

	addr = ctrl->trace_buf[ctrl->buf_index].addr;

	/*
	 * If we're going to stop, read the size of the already traced data
	 * from HISI_PTT_TRACE_WR_STS. Otherwise we're coming from the
	 * interrupt and the data size is always HISI_PTT_TRACE_BUF_SIZE.
	 */
	if (stop) {
		u32 reg;

		reg = readl(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS);
		size = FIELD_GET(HISI_PTT_TRACE_WR_STS_WRITE, reg);
	} else {
		size = HISI_PTT_TRACE_BUF_SIZE;
	}

	memcpy(buf->base + buf->pos, addr, size);
	buf->pos += size;

	/*
	 * Just commit the traced data if we're going to stop. Otherwise,
	 * if the resident AUX buffer cannot contain the data of the next
	 * trace buffer, commit it and apply a new one.
	 */
	if (stop) {
		perf_aux_output_end(handle, buf->pos);
	} else if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
		perf_aux_output_end(handle, buf->pos);

		buf = perf_aux_output_begin(handle, event);
		if (!buf)
			return -EINVAL;

		buf->pos = handle->head % buf->length;
		if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
			perf_aux_output_end(handle, 0);
			return -EINVAL;
		}
	}

	return 0;
}
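/*
 * A worked example of the hand-off above, assuming the values defined in
 * hisi_ptt.h (4 hardware trace buffers of 4MiB each) and a 16MiB AUX
 * area: each DMA interrupt copies one full 4MiB trace buffer into the
 * AUX area, so after the fourth copy fewer than 4MiB remain, the handle
 * is committed, and a fresh AUX sub-buffer is begun before the hardware
 * wraps around to trace buffer 0 again.
 */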
static irqreturn_t hisi_ptt_isr(int irq, void *context)
{
	struct hisi_ptt *hisi_ptt = context;
	u32 status, buf_idx;

	status = readl(hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
	if (!(status & HISI_PTT_TRACE_INT_STAT_MASK))
		return IRQ_NONE;

	buf_idx = ffs(status) - 1;

	/* Clear the interrupt status of buffer @buf_idx */
	writel(status, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);

	/*
	 * Update the AUX buffer and cache the current buffer index,
	 * as we need it to save the remaining data when the trace is
	 * stopped outside of this interrupt handler. End the trace
	 * if the update fails.
	 */
	if (hisi_ptt_update_aux(hisi_ptt, buf_idx, false))
		hisi_ptt_trace_end(hisi_ptt);
	else
		hisi_ptt->trace_ctrl.buf_index = (buf_idx + 1) % HISI_PTT_TRACE_BUF_CNT;

	return IRQ_HANDLED;
}

static void hisi_ptt_irq_free_vectors(void *pdev)
{
	pci_free_irq_vectors(pdev);
}

static int hisi_ptt_register_irq(struct hisi_ptt *hisi_ptt)
{
	struct pci_dev *pdev = hisi_ptt->pdev;
	int ret;

	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
	if (ret < 0) {
		pci_err(pdev, "failed to allocate irq vector, ret = %d\n", ret);
		return ret;
	}

	ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_irq_free_vectors, pdev);
	if (ret < 0)
		return ret;

	ret = devm_request_threaded_irq(&pdev->dev,
					pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ),
					NULL, hisi_ptt_isr, 0,
					DRV_NAME, hisi_ptt);
	if (ret) {
		pci_err(pdev, "failed to request irq %d, ret = %d\n",
			pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ), ret);
		return ret;
	}

	return 0;
}

static int hisi_ptt_init_filters(struct pci_dev *pdev, void *data)
{
	struct pci_dev *root_port = pcie_find_root_port(pdev);
	struct hisi_ptt_filter_desc *filter;
	struct hisi_ptt *hisi_ptt = data;
	u32 port_devid;

	if (!root_port)
		return 0;

	port_devid = PCI_DEVID(root_port->bus->number, root_port->devfn);
	if (port_devid < hisi_ptt->lower_bdf ||
	    port_devid > hisi_ptt->upper_bdf)
		return 0;

	/*
	 * We won't fail the probe if filter allocation fails here. The
	 * filters will be partially initialized and users will know which
	 * filter failed through the log. The other functions of the PTT
	 * device remain available.
	 */
	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
	if (!filter) {
		pci_err(hisi_ptt->pdev, "failed to add filter %s\n", pci_name(pdev));
		return -ENOMEM;
	}

	filter->devid = PCI_DEVID(pdev->bus->number, pdev->devfn);

	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) {
		filter->is_port = true;
		list_add_tail(&filter->list, &hisi_ptt->port_filters);

		/* Update the available port mask */
		hisi_ptt->port_mask |= hisi_ptt_get_filter_val(filter->devid, true);
	} else {
		list_add_tail(&filter->list, &hisi_ptt->req_filters);
	}

	return 0;
}

static void hisi_ptt_release_filters(void *data)
{
	struct hisi_ptt_filter_desc *filter, *tmp;
	struct hisi_ptt *hisi_ptt = data;

	list_for_each_entry_safe(filter, tmp, &hisi_ptt->req_filters, list) {
		list_del(&filter->list);
		kfree(filter);
	}

	list_for_each_entry_safe(filter, tmp, &hisi_ptt->port_filters, list) {
		list_del(&filter->list);
		kfree(filter);
	}
}

static int hisi_ptt_config_trace_buf(struct hisi_ptt *hisi_ptt)
{
	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
	struct device *dev = &hisi_ptt->pdev->dev;
	int i;

	ctrl->trace_buf = devm_kcalloc(dev, HISI_PTT_TRACE_BUF_CNT,
				       sizeof(*ctrl->trace_buf), GFP_KERNEL);
	if (!ctrl->trace_buf)
		return -ENOMEM;

	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; ++i) {
		ctrl->trace_buf[i].addr = dmam_alloc_coherent(dev, HISI_PTT_TRACE_BUF_SIZE,
							      &ctrl->trace_buf[i].dma,
							      GFP_KERNEL);
		if (!ctrl->trace_buf[i].addr)
			return -ENOMEM;
	}

	/* Configure the trace DMA buffers */
	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++) {
		writel(lower_32_bits(ctrl->trace_buf[i].dma),
		       hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_LO_0 +
		       i * HISI_PTT_TRACE_ADDR_STRIDE);
		writel(upper_32_bits(ctrl->trace_buf[i].dma),
		       hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_HI_0 +
		       i * HISI_PTT_TRACE_ADDR_STRIDE);
	}
	writel(HISI_PTT_TRACE_BUF_SIZE, hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_SIZE);

	return 0;
}
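/*
 * The resulting register layout is a simple array of base-address pairs,
 * one pair per trace buffer, all sharing a single size register
 * (register names as in hisi_ptt.h):
 *
 *   buffer i low  base: HISI_PTT_TRACE_ADDR_BASE_LO_0 + i * STRIDE
 *   buffer i high base: HISI_PTT_TRACE_ADDR_BASE_HI_0 + i * STRIDE
 *   common size:        HISI_PTT_TRACE_ADDR_SIZE
 *
 * The hardware fills the buffers round-robin and raises one interrupt
 * per completed buffer, which hisi_ptt_isr() consumes.
 */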
static int hisi_ptt_init_ctrls(struct hisi_ptt *hisi_ptt)
{
	struct pci_dev *pdev = hisi_ptt->pdev;
	struct pci_bus *bus;
	int ret;
	u32 reg;

	INIT_LIST_HEAD(&hisi_ptt->port_filters);
	INIT_LIST_HEAD(&hisi_ptt->req_filters);

	ret = hisi_ptt_config_trace_buf(hisi_ptt);
	if (ret)
		return ret;

	/*
	 * The device range register provides the information about the root
	 * ports which the RCiEP can control and trace. The RCiEP and the root
	 * ports it supports are on the same PCIe core, with the same domain
	 * number but maybe different bus numbers. The device range register
	 * tells us which root ports we can support: Bit[31:16] indicates the
	 * upper BDF number of the supported root ports, while Bit[15:0]
	 * indicates the lower one.
	 */
	reg = readl(hisi_ptt->iobase + HISI_PTT_DEVICE_RANGE);
	hisi_ptt->upper_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_UPPER, reg);
	hisi_ptt->lower_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_LOWER, reg);

	bus = pci_find_bus(pci_domain_nr(pdev->bus), PCI_BUS_NUM(hisi_ptt->upper_bdf));
	if (bus)
		pci_walk_bus(bus, hisi_ptt_init_filters, hisi_ptt);

	ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_release_filters, hisi_ptt);
	if (ret)
		return ret;

	hisi_ptt->trace_ctrl.on_cpu = -1;
	return 0;
}

static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
	const cpumask_t *cpumask = cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev));

	return cpumap_print_to_pagebuf(true, buf, cpumask);
}
static DEVICE_ATTR_RO(cpumask);

static struct attribute *hisi_ptt_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL
};

static const struct attribute_group hisi_ptt_cpumask_attr_group = {
	.attrs = hisi_ptt_cpumask_attrs,
};

/*
 * Bit 19 indicates the filter type, 1 for Root Port filter and 0 for
 * Requester filter. Bit[15:0] indicates the filter value: for a Root Port
 * filter it's a bit mask of the desired ports and for a Requester filter
 * it's the Requester ID of the desired PCIe function. Bit[18:16] is
 * reserved for extension.
 *
 * See the hisi_ptt.rst documentation for detailed information.
 */
PMU_FORMAT_ATTR(filter,		"config:0-19");
PMU_FORMAT_ATTR(direction,	"config:20-23");
PMU_FORMAT_ATTR(type,		"config:24-31");
PMU_FORMAT_ATTR(format,		"config:32-35");

static struct attribute *hisi_ptt_pmu_format_attrs[] = {
	&format_attr_filter.attr,
	&format_attr_direction.attr,
	&format_attr_type.attr,
	&format_attr_format.attr,
	NULL
};

static struct attribute_group hisi_ptt_pmu_format_group = {
	.name = "format",
	.attrs = hisi_ptt_pmu_format_attrs,
};

static const struct attribute_group *hisi_ptt_pmu_groups[] = {
	&hisi_ptt_cpumask_attr_group,
	&hisi_ptt_pmu_format_group,
	&hisi_ptt_tune_group,
	NULL
};
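/*
 * With the format attributes above, a trace session from userspace might
 * look like the sketch below (the PMU name and config values are
 * illustrative; see hisi_ptt.rst for the authoritative usage):
 *
 *   $ perf record -e hisi_ptt0_2/filter=0x80001,type=1,direction=1,format=0/ \
 *	-- sleep 5
 *
 * Here filter=0x80001 sets bit 19 (Root Port mode) with port mask 0x1,
 * type=1 traces posted requests, direction=1 selects outbound TLPs and
 * format=0 selects the 4DW trace data format.
 */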
static int hisi_ptt_trace_valid_direction(u32 val)
{
	/*
	 * The direction values have different effects according to the data
	 * format (specified in the parentheses). TLP sets A and B are
	 * different sets of TLP types. See the hisi_ptt.rst documentation
	 * for more details.
	 */
	static const u32 hisi_ptt_trace_available_direction[] = {
		0,	/* inbound(4DW) or reserved(8DW) */
		1,	/* outbound(4DW) */
		2,	/* {in, out}bound(4DW) or inbound(8DW), TLP set A */
		3,	/* {in, out}bound(4DW) or inbound(8DW), TLP set B */
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_direction); i++) {
		if (val == hisi_ptt_trace_available_direction[i])
			return 0;
	}

	return -EINVAL;
}

static int hisi_ptt_trace_valid_type(u32 val)
{
	/* Different types can be set simultaneously */
	static const u32 hisi_ptt_trace_available_type[] = {
		1,	/* posted_request */
		2,	/* non-posted_request */
		4,	/* completion */
	};
	int i;

	if (!val)
		return -EINVAL;

	/*
	 * Walk the available list and clear the valid bits of
	 * the config. If there is any resident bit after the
	 * walk then the config is invalid.
	 */
	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_type); i++)
		val &= ~hisi_ptt_trace_available_type[i];

	if (val)
		return -EINVAL;

	return 0;
}

static int hisi_ptt_trace_valid_format(u32 val)
{
	static const u32 hisi_ptt_trace_available_format[] = {
		0,	/* 4DW */
		1,	/* 8DW */
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_format); i++) {
		if (val == hisi_ptt_trace_available_format[i])
			return 0;
	}

	return -EINVAL;
}
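/*
 * A quick worked example of the type check above: type = 5 (binary 101,
 * posted_request | completion) has every bit cleared by the walk and is
 * accepted, while type = 8 (binary 1000) leaves a resident bit behind
 * and is rejected with -EINVAL.
 */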
static int hisi_ptt_trace_valid_filter(struct hisi_ptt *hisi_ptt, u64 config)
{
	unsigned long val, port_mask = hisi_ptt->port_mask;
	struct hisi_ptt_filter_desc *filter;

	hisi_ptt->trace_ctrl.is_port = FIELD_GET(HISI_PTT_PMU_FILTER_IS_PORT, config);
	val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, config);

	/*
	 * Port filters are defined as a bit mask. For a port filter, check
	 * that the bits in @val are within the range of hisi_ptt->port_mask
	 * and that @val is not empty; otherwise the user has specified some
	 * unsupported root ports.
	 *
	 * For a Requester ID filter, walk the available filter list to see
	 * whether we have a match.
	 */
	if (!hisi_ptt->trace_ctrl.is_port) {
		list_for_each_entry(filter, &hisi_ptt->req_filters, list) {
			if (val == hisi_ptt_get_filter_val(filter->devid, filter->is_port))
				return 0;
		}
	} else if (val && bitmap_subset(&val, &port_mask, BITS_PER_LONG)) {
		return 0;
	}

	return -EINVAL;
}

static void hisi_ptt_pmu_init_configs(struct hisi_ptt *hisi_ptt, struct perf_event *event)
{
	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
	u32 val;

	val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, event->attr.config);
	hisi_ptt->trace_ctrl.filter = val;

	val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
	ctrl->direction = val;

	val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
	ctrl->type = val;

	val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
	ctrl->format = val;
}

static int hisi_ptt_pmu_event_init(struct perf_event *event)
{
	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
	int ret;
	u32 val;

	/*
	 * The event may not target this PMU at all; check the type first
	 * so that the perf core can try other PMUs on -ENOENT.
	 */
	if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type)
		return -ENOENT;

	if (event->cpu < 0) {
		dev_dbg(event->pmu->dev, "Per-task mode not supported\n");
		return -EOPNOTSUPP;
	}

	ret = hisi_ptt_trace_valid_filter(hisi_ptt, event->attr.config);
	if (ret < 0)
		return ret;

	val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
	ret = hisi_ptt_trace_valid_direction(val);
	if (ret < 0)
		return ret;

	val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
	ret = hisi_ptt_trace_valid_type(val);
	if (ret < 0)
		return ret;

	val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
	return hisi_ptt_trace_valid_format(val);
}

static void *hisi_ptt_pmu_setup_aux(struct perf_event *event, void **pages,
				    int nr_pages, bool overwrite)
{
	struct hisi_ptt_pmu_buf *buf;
	struct page **pagelist;
	int i;

	if (overwrite) {
		dev_warn(event->pmu->dev, "Overwrite mode is not supported\n");
		return NULL;
	}

	/* If the AUX area cannot hold all trace buffers, we cannot start trace */
	if (nr_pages < HISI_PTT_TRACE_TOTAL_BUF_SIZE / PAGE_SIZE)
		return NULL;

	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
	if (!buf)
		return NULL;

	pagelist = kcalloc(nr_pages, sizeof(*pagelist), GFP_KERNEL);
	if (!pagelist)
		goto err;

	for (i = 0; i < nr_pages; i++)
		pagelist[i] = virt_to_page(pages[i]);

	buf->base = vmap(pagelist, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!buf->base) {
		kfree(pagelist);
		goto err;
	}

	buf->nr_pages = nr_pages;
	buf->length = nr_pages * PAGE_SIZE;
	buf->pos = 0;

	kfree(pagelist);
	return buf;
err:
	kfree(buf);
	return NULL;
}

static void hisi_ptt_pmu_free_aux(void *aux)
{
	struct hisi_ptt_pmu_buf *buf = aux;

	vunmap(buf->base);
	kfree(buf);
}
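/*
 * Note on sizing the AUX area: nr_pages must cover
 * HISI_PTT_TRACE_TOTAL_BUF_SIZE, i.e. all hardware trace buffers.
 * Assuming the sizes in hisi_ptt.h (4 buffers of 4MiB), a perf user
 * would need an AUX area of at least 16MiB, e.g. (illustrative syntax):
 *
 *   $ perf record -m,16M -e hisi_ptt0_2/filter=0x80001,type=1/ -- sleep 5
 */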
static void hisi_ptt_pmu_start(struct perf_event *event, int flags)
{
	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
	struct perf_output_handle *handle = &hisi_ptt->trace_ctrl.handle;
	struct hw_perf_event *hwc = &event->hw;
	struct device *dev = event->pmu->dev;
	struct hisi_ptt_pmu_buf *buf;
	int cpu = event->cpu;
	int ret;

	hwc->state = 0;

	/* Serialize the perf processes if the user specified several CPUs */
	spin_lock(&hisi_ptt->pmu_lock);
	if (hisi_ptt->trace_ctrl.started) {
		dev_dbg(dev, "trace has already started\n");
		goto stop;
	}

	/*
	 * Handle the interrupt on the same cpu which starts the trace to avoid
	 * context mismatch. Otherwise we'll trigger the WARN from the perf
	 * core in event_function_local(). If the CPU passed is offline we'll
	 * fail here; just log it since we can do nothing more.
	 */
	ret = irq_set_affinity(pci_irq_vector(hisi_ptt->pdev, HISI_PTT_TRACE_DMA_IRQ),
			       cpumask_of(cpu));
	if (ret)
		dev_warn(dev, "failed to set the affinity of trace interrupt\n");

	hisi_ptt->trace_ctrl.on_cpu = cpu;

	buf = perf_aux_output_begin(handle, event);
	if (!buf) {
		dev_dbg(dev, "aux output begin failed\n");
		goto stop;
	}

	buf->pos = handle->head % buf->length;

	hisi_ptt_pmu_init_configs(hisi_ptt, event);

	ret = hisi_ptt_trace_start(hisi_ptt);
	if (ret) {
		dev_dbg(dev, "trace start failed, ret = %d\n", ret);
		perf_aux_output_end(handle, 0);
		goto stop;
	}

	spin_unlock(&hisi_ptt->pmu_lock);
	return;
stop:
	event->hw.state |= PERF_HES_STOPPED;
	spin_unlock(&hisi_ptt->pmu_lock);
}

static void hisi_ptt_pmu_stop(struct perf_event *event, int flags)
{
	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->state & PERF_HES_STOPPED)
		return;

	spin_lock(&hisi_ptt->pmu_lock);
	if (hisi_ptt->trace_ctrl.started) {
		hisi_ptt_trace_end(hisi_ptt);

		if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt))
			dev_warn(event->pmu->dev, "Device is still busy\n");

		hisi_ptt_update_aux(hisi_ptt, hisi_ptt->trace_ctrl.buf_index, true);
	}
	spin_unlock(&hisi_ptt->pmu_lock);

	hwc->state |= PERF_HES_STOPPED;
	perf_event_update_userpage(event);
	hwc->state |= PERF_HES_UPTODATE;
}

static int hisi_ptt_pmu_add(struct perf_event *event, int flags)
{
	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int cpu = event->cpu;

	/* Only allow the cpus on the device's node to add the event */
	if (!cpumask_test_cpu(cpu, cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev))))
		return 0;

	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;

	if (flags & PERF_EF_START) {
		hisi_ptt_pmu_start(event, PERF_EF_RELOAD);
		if (hwc->state & PERF_HES_STOPPED)
			return -EINVAL;
	}

	return 0;
}

static void hisi_ptt_pmu_del(struct perf_event *event, int flags)
{
	hisi_ptt_pmu_stop(event, PERF_EF_UPDATE);
}

static void hisi_ptt_remove_cpuhp_instance(void *hotplug_node)
{
	cpuhp_state_remove_instance_nocalls(hisi_ptt_pmu_online, hotplug_node);
}

static void hisi_ptt_unregister_pmu(void *pmu)
{
	perf_pmu_unregister(pmu);
}

static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt)
{
	u16 core_id, sicl_id;
	char *pmu_name;
	u32 reg;
	int ret;

	ret = cpuhp_state_add_instance_nocalls(hisi_ptt_pmu_online,
					       &hisi_ptt->hotplug_node);
	if (ret)
		return ret;

	ret = devm_add_action_or_reset(&hisi_ptt->pdev->dev,
				       hisi_ptt_remove_cpuhp_instance,
				       &hisi_ptt->hotplug_node);
	if (ret)
		return ret;

	mutex_init(&hisi_ptt->tune_lock);
	spin_lock_init(&hisi_ptt->pmu_lock);

	hisi_ptt->hisi_ptt_pmu = (struct pmu) {
		.module		= THIS_MODULE,
		.capabilities	= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
		.task_ctx_nr	= perf_sw_context,
		.attr_groups	= hisi_ptt_pmu_groups,
		.event_init	= hisi_ptt_pmu_event_init,
		.setup_aux	= hisi_ptt_pmu_setup_aux,
		.free_aux	= hisi_ptt_pmu_free_aux,
		.start		= hisi_ptt_pmu_start,
		.stop		= hisi_ptt_pmu_stop,
		.add		= hisi_ptt_pmu_add,
		.del		= hisi_ptt_pmu_del,
	};

	reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION);
	core_id = FIELD_GET(HISI_PTT_CORE_ID, reg);
	sicl_id = FIELD_GET(HISI_PTT_SICL_ID, reg);

	pmu_name = devm_kasprintf(&hisi_ptt->pdev->dev, GFP_KERNEL, "hisi_ptt%u_%u",
				  sicl_id, core_id);
	if (!pmu_name)
		return -ENOMEM;

	ret = perf_pmu_register(&hisi_ptt->hisi_ptt_pmu, pmu_name, -1);
	if (ret)
		return ret;

	return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
					hisi_ptt_unregister_pmu,
					&hisi_ptt->hisi_ptt_pmu);
}
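/*
 * The PMU name encodes the SICL and the PCIe core the RCiEP lives on,
 * so a device on SICL 0, core 2 registers as "hisi_ptt0_2" and shows up
 * in tools such as "perf list" under that name (the exact IDs depend on
 * the platform topology read back from HISI_PTT_LOCATION).
 */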
/*
 * The DMA of the PTT trace can only use direct mappings due to some
 * hardware restrictions. Check that either there is no IOMMU or the
 * policy of the IOMMU domain is passthrough; otherwise the trace
 * cannot work.
 *
 * The PTT device is expected to sit behind an ARM SMMUv3, which
 * should have put the device in passthrough mode by a quirk.
 */
static int hisi_ptt_check_iommu_mapping(struct pci_dev *pdev)
{
	struct iommu_domain *iommu_domain;

	iommu_domain = iommu_get_domain_for_dev(&pdev->dev);
	if (!iommu_domain || iommu_domain->type == IOMMU_DOMAIN_IDENTITY)
		return 0;

	return -EOPNOTSUPP;
}

static int hisi_ptt_probe(struct pci_dev *pdev,
			  const struct pci_device_id *id)
{
	struct hisi_ptt *hisi_ptt;
	int ret;

	ret = hisi_ptt_check_iommu_mapping(pdev);
	if (ret) {
		pci_err(pdev, "requires direct DMA mappings\n");
		return ret;
	}

	hisi_ptt = devm_kzalloc(&pdev->dev, sizeof(*hisi_ptt), GFP_KERNEL);
	if (!hisi_ptt)
		return -ENOMEM;

	hisi_ptt->pdev = pdev;
	pci_set_drvdata(pdev, hisi_ptt);

	ret = pcim_enable_device(pdev);
	if (ret) {
		pci_err(pdev, "failed to enable device, ret = %d\n", ret);
		return ret;
	}

	ret = pcim_iomap_regions(pdev, BIT(2), DRV_NAME);
	if (ret) {
		pci_err(pdev, "failed to remap io memory, ret = %d\n", ret);
		return ret;
	}

	hisi_ptt->iobase = pcim_iomap_table(pdev)[2];

	ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
	if (ret) {
		pci_err(pdev, "failed to set 64 bit dma mask, ret = %d\n", ret);
		return ret;
	}

	pci_set_master(pdev);

	ret = hisi_ptt_register_irq(hisi_ptt);
	if (ret)
		return ret;

	ret = hisi_ptt_init_ctrls(hisi_ptt);
	if (ret) {
		pci_err(pdev, "failed to init controls, ret = %d\n", ret);
		return ret;
	}

	ret = hisi_ptt_register_pmu(hisi_ptt);
	if (ret) {
		pci_err(pdev, "failed to register PMU device, ret = %d\n", ret);
		return ret;
	}

	return 0;
}

static const struct pci_device_id hisi_ptt_id_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa12e) },
	{ }
};
MODULE_DEVICE_TABLE(pci, hisi_ptt_id_tbl);

static struct pci_driver hisi_ptt_driver = {
	.name = DRV_NAME,
	.id_table = hisi_ptt_id_tbl,
	.probe = hisi_ptt_probe,
};

static int hisi_ptt_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
	struct hisi_ptt *hisi_ptt;
	struct device *dev;
	int target, src;

	hisi_ptt = hlist_entry_safe(node, struct hisi_ptt, hotplug_node);
	src = hisi_ptt->trace_ctrl.on_cpu;
	dev = hisi_ptt->hisi_ptt_pmu.dev;

	if (!hisi_ptt->trace_ctrl.started || src != cpu)
		return 0;

	target = cpumask_any_but(cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev)), cpu);
	if (target >= nr_cpu_ids) {
		dev_err(dev, "no available cpu for perf context migration\n");
		return 0;
	}

	perf_pmu_migrate_context(&hisi_ptt->hisi_ptt_pmu, src, target);

	/*
	 * Also make sure the interrupt is bound to the migrated CPU. Warn
	 * the user on failure here.
	 */
	if (irq_set_affinity(pci_irq_vector(hisi_ptt->pdev, HISI_PTT_TRACE_DMA_IRQ),
			     cpumask_of(target)))
		dev_warn(dev, "failed to set the affinity of trace interrupt\n");

	hisi_ptt->trace_ctrl.on_cpu = target;
	return 0;
}
static int __init hisi_ptt_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRV_NAME, NULL,
				      hisi_ptt_cpu_teardown);
	if (ret < 0)
		return ret;
	hisi_ptt_pmu_online = ret;

	ret = pci_register_driver(&hisi_ptt_driver);
	if (ret)
		cpuhp_remove_multi_state(hisi_ptt_pmu_online);

	return ret;
}
module_init(hisi_ptt_init);

static void __exit hisi_ptt_exit(void)
{
	pci_unregister_driver(&hisi_ptt_driver);
	cpuhp_remove_multi_state(hisi_ptt_pmu_online);
}
module_exit(hisi_ptt_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>");
MODULE_DESCRIPTION("Driver for HiSilicon PCIe tune and trace device");