1 // SPDX-License-Identifier: GPL-2.0+ 2 // Copyright 2017 IBM Corp. 3 #include <asm/pnv-ocxl.h> 4 #include <asm/opal.h> 5 #include <misc/ocxl-config.h> 6 #include "pci.h" 7 8 #define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full 9 #define PNV_OCXL_ACTAG_MAX 64 10 /* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */ 11 #define PNV_OCXL_PASID_BITS 15 12 #define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1) 13 14 #define AFU_PRESENT (1 << 31) 15 #define AFU_INDEX_MASK 0x3F000000 16 #define AFU_INDEX_SHIFT 24 17 #define ACTAG_MASK 0xFFF 18 19 20 struct actag_range { 21 u16 start; 22 u16 count; 23 }; 24 25 struct npu_link { 26 struct list_head list; 27 int domain; 28 int bus; 29 int dev; 30 u16 fn_desired_actags[8]; 31 struct actag_range fn_actags[8]; 32 bool assignment_done; 33 }; 34 static struct list_head links_list = LIST_HEAD_INIT(links_list); 35 static DEFINE_MUTEX(links_list_lock); 36 37 38 /* 39 * opencapi actags handling: 40 * 41 * When sending commands, the opencapi device references the memory 42 * context it's targeting with an 'actag', which is really an alias 43 * for a (BDF, pasid) combination. When it receives a command, the NPU 44 * must do a lookup of the actag to identify the memory context. The 45 * hardware supports a finite number of actags per link (64 for 46 * POWER9). 47 * 48 * The device can carry multiple functions, and each function can have 49 * multiple AFUs. Each AFU advertises in its config space the number 50 * of desired actags. The host must configure in the config space of 51 * the AFU how many actags the AFU is really allowed to use (which can 52 * be less than what the AFU desires). 53 * 54 * When a PCI function is probed by the driver, it has no visibility 55 * about the other PCI functions and how many actags they'd like, 56 * which makes it impossible to distribute actags fairly among AFUs. 57 * 58 * Unfortunately, the only way to know how many actags a function 59 * desires is by looking at the data for each AFU in the config space 60 * and add them up. Similarly, the only way to know how many actags 61 * all the functions of the physical device desire is by adding the 62 * previously computed function counts. Then we can match that against 63 * what the hardware supports. 64 * 65 * To get a comprehensive view, we use a 'pci fixup': at the end of 66 * PCI enumeration, each function counts how many actags its AFUs 67 * desire and we save it in a 'npu_link' structure, shared between all 68 * the PCI functions of a same device. Therefore, when the first 69 * function is probed by the driver, we can get an idea of the total 70 * count of desired actags for the device, and assign the actags to 71 * the AFUs, by pro-rating if needed. 72 */ 73 74 static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos) 75 { 76 int vsec = pos; 77 u16 vendor, id; 78 79 while ((vsec = pci_find_next_ext_capability(dev, vsec, 80 OCXL_EXT_CAP_ID_DVSEC))) { 81 pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, 82 &vendor); 83 pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); 84 if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id) 85 return vsec; 86 } 87 return 0; 88 } 89 90 static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) 91 { 92 int vsec = 0; 93 u8 idx; 94 95 while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID, 96 vsec))) { 97 pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, 98 &idx); 99 if (idx == afu_idx) 100 return vsec; 101 } 102 return 0; 103 } 104 105 static int get_max_afu_index(struct pci_dev *dev, int *afu_idx) 106 { 107 int pos; 108 u32 val; 109 110 pos = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM, 111 OCXL_DVSEC_FUNC_ID); 112 if (!pos) 113 return -ESRCH; 114 115 pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val); 116 if (val & AFU_PRESENT) 117 *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT; 118 else 119 *afu_idx = -1; 120 return 0; 121 } 122 123 static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag) 124 { 125 int pos; 126 u16 actag_sup; 127 128 pos = find_dvsec_afu_ctrl(dev, afu_idx); 129 if (!pos) 130 return -ESRCH; 131 132 pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, 133 &actag_sup); 134 *actag = actag_sup & ACTAG_MASK; 135 return 0; 136 } 137 138 static struct npu_link *find_link(struct pci_dev *dev) 139 { 140 struct npu_link *link; 141 142 list_for_each_entry(link, &links_list, list) { 143 /* The functions of a device all share the same link */ 144 if (link->domain == pci_domain_nr(dev->bus) && 145 link->bus == dev->bus->number && 146 link->dev == PCI_SLOT(dev->devfn)) { 147 return link; 148 } 149 } 150 151 /* link doesn't exist yet. Allocate one */ 152 link = kzalloc(sizeof(struct npu_link), GFP_KERNEL); 153 if (!link) 154 return NULL; 155 link->domain = pci_domain_nr(dev->bus); 156 link->bus = dev->bus->number; 157 link->dev = PCI_SLOT(dev->devfn); 158 list_add(&link->list, &links_list); 159 return link; 160 } 161 162 static void pnv_ocxl_fixup_actag(struct pci_dev *dev) 163 { 164 struct pci_controller *hose = pci_bus_to_host(dev->bus); 165 struct pnv_phb *phb = hose->private_data; 166 struct npu_link *link; 167 int rc, afu_idx = -1, i, actag; 168 169 if (!machine_is(powernv)) 170 return; 171 172 if (phb->type != PNV_PHB_NPU_OCAPI) 173 return; 174 175 guard(mutex)(&links_list_lock); 176 177 link = find_link(dev); 178 if (!link) { 179 dev_warn(&dev->dev, "couldn't update actag information\n"); 180 return; 181 } 182 183 /* 184 * Check how many actags are desired for the AFUs under that 185 * function and add it to the count for the link 186 */ 187 rc = get_max_afu_index(dev, &afu_idx); 188 if (rc) { 189 /* Most likely an invalid config space */ 190 dev_dbg(&dev->dev, "couldn't find AFU information\n"); 191 afu_idx = -1; 192 } 193 194 link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0; 195 for (i = 0; i <= afu_idx; i++) { 196 /* 197 * AFU index 'holes' are allowed. So don't fail if we 198 * can't read the actag info for an index 199 */ 200 rc = get_actag_count(dev, i, &actag); 201 if (rc) 202 continue; 203 link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag; 204 } 205 dev_dbg(&dev->dev, "total actags for function: %d\n", 206 link->fn_desired_actags[PCI_FUNC(dev->devfn)]); 207 208 } 209 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag); 210 211 static u16 assign_fn_actags(u16 desired, u16 total) 212 { 213 u16 count; 214 215 if (total <= PNV_OCXL_ACTAG_MAX) 216 count = desired; 217 else 218 count = PNV_OCXL_ACTAG_MAX * desired / total; 219 220 return count; 221 } 222 223 static void assign_actags(struct npu_link *link) 224 { 225 u16 actag_count, range_start = 0, total_desired = 0; 226 int i; 227 228 for (i = 0; i < 8; i++) 229 total_desired += link->fn_desired_actags[i]; 230 231 for (i = 0; i < 8; i++) { 232 if (link->fn_desired_actags[i]) { 233 actag_count = assign_fn_actags( 234 link->fn_desired_actags[i], 235 total_desired); 236 link->fn_actags[i].start = range_start; 237 link->fn_actags[i].count = actag_count; 238 range_start += actag_count; 239 WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX); 240 } 241 pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n", 242 link->domain, link->bus, link->dev, i, 243 link->fn_actags[i].start, link->fn_actags[i].count, 244 link->fn_desired_actags[i]); 245 } 246 link->assignment_done = true; 247 } 248 249 int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, 250 u16 *supported) 251 { 252 struct npu_link *link; 253 254 guard(mutex)(&links_list_lock); 255 256 link = find_link(dev); 257 if (!link) { 258 dev_err(&dev->dev, "actag information not found\n"); 259 return -ENODEV; 260 } 261 /* 262 * On p9, we only have 64 actags per link, so they must be 263 * shared by all the functions of the same adapter. We counted 264 * the desired actag counts during PCI enumeration, so that we 265 * can allocate a pro-rated number of actags to each function. 266 */ 267 if (!link->assignment_done) 268 assign_actags(link); 269 270 *base = link->fn_actags[PCI_FUNC(dev->devfn)].start; 271 *enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count; 272 *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)]; 273 274 return 0; 275 } 276 EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag); 277 278 int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count) 279 { 280 struct npu_link *link; 281 int i, rc = -EINVAL; 282 283 /* 284 * The number of PASIDs (process address space ID) which can 285 * be used by a function depends on how many functions exist 286 * on the device. The NPU needs to be configured to know how 287 * many bits are available to PASIDs and how many are to be 288 * used by the function BDF identifier. 289 * 290 * We only support one AFU-carrying function for now. 291 */ 292 guard(mutex)(&links_list_lock); 293 294 link = find_link(dev); 295 if (!link) { 296 dev_err(&dev->dev, "actag information not found\n"); 297 return -ENODEV; 298 } 299 300 for (i = 0; i < 8; i++) 301 if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) { 302 *count = PNV_OCXL_PASID_MAX; 303 rc = 0; 304 break; 305 } 306 307 dev_dbg(&dev->dev, "%d PASIDs available for function\n", 308 rc ? 0 : *count); 309 return rc; 310 } 311 EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count); 312 313 static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf) 314 { 315 int shift, idx; 316 317 WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE); 318 idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2; 319 shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2)); 320 buf[idx] |= rate << shift; 321 } 322 323 int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, 324 char *rate_buf, int rate_buf_size) 325 { 326 if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) 327 return -EINVAL; 328 /* 329 * The TL capabilities are a characteristic of the NPU, so 330 * we go with hard-coded values. 331 * 332 * The receiving rate of each template is encoded on 4 bits. 333 * 334 * On P9: 335 * - templates 0 -> 3 are supported 336 * - templates 0, 1 and 3 have a 0 receiving rate 337 * - template 2 has receiving rate of 1 (extra cycle) 338 */ 339 memset(rate_buf, 0, rate_buf_size); 340 set_templ_rate(2, 1, rate_buf); 341 *cap = PNV_OCXL_TL_P9_RECV_CAP; 342 return 0; 343 } 344 EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap); 345 346 int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, 347 uint64_t rate_buf_phys, int rate_buf_size) 348 { 349 struct pci_controller *hose = pci_bus_to_host(dev->bus); 350 struct pnv_phb *phb = hose->private_data; 351 int rc; 352 353 if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) 354 return -EINVAL; 355 356 rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap, 357 rate_buf_phys, rate_buf_size); 358 if (rc) { 359 dev_err(&dev->dev, "Can't configure host TL: %d\n", rc); 360 return -EINVAL; 361 } 362 return 0; 363 } 364 EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf); 365 366 int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq) 367 { 368 int rc; 369 370 rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq); 371 if (rc) { 372 dev_err(&dev->dev, 373 "Can't get translation interrupt for device\n"); 374 return rc; 375 } 376 return 0; 377 } 378 EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq); 379 380 void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, 381 void __iomem *tfc, void __iomem *pe_handle) 382 { 383 iounmap(dsisr); 384 iounmap(dar); 385 iounmap(tfc); 386 iounmap(pe_handle); 387 } 388 EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs); 389 390 int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, 391 void __iomem **dar, void __iomem **tfc, 392 void __iomem **pe_handle) 393 { 394 u64 reg; 395 int i, j, rc = 0; 396 void __iomem *regs[4]; 397 398 /* 399 * opal stores the mmio addresses of the DSISR, DAR, TFC and 400 * PE_HANDLE registers in a device tree property, in that 401 * order 402 */ 403 for (i = 0; i < 4; i++) { 404 rc = of_property_read_u64_index(dev->dev.of_node, 405 "ibm,opal-xsl-mmio", i, ®); 406 if (rc) 407 break; 408 regs[i] = ioremap(reg, 8); 409 if (!regs[i]) { 410 rc = -EINVAL; 411 break; 412 } 413 } 414 if (rc) { 415 dev_err(&dev->dev, "Can't map translation mmio registers\n"); 416 for (j = i - 1; j >= 0; j--) 417 iounmap(regs[j]); 418 } else { 419 *dsisr = regs[0]; 420 *dar = regs[1]; 421 *tfc = regs[2]; 422 *pe_handle = regs[3]; 423 } 424 return rc; 425 } 426 EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs); 427 428 struct spa_data { 429 u64 phb_opal_id; 430 u32 bdfn; 431 }; 432 433 int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, 434 void **platform_data) 435 { 436 struct pci_controller *hose = pci_bus_to_host(dev->bus); 437 struct pnv_phb *phb = hose->private_data; 438 struct spa_data *data; 439 u32 bdfn; 440 int rc; 441 442 data = kzalloc(sizeof(*data), GFP_KERNEL); 443 if (!data) 444 return -ENOMEM; 445 446 bdfn = pci_dev_id(dev); 447 rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem), 448 PE_mask); 449 if (rc) { 450 dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc); 451 kfree(data); 452 return rc; 453 } 454 data->phb_opal_id = phb->opal_id; 455 data->bdfn = bdfn; 456 *platform_data = (void *) data; 457 return 0; 458 } 459 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup); 460 461 void pnv_ocxl_spa_release(void *platform_data) 462 { 463 struct spa_data *data = (struct spa_data *) platform_data; 464 int rc; 465 466 rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0); 467 WARN_ON(rc); 468 kfree(data); 469 } 470 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release); 471 472 int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) 473 { 474 struct spa_data *data = (struct spa_data *) platform_data; 475 476 return opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle); 477 } 478 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache); 479 480 int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, 481 uint64_t lpcr, void __iomem **arva) 482 { 483 struct pci_controller *hose = pci_bus_to_host(dev->bus); 484 struct pnv_phb *phb = hose->private_data; 485 u64 mmio_atsd; 486 int rc; 487 488 /* ATSD physical address. 489 * ATSD LAUNCH register: write access initiates a shoot down to 490 * initiate the TLB Invalidate command. 491 */ 492 rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", 493 0, &mmio_atsd); 494 if (rc) { 495 dev_info(&dev->dev, "No available ATSD found\n"); 496 return rc; 497 } 498 499 /* Assign a register set to a Logical Partition and MMIO ATSD 500 * LPARID register to the required value. 501 */ 502 rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev), 503 lparid, lpcr); 504 if (rc) { 505 dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc); 506 return rc; 507 } 508 509 *arva = ioremap(mmio_atsd, 24); 510 if (!(*arva)) { 511 dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd); 512 rc = -ENOMEM; 513 } 514 515 return rc; 516 } 517 EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar); 518 519 void pnv_ocxl_unmap_lpar(void __iomem *arva) 520 { 521 iounmap(arva); 522 } 523 EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar); 524 525 void pnv_ocxl_tlb_invalidate(void __iomem *arva, 526 unsigned long pid, 527 unsigned long addr, 528 unsigned long page_size) 529 { 530 unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT); 531 u64 val = 0ull; 532 int pend; 533 u8 size; 534 535 if (!(arva)) 536 return; 537 538 if (addr) { 539 /* load Abbreviated Virtual Address register with 540 * the necessary value 541 */ 542 val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51)); 543 out_be64(arva + PNV_OCXL_ATSD_AVA, val); 544 } 545 546 /* Write access initiates a shoot down to initiate the 547 * TLB Invalidate command 548 */ 549 val = PNV_OCXL_ATSD_LNCH_R; 550 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10); 551 if (addr) 552 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00); 553 else { 554 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01); 555 val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON; 556 } 557 val |= PNV_OCXL_ATSD_LNCH_PRS; 558 /* Actual Page Size to be invalidated 559 * 000 4KB 560 * 101 64KB 561 * 001 2MB 562 * 010 1GB 563 */ 564 size = 0b101; 565 if (page_size == 0x1000) 566 size = 0b000; 567 if (page_size == 0x200000) 568 size = 0b001; 569 if (page_size == 0x40000000) 570 size = 0b010; 571 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size); 572 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid); 573 out_be64(arva + PNV_OCXL_ATSD_LNCH, val); 574 575 /* Poll the ATSD status register to determine when the 576 * TLB Invalidate has been completed. 577 */ 578 val = in_be64(arva + PNV_OCXL_ATSD_STAT); 579 pend = val >> 63; 580 581 while (pend) { 582 if (time_after_eq(jiffies, timeout)) { 583 pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n", 584 __func__, val, pid); 585 return; 586 } 587 cpu_relax(); 588 val = in_be64(arva + PNV_OCXL_ATSD_STAT); 589 pend = val >> 63; 590 } 591 } 592 EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate); 593