/*
 * Copyright 2014 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/pci_regs.h>
#include <linux/pci_ids.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include <linux/pci.h>
#include <linux/of.h>
#include <linux/delay.h>
#include <asm/opal.h>
#include <asm/msi_bitmap.h>
#include <asm/pnv-pci.h>
#include <asm/io.h>

#include "cxl.h"
#include <misc/cxl.h>


#define CXL_PCI_VSEC_ID	0x1280
#define CXL_VSEC_MIN_SIZE 0x80

#define CXL_READ_VSEC_LENGTH(dev, vsec, dest)			\
	do {							\
		pci_read_config_word(dev, vsec + 0x6, dest);	\
		*dest >>= 4;					\
	} while (0)
#define CXL_READ_VSEC_NAFUS(dev, vsec, dest) \
	pci_read_config_byte(dev, vsec + 0x8, dest)

#define CXL_READ_VSEC_STATUS(dev, vsec, dest) \
	pci_read_config_byte(dev, vsec + 0x9, dest)
#define CXL_STATUS_SECOND_PORT  0x80
#define CXL_STATUS_MSI_X_FULL   0x40
#define CXL_STATUS_MSI_X_SINGLE 0x20
#define CXL_STATUS_FLASH_RW     0x08
#define CXL_STATUS_FLASH_RO     0x04
#define CXL_STATUS_LOADABLE_AFU 0x02
#define CXL_STATUS_LOADABLE_PSL 0x01
/* If we see these features we won't try to use the card */
#define CXL_UNSUPPORTED_FEATURES \
	(CXL_STATUS_MSI_X_FULL | CXL_STATUS_MSI_X_SINGLE)

#define CXL_READ_VSEC_MODE_CONTROL(dev, vsec, dest) \
	pci_read_config_byte(dev, vsec + 0xa, dest)
#define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \
	pci_write_config_byte(dev, vsec + 0xa, val)
#define CXL_VSEC_PROTOCOL_MASK   0xe0
#define CXL_VSEC_PROTOCOL_1024TB 0x80
#define CXL_VSEC_PROTOCOL_512TB  0x40
#define CXL_VSEC_PROTOCOL_256TB  0x20 /* Power 8 uses this */
#define CXL_VSEC_PROTOCOL_ENABLE 0x01

#define CXL_READ_VSEC_PSL_REVISION(dev, vsec, dest) \
	pci_read_config_word(dev, vsec + 0xc, dest)

#define CXL_READ_VSEC_CAIA_MINOR(dev, vsec, dest) \
	pci_read_config_byte(dev, vsec + 0xe, dest)
#define CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, dest) \
	pci_read_config_byte(dev, vsec + 0xf, dest)
#define CXL_READ_VSEC_BASE_IMAGE(dev, vsec, dest) \
	pci_read_config_word(dev, vsec + 0x10, dest)

#define CXL_READ_VSEC_IMAGE_STATE(dev, vsec, dest) \
	pci_read_config_byte(dev, vsec + 0x13, dest)
#define CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, val) \
	pci_write_config_byte(dev, vsec + 0x13, val)
#define CXL_VSEC_USER_IMAGE_LOADED 0x80 /* RO */
#define CXL_VSEC_PERST_LOADS_IMAGE 0x20 /* RW */
#define CXL_VSEC_PERST_SELECT_USER 0x10 /* RW */

#define CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, dest) \
	pci_read_config_dword(dev, vsec + 0x20, dest)
#define CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, dest) \
	pci_read_config_dword(dev, vsec + 0x24, dest)
#define CXL_READ_VSEC_PS_OFF(dev, vsec, dest) \
	pci_read_config_dword(dev, vsec + 0x28, dest)
#define CXL_READ_VSEC_PS_SIZE(dev, vsec, dest) \
	pci_read_config_dword(dev, vsec + 0x2c, dest)


/* This works a little differently than the p1/p2 register accesses to make it
 * easier to pull out individual fields */
#define AFUD_READ(afu, off)	in_be64(afu->native->afu_desc_mmio + off)
#define AFUD_READ_LE(afu, off)	in_le64(afu->native->afu_desc_mmio + off)
#define EXTRACT_PPC_BIT(val, bit)	(!!(val & PPC_BIT(bit)))
#define EXTRACT_PPC_BITS(val, bs, be)	((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be))
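/*
 * The AFU descriptor fields below use the POWER MSB0 bit numbering
 * convention: bit 0 is the most significant bit of the 64-bit
 * doubleword, and PPC_BIT(n) is (1ull << (63 - n)). So, for example,
 * EXTRACT_PPC_BITS(val, 16, 31) pulls out the second 16-bit field of
 * the doubleword.
 */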
#define AFUD_READ_INFO(afu)		AFUD_READ(afu, 0x0)
#define   AFUD_NUM_INTS_PER_PROC(val)	EXTRACT_PPC_BITS(val, 0, 15)
#define   AFUD_NUM_PROCS(val)		EXTRACT_PPC_BITS(val, 16, 31)
#define   AFUD_NUM_CRS(val)		EXTRACT_PPC_BITS(val, 32, 47)
#define   AFUD_MULTIMODE(val)		EXTRACT_PPC_BIT(val, 48)
#define   AFUD_PUSH_BLOCK_TRANSFER(val)	EXTRACT_PPC_BIT(val, 55)
#define   AFUD_DEDICATED_PROCESS(val)	EXTRACT_PPC_BIT(val, 59)
#define   AFUD_AFU_DIRECTED(val)	EXTRACT_PPC_BIT(val, 61)
#define   AFUD_TIME_SLICED(val)		EXTRACT_PPC_BIT(val, 63)
#define AFUD_READ_CR(afu)		AFUD_READ(afu, 0x20)
#define   AFUD_CR_LEN(val)		EXTRACT_PPC_BITS(val, 8, 63)
#define AFUD_READ_CR_OFF(afu)		AFUD_READ(afu, 0x28)
#define AFUD_READ_PPPSA(afu)		AFUD_READ(afu, 0x30)
#define   AFUD_PPPSA_PP(val)		EXTRACT_PPC_BIT(val, 6)
#define   AFUD_PPPSA_PSA(val)		EXTRACT_PPC_BIT(val, 7)
#define   AFUD_PPPSA_LEN(val)		EXTRACT_PPC_BITS(val, 8, 63)
#define AFUD_READ_PPPSA_OFF(afu)	AFUD_READ(afu, 0x38)
#define AFUD_READ_EB(afu)		AFUD_READ(afu, 0x40)
#define   AFUD_EB_LEN(val)		EXTRACT_PPC_BITS(val, 8, 63)
#define AFUD_READ_EB_OFF(afu)		AFUD_READ(afu, 0x48)

static const struct pci_device_id cxl_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), },
	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), },
	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), },
	{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0601), },
	{ PCI_DEVICE_CLASS(0x120000, ~0), },

	{ }
};
MODULE_DEVICE_TABLE(pci, cxl_pci_tbl);


/*
 * Mostly using these wrappers to avoid confusion:
 * priv 1 is BAR2, while priv 2 is BAR0
 */
static inline resource_size_t p1_base(struct pci_dev *dev)
{
	return pci_resource_start(dev, 2);
}

static inline resource_size_t p1_size(struct pci_dev *dev)
{
	return pci_resource_len(dev, 2);
}

static inline resource_size_t p2_base(struct pci_dev *dev)
{
	return pci_resource_start(dev, 0);
}

static inline resource_size_t p2_size(struct pci_dev *dev)
{
	return pci_resource_len(dev, 0);
}

static int find_cxl_vsec(struct pci_dev *dev)
{
	int vsec = 0;
	u16 val;

	while ((vsec = pci_find_next_ext_capability(dev, vsec, PCI_EXT_CAP_ID_VNDR))) {
		pci_read_config_word(dev, vsec + 0x4, &val);
		if (val == CXL_PCI_VSEC_ID)
			return vsec;
	}
	return 0;
}
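/*
 * Debug helper: dump the standard config space BARs plus, if present,
 * every field of the CXL VSEC. Only run when cxl_verbose is set.
 */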
static void dump_cxl_config_space(struct pci_dev *dev)
{
	int vsec;
	u32 val;

	dev_info(&dev->dev, "dump_cxl_config_space\n");

	pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, &val);
	dev_info(&dev->dev, "BAR0: %#.8x\n", val);
	pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, &val);
	dev_info(&dev->dev, "BAR1: %#.8x\n", val);
	pci_read_config_dword(dev, PCI_BASE_ADDRESS_2, &val);
	dev_info(&dev->dev, "BAR2: %#.8x\n", val);
	pci_read_config_dword(dev, PCI_BASE_ADDRESS_3, &val);
	dev_info(&dev->dev, "BAR3: %#.8x\n", val);
	pci_read_config_dword(dev, PCI_BASE_ADDRESS_4, &val);
	dev_info(&dev->dev, "BAR4: %#.8x\n", val);
	pci_read_config_dword(dev, PCI_BASE_ADDRESS_5, &val);
	dev_info(&dev->dev, "BAR5: %#.8x\n", val);

	dev_info(&dev->dev, "p1 regs: %#llx, len: %#llx\n",
		 p1_base(dev), p1_size(dev));
	dev_info(&dev->dev, "p2 regs: %#llx, len: %#llx\n",
		 p2_base(dev), p2_size(dev));
	dev_info(&dev->dev, "BAR 4/5: %#llx, len: %#llx\n",
		 pci_resource_start(dev, 4), pci_resource_len(dev, 4));

	if (!(vsec = find_cxl_vsec(dev)))
		return;

#define show_reg(name, what) \
	dev_info(&dev->dev, "cxl vsec: %30s: %#x\n", name, what)

	pci_read_config_dword(dev, vsec + 0x0, &val);
	show_reg("Cap ID", (val >> 0) & 0xffff);
	show_reg("Cap Ver", (val >> 16) & 0xf);
	show_reg("Next Cap Ptr", (val >> 20) & 0xfff);
	pci_read_config_dword(dev, vsec + 0x4, &val);
	show_reg("VSEC ID", (val >> 0) & 0xffff);
	show_reg("VSEC Rev", (val >> 16) & 0xf);
	show_reg("VSEC Length", (val >> 20) & 0xfff);
	pci_read_config_dword(dev, vsec + 0x8, &val);
	show_reg("Num AFUs", (val >> 0) & 0xff);
	show_reg("Status", (val >> 8) & 0xff);
	show_reg("Mode Control", (val >> 16) & 0xff);
	show_reg("Reserved", (val >> 24) & 0xff);
	pci_read_config_dword(dev, vsec + 0xc, &val);
	show_reg("PSL Rev", (val >> 0) & 0xffff);
	show_reg("CAIA Ver", (val >> 16) & 0xffff);
	pci_read_config_dword(dev, vsec + 0x10, &val);
	show_reg("Base Image Rev", (val >> 0) & 0xffff);
	show_reg("Reserved", (val >> 16) & 0x0fff);
	show_reg("Image Control", (val >> 28) & 0x3);
	show_reg("Reserved", (val >> 30) & 0x1);
	show_reg("Image Loaded", (val >> 31) & 0x1);

	pci_read_config_dword(dev, vsec + 0x14, &val);
	show_reg("Reserved", val);
	pci_read_config_dword(dev, vsec + 0x18, &val);
	show_reg("Reserved", val);
	pci_read_config_dword(dev, vsec + 0x1c, &val);
	show_reg("Reserved", val);

	pci_read_config_dword(dev, vsec + 0x20, &val);
	show_reg("AFU Descriptor Offset", val);
	pci_read_config_dword(dev, vsec + 0x24, &val);
	show_reg("AFU Descriptor Size", val);
	pci_read_config_dword(dev, vsec + 0x28, &val);
	show_reg("Problem State Offset", val);
	pci_read_config_dword(dev, vsec + 0x2c, &val);
	show_reg("Problem State Size", val);

	pci_read_config_dword(dev, vsec + 0x30, &val);
	show_reg("Reserved", val);
	pci_read_config_dword(dev, vsec + 0x34, &val);
	show_reg("Reserved", val);
	pci_read_config_dword(dev, vsec + 0x38, &val);
	show_reg("Reserved", val);
	pci_read_config_dword(dev, vsec + 0x3c, &val);
	show_reg("Reserved", val);

	pci_read_config_dword(dev, vsec + 0x40, &val);
	show_reg("PSL Programming Port", val);
	pci_read_config_dword(dev, vsec + 0x44, &val);
	show_reg("PSL Programming Control", val);

	pci_read_config_dword(dev, vsec + 0x48, &val);
	show_reg("Reserved", val);
	pci_read_config_dword(dev, vsec + 0x4c, &val);
	show_reg("Reserved", val);

	pci_read_config_dword(dev, vsec + 0x50, &val);
	show_reg("Flash Address Register", val);
	pci_read_config_dword(dev, vsec + 0x54, &val);
	show_reg("Flash Size Register", val);
	pci_read_config_dword(dev, vsec + 0x58, &val);
	show_reg("Flash Status/Control Register", val);
	pci_read_config_dword(dev, vsec + 0x5c, &val);
	show_reg("Flash Data Port", val);

#undef show_reg
}
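/*
 * Debug helper: decode and print the AFU descriptor, including the
 * vendor/device ID pair of each AFU configuration record.
 */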
static void dump_afu_descriptor(struct cxl_afu *afu)
{
	u64 val, afu_cr_num, afu_cr_off, afu_cr_len;
	int i;

#define show_reg(name, what) \
	dev_info(&afu->dev, "afu desc: %30s: %#llx\n", name, what)

	val = AFUD_READ_INFO(afu);
	show_reg("num_ints_per_process", AFUD_NUM_INTS_PER_PROC(val));
	show_reg("num_of_processes", AFUD_NUM_PROCS(val));
	show_reg("num_of_afu_CRs", AFUD_NUM_CRS(val));
	show_reg("req_prog_mode", val & 0xffffULL);
	afu_cr_num = AFUD_NUM_CRS(val);

	val = AFUD_READ(afu, 0x8);
	show_reg("Reserved", val);
	val = AFUD_READ(afu, 0x10);
	show_reg("Reserved", val);
	val = AFUD_READ(afu, 0x18);
	show_reg("Reserved", val);

	val = AFUD_READ_CR(afu);
	show_reg("Reserved", (val >> (63-7)) & 0xff);
	show_reg("AFU_CR_len", AFUD_CR_LEN(val));
	afu_cr_len = AFUD_CR_LEN(val) * 256;

	val = AFUD_READ_CR_OFF(afu);
	afu_cr_off = val;
	show_reg("AFU_CR_offset", val);

	val = AFUD_READ_PPPSA(afu);
	show_reg("PerProcessPSA_control", (val >> (63-7)) & 0xff);
	show_reg("PerProcessPSA Length", AFUD_PPPSA_LEN(val));

	val = AFUD_READ_PPPSA_OFF(afu);
	show_reg("PerProcessPSA_offset", val);

	val = AFUD_READ_EB(afu);
	show_reg("Reserved", (val >> (63-7)) & 0xff);
	show_reg("AFU_EB_len", AFUD_EB_LEN(val));

	val = AFUD_READ_EB_OFF(afu);
	show_reg("AFU_EB_offset", val);

	for (i = 0; i < afu_cr_num; i++) {
		val = AFUD_READ_LE(afu, afu_cr_off + i * afu_cr_len);
		show_reg("CR Vendor", val & 0xffff);
		show_reg("CR Device", (val >> 16) & 0xffff);
	}
#undef show_reg
}

static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev *dev)
{
	struct device_node *np;
	const __be32 *prop;
	u64 psl_dsnctl;
	u64 chipid;

	if (!(np = pnv_pci_get_phb_node(dev)))
		return -ENODEV;

	while (np && !(prop = of_get_property(np, "ibm,chip-id", NULL)))
		np = of_get_next_parent(np);
	if (!np)
		return -ENODEV;
	chipid = be32_to_cpup(prop);
	of_node_put(np);

	/* Tell PSL where to route data to */
	psl_dsnctl = 0x02E8900002000000ULL | (chipid << (63-5));
	cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl);
	cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL);
	/* snoop write mask */
	cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL);
	/* set fir_accum */
	cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, 0x0800000000000000ULL);
	/* for debugging with trace arrays */
	cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL);

	return 0;
}

#define TBSYNC_CNT(n) (((u64)(n) & 0x7) << (63-6))
#define _2048_250MHZ_CYCLES 1
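/*
 * Bring the PSL timebase into sync with the core timebase: program the
 * recommended sync count, pulse the timebase enable bit, then poll
 * until the two timebases agree to within 16us. Missing OPAL support
 * for CAPP timebase sync is reported but deliberately not treated as
 * an error.
 */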
static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
{
	u64 psl_tb;
	int delta;
	unsigned int retry = 0;
	struct device_node *np;

	if (!(np = pnv_pci_get_phb_node(dev)))
		return -ENODEV;

	/* Do not fail when CAPP timebase sync is not supported by OPAL */
	of_node_get(np);
	if (!of_get_property(np, "ibm,capp-timebase-sync", NULL)) {
		of_node_put(np);
		pr_err("PSL: Timebase sync: OPAL support missing\n");
		return 0;
	}
	of_node_put(np);

	/*
	 * Setup PSL Timebase Control and Status register
	 * with the recommended Timebase Sync Count value
	 */
	cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT,
		     TBSYNC_CNT(2 * _2048_250MHZ_CYCLES));

	/* Enable PSL Timebase */
	cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000);
	cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb);

	/* Wait until CORE TB and PSL TB difference <= 16usecs */
	do {
		msleep(1);
		if (retry++ > 5) {
			pr_err("PSL: Timebase sync: giving up!\n");
			return -EIO;
		}
		psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase);
		delta = mftb() - psl_tb;
		if (delta < 0)
			delta = -delta;
	} while (tb_to_ns(delta) > 16000);

	return 0;
}

static int init_implementation_afu_regs(struct cxl_afu *afu)
{
	/* read/write masks for this slice */
	cxl_p1n_write(afu, CXL_PSL_APCALLOC_A, 0xFFFFFFFEFEFEFEFEULL);
	/* APC read/write masks for this slice */
	cxl_p1n_write(afu, CXL_PSL_COALLOC_A, 0xFF000000FEFEFEFEULL);
	/* for debugging with trace arrays */
	cxl_p1n_write(afu, CXL_PSL_SLICE_TRACE, 0x0000FFFF00000000ULL);
	cxl_p1n_write(afu, CXL_PSL_RXCTL_A, CXL_PSL_RXCTL_AFUHP_4S);

	return 0;
}

int cxl_pci_setup_irq(struct cxl *adapter, unsigned int hwirq,
		      unsigned int virq)
{
	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);

	return pnv_cxl_ioda_msi_setup(dev, hwirq, virq);
}

int cxl_update_image_control(struct cxl *adapter)
{
	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
	int rc;
	int vsec;
	u8 image_state;

	if (!(vsec = find_cxl_vsec(dev))) {
		dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
		return -ENODEV;
	}

	if ((rc = CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state))) {
		dev_err(&dev->dev, "failed to read image state: %i\n", rc);
		return rc;
	}

	if (adapter->perst_loads_image)
		image_state |= CXL_VSEC_PERST_LOADS_IMAGE;
	else
		image_state &= ~CXL_VSEC_PERST_LOADS_IMAGE;

	if (adapter->perst_select_user)
		image_state |= CXL_VSEC_PERST_SELECT_USER;
	else
		image_state &= ~CXL_VSEC_PERST_SELECT_USER;

	if ((rc = CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, image_state))) {
		dev_err(&dev->dev, "failed to update image control: %i\n", rc);
		return rc;
	}

	return 0;
}

int cxl_pci_alloc_one_irq(struct cxl *adapter)
{
	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);

	return pnv_cxl_alloc_hwirqs(dev, 1);
}

void cxl_pci_release_one_irq(struct cxl *adapter, int hwirq)
{
	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);

	pnv_cxl_release_hwirqs(dev, hwirq, 1);
}

int cxl_pci_alloc_irq_ranges(struct cxl_irq_ranges *irqs,
			     struct cxl *adapter, unsigned int num)
{
	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);

	return pnv_cxl_alloc_hwirq_ranges(irqs, dev, num);
}

void cxl_pci_release_irq_ranges(struct cxl_irq_ranges *irqs,
				struct cxl *adapter)
{
	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);

	pnv_cxl_release_hwirq_ranges(irqs, dev);
}
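/*
 * CXL requires its BARs to be assigned above 4GB (M64 space), and
 * BAR 4/5 does not hold an ordinary address: it selects the CXL
 * protocol address range instead.
 */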
static int setup_cxl_bars(struct pci_dev *dev)
{
	/* Safety check in case we get backported to < 3.17 without M64 */
	if ((p1_base(dev) < 0x100000000ULL) ||
	    (p2_base(dev) < 0x100000000ULL)) {
		dev_err(&dev->dev, "ABORTING: M32 BAR assignment incompatible with CXL\n");
		return -ENODEV;
	}

	/*
	 * BAR 4/5 has a special meaning for CXL and must be programmed with a
	 * special value corresponding to the CXL protocol address range.
	 * For POWER 8 that means bits 48:49 must be set to 10
	 */
	pci_write_config_dword(dev, PCI_BASE_ADDRESS_4, 0x00000000);
	pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, 0x00020000);

	return 0;
}

/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */
static int switch_card_to_cxl(struct pci_dev *dev)
{
	int vsec;
	u8 val;
	int rc;

	dev_info(&dev->dev, "switch card to CXL\n");

	if (!(vsec = find_cxl_vsec(dev))) {
		dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
		return -ENODEV;
	}

	if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) {
		dev_err(&dev->dev, "failed to read current mode control: %i\n", rc);
		return rc;
	}
	val &= ~CXL_VSEC_PROTOCOL_MASK;
	val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE;
	if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) {
		dev_err(&dev->dev, "failed to enable CXL protocol: %i\n", rc);
		return rc;
	}
	/*
	 * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states
	 * we must wait 100ms after this mode switch before touching
	 * PCIe config space.
	 */
	msleep(100);

	return 0;
}
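/*
 * Map the per-slice register regions: the per-slice privileged 1
 * registers live at a fixed 64k offset into BAR2, the per-slice
 * privileged 2 registers at the start of BAR0, both strided by slice
 * number; the AFU descriptor and per-process problem state areas are
 * located via the offsets read from the VSEC.
 */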
static int pci_map_slice_regs(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev)
{
	u64 p1n_base, p2n_base, afu_desc;
	const u64 p1n_size = 0x100;
	const u64 p2n_size = 0x1000;

	p1n_base = p1_base(dev) + 0x10000 + (afu->slice * p1n_size);
	p2n_base = p2_base(dev) + (afu->slice * p2n_size);
	afu->psn_phys = p2_base(dev) + (adapter->native->ps_off + (afu->slice * adapter->ps_size));
	afu_desc = p2_base(dev) + adapter->native->afu_desc_off + (afu->slice * adapter->native->afu_desc_size);

	if (!(afu->native->p1n_mmio = ioremap(p1n_base, p1n_size)))
		goto err;
	if (!(afu->p2n_mmio = ioremap(p2n_base, p2n_size)))
		goto err1;
	if (afu_desc) {
		if (!(afu->native->afu_desc_mmio = ioremap(afu_desc, adapter->native->afu_desc_size)))
			goto err2;
	}

	return 0;
err2:
	iounmap(afu->p2n_mmio);
err1:
	iounmap(afu->native->p1n_mmio);
err:
	dev_err(&afu->dev, "Error mapping AFU MMIO regions\n");
	return -ENOMEM;
}

static void pci_unmap_slice_regs(struct cxl_afu *afu)
{
	if (afu->p2n_mmio) {
		iounmap(afu->p2n_mmio);
		afu->p2n_mmio = NULL;
	}
	if (afu->native->p1n_mmio) {
		iounmap(afu->native->p1n_mmio);
		afu->native->p1n_mmio = NULL;
	}
	if (afu->native->afu_desc_mmio) {
		iounmap(afu->native->afu_desc_mmio);
		afu->native->afu_desc_mmio = NULL;
	}
}

void cxl_pci_release_afu(struct device *dev)
{
	struct cxl_afu *afu = to_cxl_afu(dev);

	pr_devel("%s\n", __func__);

	idr_destroy(&afu->contexts_idr);
	cxl_release_spa(afu);

	kfree(afu->native);
	kfree(afu);
}

/* Expects AFU struct to have recently been zeroed out */
static int cxl_read_afu_descriptor(struct cxl_afu *afu)
{
	u64 val;

	val = AFUD_READ_INFO(afu);
	afu->pp_irqs = AFUD_NUM_INTS_PER_PROC(val);
	afu->max_procs_virtualised = AFUD_NUM_PROCS(val);
	afu->crs_num = AFUD_NUM_CRS(val);

	if (AFUD_AFU_DIRECTED(val))
		afu->modes_supported |= CXL_MODE_DIRECTED;
	if (AFUD_DEDICATED_PROCESS(val))
		afu->modes_supported |= CXL_MODE_DEDICATED;
	if (AFUD_TIME_SLICED(val))
		afu->modes_supported |= CXL_MODE_TIME_SLICED;

	val = AFUD_READ_PPPSA(afu);
	afu->pp_size = AFUD_PPPSA_LEN(val) * 4096;
	afu->psa = AFUD_PPPSA_PSA(val);
	if ((afu->pp_psa = AFUD_PPPSA_PP(val)))
		afu->native->pp_offset = AFUD_READ_PPPSA_OFF(afu);

	val = AFUD_READ_CR(afu);
	afu->crs_len = AFUD_CR_LEN(val) * 256;
	afu->crs_offset = AFUD_READ_CR_OFF(afu);

	/* eb_len is in multiple of 4K */
	afu->eb_len = AFUD_EB_LEN(AFUD_READ_EB(afu)) * 4096;
	afu->eb_offset = AFUD_READ_EB_OFF(afu);

	/* eb_off is 4K aligned so lower 12 bits are always zero */
	if (EXTRACT_PPC_BITS(afu->eb_offset, 0, 11) != 0) {
		dev_warn(&afu->dev,
			 "Invalid AFU error buffer offset %Lx\n",
			 afu->eb_offset);
		dev_info(&afu->dev,
			 "Ignoring AFU error buffer in the descriptor\n");
		/* indicate that no afu buffer exists */
		afu->eb_len = 0;
	}

	return 0;
}

static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu)
{
	int i, rc;
	u32 val;

	if (afu->psa && afu->adapter->ps_size <
	    (afu->native->pp_offset + afu->pp_size * afu->max_procs_virtualised)) {
		dev_err(&afu->dev, "per-process PSA can't fit inside the PSA!\n");
		return -ENODEV;
	}

	if (afu->pp_psa && (afu->pp_size < PAGE_SIZE))
		dev_warn(&afu->dev, "AFU uses < PAGE_SIZE per-process PSA!\n");

	for (i = 0; i < afu->crs_num; i++) {
		rc = cxl_ops->afu_cr_read32(afu, i, 0, &val);
		if (rc || val == 0) {
			dev_err(&afu->dev, "ABORTING: AFU configuration record %i is invalid\n", i);
			return -EINVAL;
		}
	}

	return 0;
}

static int sanitise_afu_regs(struct cxl_afu *afu)
{
	u64 reg;

	/*
	 * Clear out any regs that contain either an IVTE or address or may be
	 * waiting on an acknowledgement to try to be a bit safer as we bring
	 * it online
	 */
	reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An);
	if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) {
		dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#016llx\n", reg);
		if (cxl_ops->afu_reset(afu))
			return -EIO;
		if (cxl_afu_disable(afu))
			return -EIO;
		if (cxl_psl_purge(afu))
			return -EIO;
	}
	cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0x0000000000000000);
	cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, 0x0000000000000000);
	cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, 0x0000000000000000);
	cxl_p1n_write(afu, CXL_PSL_AMBAR_An, 0x0000000000000000);
	cxl_p1n_write(afu, CXL_PSL_SPOffset_An, 0x0000000000000000);
	cxl_p1n_write(afu, CXL_HAURP_An, 0x0000000000000000);
	cxl_p2n_write(afu, CXL_CSRP_An, 0x0000000000000000);
	cxl_p2n_write(afu, CXL_AURP1_An, 0x0000000000000000);
	cxl_p2n_write(afu, CXL_AURP0_An, 0x0000000000000000);
	cxl_p2n_write(afu, CXL_SSTP1_An, 0x0000000000000000);
	cxl_p2n_write(afu, CXL_SSTP0_An, 0x0000000000000000);
	reg = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
	if (reg) {
		dev_warn(&afu->dev, "AFU had pending DSISR: %#016llx\n", reg);
		if (reg & CXL_PSL_DSISR_TRANS)
			cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
		else
			cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
	}
	reg = cxl_p1n_read(afu, CXL_PSL_SERR_An);
	if (reg) {
		if (reg & ~0xffff)
			dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg);
		cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff);
	}
	reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
	if (reg) {
		dev_warn(&afu->dev, "AFU had pending error status: %#016llx\n", reg);
		cxl_p2n_write(afu, CXL_PSL_ErrStat_An, reg);
	}

	return 0;
}

#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE
/*
 * afu_eb_read:
 * Called from sysfs and reads the afu error info buffer. The h/w only supports
 * 4/8 bytes aligned access. So in case the requested offset/count aren't 8 byte
 * aligned the function uses a bounce buffer which can be max PAGE_SIZE.
 */
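/*
 * For example, a read of count=16 at off=5 becomes an aligned read of
 * 24 bytes from offset 0 into the bounce buffer (round_down(5, 8) = 0,
 * round_up(21, 8) = 24), from which bytes 5..20 are copied back to the
 * caller.
 */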
ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
				    loff_t off, size_t count)
{
	loff_t aligned_start, aligned_end;
	size_t aligned_length;
	void *tbuf;
	const void __iomem *ebuf = afu->native->afu_desc_mmio + afu->eb_offset;

	if (count == 0 || off < 0 || (size_t)off >= afu->eb_len)
		return 0;

	/* calculate aligned read window */
	count = min((size_t)(afu->eb_len - off), count);
	aligned_start = round_down(off, 8);
	aligned_end = round_up(off + count, 8);
	aligned_length = aligned_end - aligned_start;

	/* max we can copy in one read is PAGE_SIZE */
	if (aligned_length > ERR_BUFF_MAX_COPY_SIZE) {
		aligned_length = ERR_BUFF_MAX_COPY_SIZE;
		count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7);
	}

	/* use bounce buffer for copy */
	tbuf = (void *)__get_free_page(GFP_TEMPORARY);
	if (!tbuf)
		return -ENOMEM;

	/* perform aligned read from the mmio region */
	memcpy_fromio(tbuf, ebuf + aligned_start, aligned_length);
	memcpy(buf, tbuf + (off & 0x7), count);

	free_page((unsigned long)tbuf);

	return count;
}

static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pci_dev *dev)
{
	int rc;

	if ((rc = pci_map_slice_regs(afu, adapter, dev)))
		return rc;

	if ((rc = sanitise_afu_regs(afu)))
		goto err1;

	/* We need to reset the AFU before we can read the AFU descriptor */
	if ((rc = cxl_ops->afu_reset(afu)))
		goto err1;

	if (cxl_verbose)
		dump_afu_descriptor(afu);

	if ((rc = cxl_read_afu_descriptor(afu)))
		goto err1;

	if ((rc = cxl_afu_descriptor_looks_ok(afu)))
		goto err1;

	if ((rc = init_implementation_afu_regs(afu)))
		goto err1;

	if ((rc = cxl_native_register_serr_irq(afu)))
		goto err1;

	if ((rc = cxl_native_register_psl_irq(afu)))
		goto err2;

	return 0;

err2:
	cxl_native_release_serr_irq(afu);
err1:
	pci_unmap_slice_regs(afu);
	return rc;
}

static void pci_deconfigure_afu(struct cxl_afu *afu)
{
	cxl_native_release_psl_irq(afu);
	cxl_native_release_serr_irq(afu);
	pci_unmap_slice_regs(afu);
}
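/*
 * Allocate and bring up one AFU slice: map its registers, read and
 * validate its descriptor, register its interrupts, then hand it to
 * the driver core and sysfs. Once cxl_register_afu() has been called
 * the AFU must be torn down through cxl_pci_remove_afu() rather than
 * freed directly.
 */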
static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
{
	struct cxl_afu *afu;
	int rc = -ENOMEM;

	afu = cxl_alloc_afu(adapter, slice);
	if (!afu)
		return -ENOMEM;

	afu->native = kzalloc(sizeof(struct cxl_afu_native), GFP_KERNEL);
	if (!afu->native)
		goto err_free_afu;

	mutex_init(&afu->native->spa_mutex);

	rc = dev_set_name(&afu->dev, "afu%i.%i", adapter->adapter_num, slice);
	if (rc)
		goto err_free_native;

	rc = pci_configure_afu(afu, adapter, dev);
	if (rc)
		goto err_free_native;

	/* Don't care if this fails */
	cxl_debugfs_afu_add(afu);

	/*
	 * After we call this function we must not free the afu directly, even
	 * if it returns an error!
	 */
	if ((rc = cxl_register_afu(afu)))
		goto err_put1;

	if ((rc = cxl_sysfs_afu_add(afu)))
		goto err_put1;

	adapter->afu[afu->slice] = afu;

	if ((rc = cxl_pci_vphb_add(afu)))
		dev_info(&afu->dev, "Can't register vPHB\n");

	return 0;

err_put1:
	pci_deconfigure_afu(afu);
	cxl_debugfs_afu_remove(afu);
	device_unregister(&afu->dev);
	return rc;

err_free_native:
	kfree(afu->native);
err_free_afu:
	kfree(afu);
	return rc;
}

static void cxl_pci_remove_afu(struct cxl_afu *afu)
{
	pr_devel("%s\n", __func__);

	if (!afu)
		return;

	cxl_pci_vphb_remove(afu);
	cxl_sysfs_afu_remove(afu);
	cxl_debugfs_afu_remove(afu);

	spin_lock(&afu->adapter->afu_list_lock);
	afu->adapter->afu[afu->slice] = NULL;
	spin_unlock(&afu->adapter->afu_list_lock);

	cxl_context_detach_all(afu);
	cxl_ops->afu_deactivate_mode(afu, afu->current_mode);

	pci_deconfigure_afu(afu);
	device_unregister(&afu->dev);
}

int cxl_pci_reset(struct cxl *adapter)
{
	struct pci_dev *dev = to_pci_dev(adapter->dev.parent);
	int rc;

	if (adapter->perst_same_image) {
		dev_warn(&dev->dev,
			 "cxl: refusing to reset/reflash when perst_reloads_same_image is set.\n");
		return -EINVAL;
	}

	dev_info(&dev->dev, "CXL reset\n");

	/*
	 * pcie_warm_reset requests a fundamental pci reset which includes a
	 * PERST assert/deassert. PERST triggers a loading of the image
	 * if "user" or "factory" is selected in sysfs
	 */
	if ((rc = pci_set_pcie_reset_state(dev, pcie_warm_reset))) {
		dev_err(&dev->dev, "cxl: pcie_warm_reset failed\n");
		return rc;
	}

	return rc;
}

static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev)
{
	if (pci_request_region(dev, 2, "priv 1 regs"))
		goto err1;
	if (pci_request_region(dev, 0, "priv 2 regs"))
		goto err2;

	pr_devel("cxl_map_adapter_regs: p1: %#016llx %#llx, p2: %#016llx %#llx\n",
		 p1_base(dev), p1_size(dev), p2_base(dev), p2_size(dev));

	if (!(adapter->native->p1_mmio = ioremap(p1_base(dev), p1_size(dev))))
		goto err3;

	if (!(adapter->native->p2_mmio = ioremap(p2_base(dev), p2_size(dev))))
		goto err4;

	return 0;

err4:
	iounmap(adapter->native->p1_mmio);
	adapter->native->p1_mmio = NULL;
err3:
	pci_release_region(dev, 0);
err2:
	pci_release_region(dev, 2);
err1:
	return -ENOMEM;
}

static void cxl_unmap_adapter_regs(struct cxl *adapter)
{
	if (adapter->native->p1_mmio) {
		iounmap(adapter->native->p1_mmio);
		adapter->native->p1_mmio = NULL;
		pci_release_region(to_pci_dev(adapter->dev.parent), 2);
	}
	if (adapter->native->p2_mmio) {
		iounmap(adapter->native->p2_mmio);
		adapter->native->p2_mmio = NULL;
		pci_release_region(to_pci_dev(adapter->dev.parent), 0);
	}
}
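/*
 * Read the adapter-wide configuration out of the CXL VSEC: status,
 * PSL/CAIA revisions, the number of AFU slices, and the offsets and
 * sizes of the AFU descriptors and problem state area, which the VSEC
 * stores in 64kB units and we convert to bytes.
 */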
static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev)
{
	int vsec;
	u32 afu_desc_off, afu_desc_size;
	u32 ps_off, ps_size;
	u16 vseclen;
	u8 image_state;

	if (!(vsec = find_cxl_vsec(dev))) {
		dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
		return -ENODEV;
	}

	CXL_READ_VSEC_LENGTH(dev, vsec, &vseclen);
	if (vseclen < CXL_VSEC_MIN_SIZE) {
		dev_err(&dev->dev, "ABORTING: CXL VSEC too short\n");
		return -EINVAL;
	}

	CXL_READ_VSEC_STATUS(dev, vsec, &adapter->vsec_status);
	CXL_READ_VSEC_PSL_REVISION(dev, vsec, &adapter->psl_rev);
	CXL_READ_VSEC_CAIA_MAJOR(dev, vsec, &adapter->caia_major);
	CXL_READ_VSEC_CAIA_MINOR(dev, vsec, &adapter->caia_minor);
	CXL_READ_VSEC_BASE_IMAGE(dev, vsec, &adapter->base_image);
	CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state);
	adapter->user_image_loaded = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED);
	adapter->perst_select_user = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED);

	CXL_READ_VSEC_NAFUS(dev, vsec, &adapter->slices);
	CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, &afu_desc_off);
	CXL_READ_VSEC_AFU_DESC_SIZE(dev, vsec, &afu_desc_size);
	CXL_READ_VSEC_PS_OFF(dev, vsec, &ps_off);
	CXL_READ_VSEC_PS_SIZE(dev, vsec, &ps_size);

	/* Convert everything to bytes, because there is NO WAY I'd look at the
	 * code a month later and forget what units these are in ;-) */
	adapter->native->ps_off = ps_off * 64 * 1024;
	adapter->ps_size = ps_size * 64 * 1024;
	adapter->native->afu_desc_off = afu_desc_off * 64 * 1024;
	adapter->native->afu_desc_size = afu_desc_size * 64 * 1024;

	/* Total IRQs - 1 PSL ERROR - #AFU*(1 slice error + 1 DSI) */
	adapter->user_irqs = pnv_cxl_get_irq_count(dev) - 1 - 2 * adapter->slices;

	return 0;
}

/*
 * Workaround a PCIe Host Bridge defect on some cards, that can cause
 * malformed Transaction Layer Packet (TLP) errors to be erroneously
 * reported. Mask this error in the Uncorrectable Error Mask Register.
 *
 * The upper nibble of the PSL revision is used to distinguish between
 * different cards. The affected ones have it set to 0.
 */
static void cxl_fixup_malformed_tlp(struct cxl *adapter, struct pci_dev *dev)
{
	int aer;
	u32 data;

	if (adapter->psl_rev & 0xf000)
		return;
	if (!(aer = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR)))
		return;
	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &data);
	if ((data & PCI_ERR_UNC_MALF_TLP) &&
	    (data & PCI_ERR_UNC_INTN))
		return;
	data |= PCI_ERR_UNC_MALF_TLP;
	data |= PCI_ERR_UNC_INTN;
	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, data);
}
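/*
 * Sanity check the VSEC values before committing to the card: bail out
 * on unsupported features, a second port, a card without AFUs, missing
 * AFU descriptors, or a problem state area that doesn't fit in BAR2.
 */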
static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev)
{
	if (adapter->vsec_status & CXL_STATUS_SECOND_PORT)
		return -EBUSY;

	if (adapter->vsec_status & CXL_UNSUPPORTED_FEATURES) {
		dev_err(&dev->dev, "ABORTING: CXL requires unsupported features\n");
		return -EINVAL;
	}

	if (!adapter->slices) {
		/* Once we support dynamic reprogramming we can use the card if
		 * it supports loadable AFUs */
		dev_err(&dev->dev, "ABORTING: Device has no AFUs\n");
		return -EINVAL;
	}

	if (!adapter->native->afu_desc_off || !adapter->native->afu_desc_size) {
		dev_err(&dev->dev, "ABORTING: VSEC shows no AFU descriptors\n");
		return -EINVAL;
	}

	if (adapter->ps_size > p2_size(dev) - adapter->native->ps_off) {
		dev_err(&dev->dev, "ABORTING: Problem state size larger than "
			"available in BAR2: 0x%llx > 0x%llx\n",
			adapter->ps_size, p2_size(dev) - adapter->native->ps_off);
		return -EINVAL;
	}

	return 0;
}

ssize_t cxl_pci_read_adapter_vpd(struct cxl *adapter, void *buf, size_t len)
{
	return pci_read_vpd(to_pci_dev(adapter->dev.parent), 0, len, buf);
}

static void cxl_release_adapter(struct device *dev)
{
	struct cxl *adapter = to_cxl_adapter(dev);

	pr_devel("cxl_release_adapter\n");

	cxl_remove_adapter_nr(adapter);

	kfree(adapter->native);
	kfree(adapter);
}

#define CXL_PSL_ErrIVTE_tberror (0x1ull << (63-31))

static int sanitise_adapter_regs(struct cxl *adapter)
{
	/* Clear PSL tberror bit by writing 1 to it */
	cxl_p1_write(adapter, CXL_PSL_ErrIVTE, CXL_PSL_ErrIVTE_tberror);
	return cxl_tlb_slb_invalidate(adapter);
}

/*
 * This should contain *only* operations that can safely be done in
 * both creation and recovery.
 */
static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
{
	int rc;

	adapter->dev.parent = &dev->dev;
	adapter->dev.release = cxl_release_adapter;
	pci_set_drvdata(dev, adapter);

	rc = pci_enable_device(dev);
	if (rc) {
		dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc);
		return rc;
	}

	if ((rc = cxl_read_vsec(adapter, dev)))
		return rc;

	if ((rc = cxl_vsec_looks_ok(adapter, dev)))
		return rc;

	cxl_fixup_malformed_tlp(adapter, dev);

	if ((rc = setup_cxl_bars(dev)))
		return rc;

	if ((rc = switch_card_to_cxl(dev)))
		return rc;

	if ((rc = cxl_update_image_control(adapter)))
		return rc;

	if ((rc = cxl_map_adapter_regs(adapter, dev)))
		return rc;

	if ((rc = sanitise_adapter_regs(adapter)))
		goto err;

	if ((rc = init_implementation_adapter_regs(adapter, dev)))
		goto err;

	if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_CAPI)))
		goto err;

	/* If recovery happened, the last step is to turn on snooping.
	 * In the non-recovery case this has no effect */
	if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON)))
		goto err;

	if ((rc = cxl_setup_psl_timebase(adapter, dev)))
		goto err;

	if ((rc = cxl_native_register_psl_err_irq(adapter)))
		goto err;

	return 0;

err:
	cxl_unmap_adapter_regs(adapter);
	return rc;
}

static void cxl_deconfigure_adapter(struct cxl *adapter)
{
	struct pci_dev *pdev = to_pci_dev(adapter->dev.parent);

	cxl_native_release_psl_err_irq(adapter);
	cxl_unmap_adapter_regs(adapter);

	pci_disable_device(pdev);
}
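/*
 * Top-level adapter bring-up for probe: allocate the adapter, set the
 * persistent PERST defaults, configure the hardware and register with
 * the driver core. cxl_configure_adapter() is kept separate so that
 * EEH recovery can reconfigure the hardware without reallocating
 * anything.
 */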
static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev)
{
	struct cxl *adapter;
	int rc;

	adapter = cxl_alloc_adapter();
	if (!adapter)
		return ERR_PTR(-ENOMEM);

	adapter->native = kzalloc(sizeof(struct cxl_native), GFP_KERNEL);
	if (!adapter->native) {
		rc = -ENOMEM;
		goto err_release;
	}

	/*
	 * Set defaults for parameters which need to persist over
	 * configure/reconfigure
	 */
	adapter->perst_loads_image = true;
	adapter->perst_same_image = false;

	rc = cxl_configure_adapter(adapter, dev);
	if (rc) {
		pci_disable_device(dev);
		goto err_release;
	}

	/* Don't care if this one fails: */
	cxl_debugfs_adapter_add(adapter);

	/*
	 * After we call this function we must not free the adapter directly,
	 * even if it returns an error!
	 */
	if ((rc = cxl_register_adapter(adapter)))
		goto err_put1;

	if ((rc = cxl_sysfs_adapter_add(adapter)))
		goto err_put1;

	return adapter;

err_put1:
	/*
	 * This should mirror cxl_remove_adapter, except without the
	 * sysfs parts
	 */
	cxl_debugfs_adapter_remove(adapter);
	cxl_deconfigure_adapter(adapter);
	device_unregister(&adapter->dev);
	return ERR_PTR(rc);

err_release:
	cxl_release_adapter(&adapter->dev);
	return ERR_PTR(rc);
}

static void cxl_pci_remove_adapter(struct cxl *adapter)
{
	pr_devel("cxl_remove_adapter\n");

	cxl_sysfs_adapter_remove(adapter);
	cxl_debugfs_adapter_remove(adapter);

	cxl_deconfigure_adapter(adapter);

	device_unregister(&adapter->dev);
}

static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
	struct cxl *adapter;
	int slice;
	int rc;

	if (cxl_pci_is_vphb_device(dev)) {
		dev_dbg(&dev->dev, "cxl_init_adapter: Ignoring cxl vphb device\n");
		return -ENODEV;
	}

	if (cxl_verbose)
		dump_cxl_config_space(dev);

	adapter = cxl_pci_init_adapter(dev);
	if (IS_ERR(adapter)) {
		dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter));
		return PTR_ERR(adapter);
	}

	for (slice = 0; slice < adapter->slices; slice++) {
		if ((rc = pci_init_afu(adapter, slice, dev))) {
			dev_err(&dev->dev, "AFU %i failed to initialise: %i\n", slice, rc);
			continue;
		}

		rc = cxl_afu_select_best_mode(adapter->afu[slice]);
		if (rc)
			dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc);
	}

	return 0;
}

static void cxl_remove(struct pci_dev *dev)
{
	struct cxl *adapter = pci_get_drvdata(dev);
	struct cxl_afu *afu;
	int i;

	/*
	 * Lock to prevent someone grabbing a ref through the adapter list as
	 * we are removing it
	 */
	for (i = 0; i < adapter->slices; i++) {
		afu = adapter->afu[i];
		cxl_pci_remove_afu(afu);
	}
	cxl_pci_remove_adapter(adapter);
}
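/*
 * EEH support: the handlers below implement the standard PCI error
 * recovery sequence (error_detected -> slot_reset -> resume) and fan
 * each step out to any AFU drivers bound on the virtual PHBs.
 */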
static pci_ers_result_t cxl_vphb_error_detected(struct cxl_afu *afu,
						pci_channel_state_t state)
{
	struct pci_dev *afu_dev;
	pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
	pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET;

	/*
	 * There should only be one entry, but go through the list
	 * anyway
	 */
	list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
		if (!afu_dev->driver)
			continue;

		afu_dev->error_state = state;

		if (afu_dev->driver->err_handler)
			afu_result = afu_dev->driver->err_handler->error_detected(afu_dev,
										  state);
		/* Disconnect trumps all, NONE trumps NEED_RESET */
		if (afu_result == PCI_ERS_RESULT_DISCONNECT)
			result = PCI_ERS_RESULT_DISCONNECT;
		else if ((afu_result == PCI_ERS_RESULT_NONE) &&
			 (result == PCI_ERS_RESULT_NEED_RESET))
			result = PCI_ERS_RESULT_NONE;
	}
	return result;
}

static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev,
					       pci_channel_state_t state)
{
	struct cxl *adapter = pci_get_drvdata(pdev);
	struct cxl_afu *afu;
	pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
	int i;

	/*
	 * At this point, we could still have an interrupt pending.
	 * Let's try to get them out of the way before they do
	 * anything we don't like.
	 */
	schedule();

	/* If we're permanently dead, give up. */
	if (state == pci_channel_io_perm_failure) {
		/*
		 * Tell the AFU drivers; but we don't care what they
		 * say, we're going away.
		 */
		for (i = 0; i < adapter->slices; i++) {
			afu = adapter->afu[i];
			cxl_vphb_error_detected(afu, state);
		}
		return PCI_ERS_RESULT_DISCONNECT;
	}

	/* Are we reflashing?
	 *
	 * If we reflash, we could come back as something entirely
	 * different, including a non-CAPI card. As such, by default
	 * we don't participate in the process. We'll be unbound and
	 * the slot re-probed. (TODO: check EEH doesn't blindly rebind
	 * us!)
	 *
	 * However, this isn't the entire story: for reliability
	 * reasons, we usually want to reflash the FPGA on PERST in
	 * order to get back to a more reliable known-good state.
	 *
	 * This causes us a bit of a problem: if we reflash we can't
	 * trust that we'll come back the same - we could have a new
	 * image and been PERSTed in order to load that
	 * image. However, most of the time we actually *will* come
	 * back the same - for example a regular EEH event.
	 *
	 * Therefore, we allow the user to assert that the image is
	 * indeed the same and that we should continue on into EEH
	 * anyway.
	 */
	if (adapter->perst_loads_image && !adapter->perst_same_image) {
		/* TODO take the PHB out of CXL mode */
		dev_info(&pdev->dev, "reflashing, so opting out of EEH!\n");
		return PCI_ERS_RESULT_NONE;
	}

	/*
	 * At this point, we want to try to recover. We'll always
	 * need a complete slot reset: we don't trust any other reset.
	 *
	 * Now, we go through each AFU:
	 * - We send the driver, if bound, an error_detected callback.
	 *   We expect it to clean up, but it can also tell us to give
	 *   up and permanently detach the card. To simplify things, if
	 *   any bound AFU driver doesn't support EEH, we give up on EEH.
	 *
	 * - We detach all contexts associated with the AFU. This
	 *   does not free them, but puts them into a CLOSED state
	 *   which causes any of the associated files to return useful
	 *   errors to userland. It also unmaps, but does not free,
	 *   any IRQs.
	 *
	 * - We clean up our side: releasing and unmapping resources we hold
	 *   so we can wire them up again when the hardware comes back up.
	 *
	 * Driver authors should note:
	 *
	 * - Any contexts you create in your kernel driver (except
	 *   those associated with anonymous file descriptors) are
	 *   your responsibility to free and recreate. Likewise with
	 *   any attached resources.
	 *
	 * - We will take responsibility for re-initialising the
	 *   device context (the one set up for you in
	 *   cxl_pci_enable_device_hook and accessed through
	 *   cxl_get_context). If you've attached IRQs or other
	 *   resources to it, they remain yours to free.
	 *
	 * You can call the same functions to release resources as you
	 * normally would: we make sure that these functions continue
	 * to work when the hardware is down.
	 *
	 * Two examples:
	 *
	 * 1) If you normally free all your resources at the end of
	 *    each request, or if you use anonymous FDs, your
	 *    error_detected callback can simply set a flag to tell
	 *    your driver not to start any new calls. You can then
	 *    clear the flag in the resume callback.
	 *
	 * 2) If you normally allocate your resources on startup:
	 *    * Set a flag in error_detected as above.
	 *    * Let CXL detach your contexts.
	 *    * In slot_reset, free the old resources and allocate new ones.
	 *    * In resume, clear the flag to allow things to start.
	 */
	for (i = 0; i < adapter->slices; i++) {
		afu = adapter->afu[i];

		result = cxl_vphb_error_detected(afu, state);

		/* Only continue if everyone agrees on NEED_RESET */
		if (result != PCI_ERS_RESULT_NEED_RESET)
			return result;

		cxl_context_detach_all(afu);
		cxl_ops->afu_deactivate_mode(afu, afu->current_mode);
		pci_deconfigure_afu(afu);
	}
	cxl_deconfigure_adapter(adapter);

	return result;
}
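/*
 * Slot reset: reconfigure the adapter and every AFU from scratch,
 * re-create each AFU device's default context, and give any bound AFU
 * drivers' slot_reset handlers a chance to veto recovery. Any failure
 * here asks for the card to be disconnected.
 */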
static pci_ers_result_t cxl_pci_slot_reset(struct pci_dev *pdev)
{
	struct cxl *adapter = pci_get_drvdata(pdev);
	struct cxl_afu *afu;
	struct cxl_context *ctx;
	struct pci_dev *afu_dev;
	pci_ers_result_t afu_result = PCI_ERS_RESULT_RECOVERED;
	pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
	int i;

	if (cxl_configure_adapter(adapter, pdev))
		goto err;

	for (i = 0; i < adapter->slices; i++) {
		afu = adapter->afu[i];

		if (pci_configure_afu(afu, adapter, pdev))
			goto err;

		if (cxl_afu_select_best_mode(afu))
			goto err;

		list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
			/*
			 * Reset the device context.
			 * TODO: make this less disruptive
			 */
			ctx = cxl_get_context(afu_dev);

			if (ctx && cxl_release_context(ctx))
				goto err;

			ctx = cxl_dev_context_init(afu_dev);
			if (!ctx)
				goto err;

			afu_dev->dev.archdata.cxl_ctx = ctx;

			if (cxl_ops->afu_check_and_enable(afu))
				goto err;

			afu_dev->error_state = pci_channel_io_normal;

			/*
			 * If there's a driver attached, allow it to
			 * chime in on recovery. Drivers should check
			 * if everything has come back OK, but
			 * shouldn't start new work until we call
			 * their resume function.
			 */
			if (!afu_dev->driver)
				continue;

			if (afu_dev->driver->err_handler &&
			    afu_dev->driver->err_handler->slot_reset)
				afu_result = afu_dev->driver->err_handler->slot_reset(afu_dev);

			if (afu_result == PCI_ERS_RESULT_DISCONNECT)
				result = PCI_ERS_RESULT_DISCONNECT;
		}
	}
	return result;

err:
	/*
	 * All the bits that happen in both error_detected and cxl_remove
	 * should be idempotent, so we don't need to worry about leaving a mix
	 * of unconfigured and reconfigured resources.
	 */
	dev_err(&pdev->dev, "EEH recovery failed. Asking to be disconnected.\n");
	return PCI_ERS_RESULT_DISCONNECT;
}

static void cxl_pci_resume(struct pci_dev *pdev)
{
	struct cxl *adapter = pci_get_drvdata(pdev);
	struct cxl_afu *afu;
	struct pci_dev *afu_dev;
	int i;

	/*
	 * Everything is back now. Drivers should restart work now.
	 * This is not the place to be checking if everything came back up
	 * properly, because there's no return value: do that in slot_reset.
	 */
	for (i = 0; i < adapter->slices; i++) {
		afu = adapter->afu[i];

		list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
			if (afu_dev->driver && afu_dev->driver->err_handler &&
			    afu_dev->driver->err_handler->resume)
				afu_dev->driver->err_handler->resume(afu_dev);
		}
	}
}

static const struct pci_error_handlers cxl_err_handler = {
	.error_detected = cxl_pci_error_detected,
	.slot_reset = cxl_pci_slot_reset,
	.resume = cxl_pci_resume,
};

struct pci_driver cxl_pci_driver = {
	.name = "cxl-pci",
	.id_table = cxl_pci_tbl,
	.probe = cxl_probe,
	.remove = cxl_remove,
	.shutdown = cxl_remove,
	.err_handler = &cxl_err_handler,
};