1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2019, Joyent, Inc. 14 * Copyright 2023 Oxide Computer Company 15 */ 16 17 /* 18 * Nexus Driver for AMD Zen family systems. The purpose of this driver is to 19 * provide access to the following resources in a single, centralized fashion: 20 * 21 * - The per-chip Data Fabric 22 * - The North Bridge 23 * - The System Management Network (SMN) 24 * 25 * This is a nexus driver as once we have attached to all the requisite 26 * components, we will enumerate child devices which consume this functionality. 27 * 28 * ------------------------ 29 * Mapping Devices Together 30 * ------------------------ 31 * 32 * The operating system needs to expose things like temperature sensors and DRAM 33 * configuration registers in terms of things that are meaningful to the system 34 * such as logical CPUs, cores, etc. This driver attaches to the PCI devices 35 * that represent the northbridge, data fabrics, and dies. Note that there are 36 * multiple northbridge and DF devices (one each per die) and this driver maps 37 * all of these three things together. Unfortunately, this requires some 38 * acrobatics as there is no direct way to map a northbridge to its 39 * corresponding die. Instead, we map a CPU die to a data fabric PCI device and 40 * a data fabric PCI device to a corresponding northbridge PCI device. This 41 * transitive relationship allows us to map from between northbridge and die. 
42 * 43 * As each data fabric device is attached, based on vendor and device portions 44 * of the PCI ID, we add it to the DF stubs list in the global amdzen_t 45 * structure, amdzen_data->azn_df_stubs. We must now map these to logical CPUs. 46 * 47 * In current Zen based products, there is a direct mapping between processor 48 * nodes and a data fabric PCI device: all of the devices are on PCI Bus 0 and 49 * start from Device 0x18, so device 0x18 maps to processor node 0, 0x19 to 50 * processor node 1, etc. This means that to map a logical CPU to a data fabric 51 * device, we take its processor node id, add it to 0x18 and find the PCI device 52 * that is on bus 0 with that ID number. We already discovered the DF devices as 53 * described above. 54 * 55 * The northbridge PCI device has a well-defined device and function, but the 56 * bus that it is on varies. Each die has its own set of assigned PCI buses and 57 * its northbridge device is on the first die-specific bus. This implies that 58 * the northbridges do not show up on PCI bus 0, as that is the PCI bus that all 59 * of the data fabric devices are on and is not assigned to any particular die. 60 * Additionally, while the northbridge on the lowest-numbered PCI bus 61 * intuitively corresponds to processor node zero, hardware does not guarantee 62 * this. Because we don't want to be at the mercy of firmware, we don't rely on 63 * this ordering assumption, though we have yet to find a system that deviates 64 * from it, either. 65 * 66 * One of the registers in the data fabric device's function 0 67 * (AMDZEN_DF_F0_CFG_ADDR_CTL) happens to identify the first PCI bus that is 68 * associated with the processor node. This means that we can map a data fabric 69 * device to a northbridge by finding the northbridge whose PCI bus ID matches 70 * the value in the corresponding data fabric's AMDZEN_DF_F0_CFG_ADDR_CTL. 
71 * 72 * Given all of the above, we can map a northbridge to a data fabric device and 73 * a die to a data fabric device. Because these are 1:1 mappings, there is a 74 * transitive relationship from northbridge to die. and therefore we know which 75 * northbridge is associated with which processor die. This is summarized in the 76 * following image: 77 * 78 * +-------+ +------------------------------------+ +--------------+ 79 * | Die 0 |---->| Data Fabric PCI BDF 0/18/0 |---->| Northbridge | 80 * +-------+ | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 10 | | PCI 10/0/0 | 81 * ... +------------------------------------+ +--------------+ 82 * +-------+ +------------------------------------+ +--------------+ 83 * | Die n |---->| Data Fabric PCI BDF 0/18+n/0 |---->| Northbridge | 84 * +-------+ | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 133 | | PCI 133/0/0 | 85 * +------------------------------------+ +--------------+ 86 * 87 * Note, the PCI buses used by the northbridges here are arbitrary examples that 88 * do not necessarily reflect actual hardware values; however, the 89 * bus/device/function (BDF) of the data fabric accurately models hardware. All 90 * BDF values are in hex. 91 * 92 * Starting with the Rome generation of processors (Family 17h Model 30-3Fh), 93 * AMD has multiple northbridges on a given die. All of these northbridges share 94 * the same data fabric and system management network port. From our perspective 95 * this means that some of the northbridge devices will be redundant and that we 96 * no longer have a 1:1 mapping between the northbridge and the data fabric 97 * devices. Every data fabric will have a northbridge, but not every northbridge 98 * will have a data fabric device mapped. Because we're always trying to map 99 * from a die to a northbridge and not the reverse, the fact that there are 100 * extra northbridge devices hanging around that we don't know about shouldn't 101 * be a problem. 
102 * 103 * ------------------------------- 104 * Attach and Detach Complications 105 * ------------------------------- 106 * 107 * We need to map different PCI devices together. Each device is attached to a 108 * amdzen_stub driver to facilitate integration with the rest of the kernel PCI 109 * machinery and so we have to manage multiple dev_info_t structures, each of 110 * which may be independently attached and detached. 111 * 112 * This is not particularly complex for attach: our _init routine allocates the 113 * necessary mutex and list structures at module load time, and as each stub is 114 * attached, it calls into this code to be added to the appropriate list. When 115 * the nexus itself is attached, we walk the PCI device tree accumulating a 116 * counter for all devices we expect to be attached. Once the scan is complete 117 * and all such devices are accounted for (stub registration may be happening 118 * asynchronously with respect to nexus attach), we initialize the nexus device 119 * and the attach is complete. 120 * 121 * Most other device drivers support instances that can be brought back after 122 * detach, provided they are associated with an active minor node in the 123 * /devices file system. This driver is different. Once a stub device has been 124 * attached, we do not permit detaching the nexus driver instance, as the kernel 125 * does not give us interlocking guarantees between nexus and stub driver attach 126 * and detach. It is simplest to just unconditionally fail detach once a stub 127 * has attached. 128 * 129 * --------------- 130 * Exposed Devices 131 * --------------- 132 * 133 * Rather than try and have all of the different functions that could be 134 * provided in one driver, we have a nexus driver that tries to load child 135 * pseudo-device drivers that provide specific pieces of functionality. 136 * 137 * ------- 138 * Locking 139 * ------- 140 * 141 * The amdzen_data structure contains a single lock, azn_mutex. 
 *
 * The various client functions here are intended for our nexus's direct
 * children, but have been designed in case someone else should depend on this
 * driver. Once a DF has been discovered, the set of entities inside of it
 * (adf_nents, adf_ents[]) is considered static, constant data, and iteration
 * over them does not require locking. However, the discovery of the amd_df_t
 * does. In addition, locking is required whenever performing register accesses
 * to the DF or SMN.
 *
 * To summarize, one must hold the lock in the following circumstances:
 *
 *  - Looking up DF structures
 *  - Reading or writing to DF registers
 *  - Reading or writing to SMN registers
 *
 * In general, it is preferred that the lock be held across an entire client
 * operation if possible. The only time this becomes an issue is when we have
 * callbacks into our callers (a la amdzen_c_df_iter()) as they may recursively
 * call into us.
 */

#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/pci.h>
#include <sys/sysmacros.h>
#include <sys/sunndi.h>
#include <sys/x86_archext.h>
#include <sys/cpuvar.h>
#include <sys/policy.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/bitmap.h>

#include <sys/amdzen/df.h>
#include <sys/amdzen/ccd.h>
#include "amdzen.h"
#include "amdzen_client.h"
#include "amdzen_topo.h"

amdzen_t *amdzen_data;

/*
 * Internal minor nodes for devices that the nexus provides itself.
 */
#define	AMDZEN_MINOR_TOPO	0

/*
 * Array of northbridge IDs that we care about.
193 */ 194 static const uint16_t amdzen_nb_ids[] = { 195 /* Family 17h Ryzen, Epyc Models 00h-0fh (Zen uarch) */ 196 0x1450, 197 /* Family 17h Raven Ridge, Kestrel, Dali Models 10h-2fh (Zen uarch) */ 198 0x15d0, 199 /* Family 17h/19h Rome, Milan, Matisse, Vermeer Zen 2/Zen 3 uarch */ 200 0x1480, 201 /* Family 17h/19h Renoir, Cezanne, Van Gogh Zen 2/3 uarch */ 202 0x1630, 203 /* Family 19h Genoa and Bergamo */ 204 0x14a4, 205 /* Family 17h Mendocino, Family 19h Rembrandt */ 206 0x14b5, 207 /* Family 19h Raphael */ 208 0x14d8, 209 /* Family 19h Phoenix */ 210 0x14e8 211 }; 212 213 typedef struct { 214 char *acd_name; 215 amdzen_child_t acd_addr; 216 } amdzen_child_data_t; 217 218 static const amdzen_child_data_t amdzen_children[] = { 219 { "smntemp", AMDZEN_C_SMNTEMP }, 220 { "usmn", AMDZEN_C_USMN }, 221 { "zen_udf", AMDZEN_C_ZEN_UDF }, 222 { "zen_umc", AMDZEN_C_ZEN_UMC } 223 }; 224 225 static uint8_t 226 amdzen_stub_get8(amdzen_stub_t *stub, off_t reg) 227 { 228 return (pci_config_get8(stub->azns_cfgspace, reg)); 229 } 230 231 static uint16_t 232 amdzen_stub_get16(amdzen_stub_t *stub, off_t reg) 233 { 234 return (pci_config_get16(stub->azns_cfgspace, reg)); 235 } 236 237 static uint32_t 238 amdzen_stub_get32(amdzen_stub_t *stub, off_t reg) 239 { 240 return (pci_config_get32(stub->azns_cfgspace, reg)); 241 } 242 243 static uint64_t 244 amdzen_stub_get64(amdzen_stub_t *stub, off_t reg) 245 { 246 return (pci_config_get64(stub->azns_cfgspace, reg)); 247 } 248 249 static void 250 amdzen_stub_put8(amdzen_stub_t *stub, off_t reg, uint8_t val) 251 { 252 pci_config_put8(stub->azns_cfgspace, reg, val); 253 } 254 255 static void 256 amdzen_stub_put16(amdzen_stub_t *stub, off_t reg, uint16_t val) 257 { 258 pci_config_put16(stub->azns_cfgspace, reg, val); 259 } 260 261 static void 262 amdzen_stub_put32(amdzen_stub_t *stub, off_t reg, uint32_t val) 263 { 264 pci_config_put32(stub->azns_cfgspace, reg, val); 265 } 266 267 static uint64_t 268 amdzen_df_read_regdef(amdzen_t *azn, 
amdzen_df_t *df, const df_reg_def_t def, 269 uint8_t inst, boolean_t do_64) 270 { 271 df_reg_def_t ficaa; 272 df_reg_def_t ficad; 273 uint32_t val = 0; 274 df_rev_t df_rev = azn->azn_dfs[0].adf_rev; 275 276 VERIFY(MUTEX_HELD(&azn->azn_mutex)); 277 ASSERT3U(def.drd_gens & df_rev, ==, df_rev); 278 val = DF_FICAA_V2_SET_TARG_INST(val, 1); 279 val = DF_FICAA_V2_SET_FUNC(val, def.drd_func); 280 val = DF_FICAA_V2_SET_INST(val, inst); 281 val = DF_FICAA_V2_SET_64B(val, do_64 ? 1 : 0); 282 283 switch (df_rev) { 284 case DF_REV_2: 285 case DF_REV_3: 286 case DF_REV_3P5: 287 ficaa = DF_FICAA_V2; 288 ficad = DF_FICAD_LO_V2; 289 /* 290 * Both here and in the DFv4 case, the register ignores the 291 * lower 2 bits. That is we can only address and encode things 292 * in units of 4 bytes. 293 */ 294 val = DF_FICAA_V2_SET_REG(val, def.drd_reg >> 2); 295 break; 296 case DF_REV_4: 297 ficaa = DF_FICAA_V4; 298 ficad = DF_FICAD_LO_V4; 299 val = DF_FICAA_V4_SET_REG(val, def.drd_reg >> 2); 300 break; 301 default: 302 panic("encountered unexpected DF rev: %u", df_rev); 303 } 304 305 amdzen_stub_put32(df->adf_funcs[ficaa.drd_func], ficaa.drd_reg, val); 306 if (do_64) { 307 return (amdzen_stub_get64(df->adf_funcs[ficad.drd_func], 308 ficad.drd_reg)); 309 } else { 310 return (amdzen_stub_get32(df->adf_funcs[ficad.drd_func], 311 ficad.drd_reg)); 312 } 313 } 314 315 /* 316 * Perform a targeted 32-bit indirect read to a specific instance and function. 317 */ 318 static uint32_t 319 amdzen_df_read32(amdzen_t *azn, amdzen_df_t *df, uint8_t inst, 320 const df_reg_def_t def) 321 { 322 return (amdzen_df_read_regdef(azn, df, def, inst, B_FALSE)); 323 } 324 325 /* 326 * For a broadcast read, just go to the underlying PCI function and perform a 327 * read. At this point in time, we don't believe we need to use the FICAA/FICAD 328 * to access it (though it does have a broadcast mode). 
329 */ 330 static uint32_t 331 amdzen_df_read32_bcast(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def) 332 { 333 VERIFY(MUTEX_HELD(&azn->azn_mutex)); 334 return (amdzen_stub_get32(df->adf_funcs[def.drd_func], def.drd_reg)); 335 } 336 337 static uint32_t 338 amdzen_smn_read(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg) 339 { 340 const uint32_t base_addr = SMN_REG_ADDR_BASE(reg); 341 const uint32_t addr_off = SMN_REG_ADDR_OFF(reg); 342 343 VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg)); 344 VERIFY(MUTEX_HELD(&azn->azn_mutex)); 345 amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr); 346 347 switch (SMN_REG_SIZE(reg)) { 348 case 1: 349 return ((uint32_t)amdzen_stub_get8(df->adf_nb, 350 AMDZEN_NB_SMN_DATA + addr_off)); 351 case 2: 352 return ((uint32_t)amdzen_stub_get16(df->adf_nb, 353 AMDZEN_NB_SMN_DATA + addr_off)); 354 case 4: 355 return (amdzen_stub_get32(df->adf_nb, AMDZEN_NB_SMN_DATA)); 356 default: 357 panic("unreachable invalid SMN register size %u", 358 SMN_REG_SIZE(reg)); 359 } 360 } 361 362 static void 363 amdzen_smn_write(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg, 364 const uint32_t val) 365 { 366 const uint32_t base_addr = SMN_REG_ADDR_BASE(reg); 367 const uint32_t addr_off = SMN_REG_ADDR_OFF(reg); 368 369 VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg)); 370 VERIFY(SMN_REG_VALUE_FITS(reg, val)); 371 VERIFY(MUTEX_HELD(&azn->azn_mutex)); 372 amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr); 373 374 switch (SMN_REG_SIZE(reg)) { 375 case 1: 376 amdzen_stub_put8(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off, 377 (uint8_t)val); 378 break; 379 case 2: 380 amdzen_stub_put16(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off, 381 (uint16_t)val); 382 break; 383 case 4: 384 amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_DATA, val); 385 break; 386 default: 387 panic("unreachable invalid SMN register size %u", 388 SMN_REG_SIZE(reg)); 389 } 390 } 391 392 static amdzen_df_t * 393 amdzen_df_find(amdzen_t *azn, uint_t dfno) 394 { 395 uint_t i; 396 397 
ASSERT(MUTEX_HELD(&azn->azn_mutex)); 398 if (dfno >= azn->azn_ndfs) { 399 return (NULL); 400 } 401 402 for (i = 0; i < azn->azn_ndfs; i++) { 403 amdzen_df_t *df = &azn->azn_dfs[i]; 404 if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0) { 405 continue; 406 } 407 408 if (dfno == 0) { 409 return (df); 410 } 411 dfno--; 412 } 413 414 return (NULL); 415 } 416 417 static amdzen_df_ent_t * 418 amdzen_df_ent_find_by_instid(amdzen_df_t *df, uint8_t instid) 419 { 420 for (uint_t i = 0; i < df->adf_nents; i++) { 421 amdzen_df_ent_t *ent = &df->adf_ents[i]; 422 423 if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) { 424 continue; 425 } 426 427 if (ent->adfe_inst_id == instid) { 428 return (ent); 429 } 430 } 431 432 return (NULL); 433 } 434 435 /* 436 * Client functions that are used by nexus children. 437 */ 438 int 439 amdzen_c_smn_read(uint_t dfno, const smn_reg_t reg, uint32_t *valp) 440 { 441 amdzen_df_t *df; 442 amdzen_t *azn = amdzen_data; 443 444 if (!SMN_REG_SIZE_IS_VALID(reg)) 445 return (EINVAL); 446 if (!SMN_REG_IS_NATURALLY_ALIGNED(reg)) 447 return (EINVAL); 448 449 mutex_enter(&azn->azn_mutex); 450 df = amdzen_df_find(azn, dfno); 451 if (df == NULL) { 452 mutex_exit(&azn->azn_mutex); 453 return (ENOENT); 454 } 455 456 if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) { 457 mutex_exit(&azn->azn_mutex); 458 return (ENXIO); 459 } 460 461 *valp = amdzen_smn_read(azn, df, reg); 462 mutex_exit(&azn->azn_mutex); 463 return (0); 464 } 465 466 int 467 amdzen_c_smn_write(uint_t dfno, const smn_reg_t reg, const uint32_t val) 468 { 469 amdzen_df_t *df; 470 amdzen_t *azn = amdzen_data; 471 472 if (!SMN_REG_SIZE_IS_VALID(reg)) 473 return (EINVAL); 474 if (!SMN_REG_IS_NATURALLY_ALIGNED(reg)) 475 return (EINVAL); 476 if (!SMN_REG_VALUE_FITS(reg, val)) 477 return (EOVERFLOW); 478 479 mutex_enter(&azn->azn_mutex); 480 df = amdzen_df_find(azn, dfno); 481 if (df == NULL) { 482 mutex_exit(&azn->azn_mutex); 483 return (ENOENT); 484 } 485 486 if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) 
{ 487 mutex_exit(&azn->azn_mutex); 488 return (ENXIO); 489 } 490 491 amdzen_smn_write(azn, df, reg, val); 492 mutex_exit(&azn->azn_mutex); 493 return (0); 494 } 495 496 uint_t 497 amdzen_c_df_count(void) 498 { 499 uint_t ret; 500 amdzen_t *azn = amdzen_data; 501 502 mutex_enter(&azn->azn_mutex); 503 ret = azn->azn_ndfs; 504 mutex_exit(&azn->azn_mutex); 505 return (ret); 506 } 507 508 df_rev_t 509 amdzen_c_df_rev(void) 510 { 511 amdzen_df_t *df; 512 amdzen_t *azn = amdzen_data; 513 df_rev_t rev; 514 515 /* 516 * Always use the first DF instance to determine what we're using. Our 517 * current assumption, which seems to generally be true, is that the 518 * given DF revisions are the same in a given system when the DFs are 519 * directly connected. 520 */ 521 mutex_enter(&azn->azn_mutex); 522 df = amdzen_df_find(azn, 0); 523 if (df == NULL) { 524 rev = DF_REV_UNKNOWN; 525 } else { 526 rev = df->adf_rev; 527 } 528 mutex_exit(&azn->azn_mutex); 529 530 return (rev); 531 } 532 533 int 534 amdzen_c_df_read32(uint_t dfno, uint8_t inst, const df_reg_def_t def, 535 uint32_t *valp) 536 { 537 amdzen_df_t *df; 538 amdzen_t *azn = amdzen_data; 539 540 mutex_enter(&azn->azn_mutex); 541 df = amdzen_df_find(azn, dfno); 542 if (df == NULL) { 543 mutex_exit(&azn->azn_mutex); 544 return (ENOENT); 545 } 546 547 *valp = amdzen_df_read_regdef(azn, df, def, inst, B_FALSE); 548 mutex_exit(&azn->azn_mutex); 549 550 return (0); 551 } 552 553 int 554 amdzen_c_df_read64(uint_t dfno, uint8_t inst, const df_reg_def_t def, 555 uint64_t *valp) 556 { 557 amdzen_df_t *df; 558 amdzen_t *azn = amdzen_data; 559 560 mutex_enter(&azn->azn_mutex); 561 df = amdzen_df_find(azn, dfno); 562 if (df == NULL) { 563 mutex_exit(&azn->azn_mutex); 564 return (ENOENT); 565 } 566 567 *valp = amdzen_df_read_regdef(azn, df, def, inst, B_TRUE); 568 mutex_exit(&azn->azn_mutex); 569 570 return (0); 571 } 572 573 int 574 amdzen_c_df_iter(uint_t dfno, zen_df_type_t type, amdzen_c_iter_f func, 575 void *arg) 576 { 577 
amdzen_df_t *df; 578 amdzen_t *azn = amdzen_data; 579 df_type_t df_type; 580 uint8_t df_subtype; 581 582 /* 583 * Unlike other calls here, we hold our lock only to find the DF here. 584 * The main reason for this is the nature of the callback function. 585 * Folks are iterating over instances so they can call back into us. If 586 * you look at the locking statement, the thing that is most volatile 587 * right here and what we need to protect is the DF itself and 588 * subsequent register accesses to it. The actual data about which 589 * entities exist is static and so once we have found a DF we should 590 * hopefully be in good shape as they only come, but don't go. 591 */ 592 mutex_enter(&azn->azn_mutex); 593 df = amdzen_df_find(azn, dfno); 594 if (df == NULL) { 595 mutex_exit(&azn->azn_mutex); 596 return (ENOENT); 597 } 598 mutex_exit(&azn->azn_mutex); 599 600 switch (type) { 601 case ZEN_DF_TYPE_CS_UMC: 602 df_type = DF_TYPE_CS; 603 /* 604 * In the original Zeppelin DFv2 die there was no subtype field 605 * used for the CS. The UMC is the only type and has a subtype 606 * of zero. 607 */ 608 if (df->adf_rev != DF_REV_2) { 609 df_subtype = DF_CS_SUBTYPE_UMC; 610 } else { 611 df_subtype = 0; 612 } 613 break; 614 case ZEN_DF_TYPE_CCM_CPU: 615 /* 616 * Because the CCM CPU subtype has always remained zero, we can 617 * use that regardless of the generation. 618 */ 619 df_type = DF_TYPE_CCM; 620 df_subtype = DF_CCM_SUBTYPE_CPU; 621 break; 622 default: 623 return (EINVAL); 624 } 625 626 for (uint_t i = 0; i < df->adf_nents; i++) { 627 amdzen_df_ent_t *ent = &df->adf_ents[i]; 628 629 /* 630 * Some DF components are not considered enabled and therefore 631 * will end up having bogus values in their ID fields. If we do 632 * not have an enable flag set, we must skip this node. 
633 */ 634 if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) 635 continue; 636 637 if (ent->adfe_type == df_type && 638 ent->adfe_subtype == df_subtype) { 639 int ret = func(dfno, ent->adfe_fabric_id, 640 ent->adfe_inst_id, arg); 641 if (ret != 0) { 642 return (ret); 643 } 644 } 645 } 646 647 return (0); 648 } 649 650 int 651 amdzen_c_df_fabric_decomp(df_fabric_decomp_t *decomp) 652 { 653 const amdzen_df_t *df; 654 amdzen_t *azn = amdzen_data; 655 656 mutex_enter(&azn->azn_mutex); 657 df = amdzen_df_find(azn, 0); 658 if (df == NULL) { 659 mutex_exit(&azn->azn_mutex); 660 return (ENOENT); 661 } 662 663 *decomp = df->adf_decomp; 664 mutex_exit(&azn->azn_mutex); 665 return (0); 666 } 667 668 static boolean_t 669 amdzen_create_child(amdzen_t *azn, const amdzen_child_data_t *acd) 670 { 671 int ret; 672 dev_info_t *child; 673 674 if (ndi_devi_alloc(azn->azn_dip, acd->acd_name, 675 (pnode_t)DEVI_SID_NODEID, &child) != NDI_SUCCESS) { 676 dev_err(azn->azn_dip, CE_WARN, "!failed to allocate child " 677 "dip for %s", acd->acd_name); 678 return (B_FALSE); 679 } 680 681 ddi_set_parent_data(child, (void *)acd); 682 if ((ret = ndi_devi_online(child, 0)) != NDI_SUCCESS) { 683 dev_err(azn->azn_dip, CE_WARN, "!failed to online child " 684 "dip %s: %d", acd->acd_name, ret); 685 return (B_FALSE); 686 } 687 688 return (B_TRUE); 689 } 690 691 static boolean_t 692 amdzen_map_dfs(amdzen_t *azn) 693 { 694 amdzen_stub_t *stub; 695 696 ASSERT(MUTEX_HELD(&azn->azn_mutex)); 697 698 for (stub = list_head(&azn->azn_df_stubs); stub != NULL; 699 stub = list_next(&azn->azn_df_stubs, stub)) { 700 amdzen_df_t *df; 701 uint_t dfno; 702 703 dfno = stub->azns_dev - AMDZEN_DF_FIRST_DEVICE; 704 if (dfno > AMDZEN_MAX_DFS) { 705 dev_err(stub->azns_dip, CE_WARN, "encountered df " 706 "device with illegal DF PCI b/d/f: 0x%x/%x/%x", 707 stub->azns_bus, stub->azns_dev, stub->azns_func); 708 goto err; 709 } 710 711 df = &azn->azn_dfs[dfno]; 712 713 if (stub->azns_func >= AMDZEN_MAX_DF_FUNCS) { 714 
dev_err(stub->azns_dip, CE_WARN, "encountered df " 715 "device with illegal DF PCI b/d/f: 0x%x/%x/%x", 716 stub->azns_bus, stub->azns_dev, stub->azns_func); 717 goto err; 718 } 719 720 if (df->adf_funcs[stub->azns_func] != NULL) { 721 dev_err(stub->azns_dip, CE_WARN, "encountered " 722 "duplicate df device with DF PCI b/d/f: 0x%x/%x/%x", 723 stub->azns_bus, stub->azns_dev, stub->azns_func); 724 goto err; 725 } 726 df->adf_funcs[stub->azns_func] = stub; 727 } 728 729 return (B_TRUE); 730 731 err: 732 azn->azn_flags |= AMDZEN_F_DEVICE_ERROR; 733 return (B_FALSE); 734 } 735 736 static boolean_t 737 amdzen_check_dfs(amdzen_t *azn) 738 { 739 uint_t i; 740 boolean_t ret = B_TRUE; 741 742 for (i = 0; i < AMDZEN_MAX_DFS; i++) { 743 amdzen_df_t *df = &azn->azn_dfs[i]; 744 uint_t count = 0; 745 746 /* 747 * We require all platforms to have DFs functions 0-6. Not all 748 * platforms have DF function 7. 749 */ 750 for (uint_t func = 0; func < AMDZEN_MAX_DF_FUNCS - 1; func++) { 751 if (df->adf_funcs[func] != NULL) { 752 count++; 753 } 754 } 755 756 if (count == 0) 757 continue; 758 759 if (count != 7) { 760 ret = B_FALSE; 761 dev_err(azn->azn_dip, CE_WARN, "df %u devices " 762 "incomplete", i); 763 } else { 764 df->adf_flags |= AMDZEN_DF_F_VALID; 765 azn->azn_ndfs++; 766 } 767 } 768 769 return (ret); 770 } 771 772 static const uint8_t amdzen_df_rome_ids[0x2b] = { 773 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 774 24, 25, 26, 27, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 775 44, 45, 46, 47, 48 776 }; 777 778 /* 779 * Check the first df entry to see if it belongs to Rome or Milan. If so, then 780 * it uses the disjoint ID space. 781 */ 782 static boolean_t 783 amdzen_is_rome_style(uint_t id) 784 { 785 return (id == 0x1490 || id == 0x1650); 786 } 787 788 /* 789 * To be able to do most other things we want to do, we must first determine 790 * what revision of the DF (data fabric) that we're using. 791 * 792 * Snapshot the df version. 
This was added explicitly in DFv4.0, around the Zen
 * 4 timeframe and allows us to tell apart different versions of the DF
 * register set, most usefully when various subtypes were added.
 *
 * Older versions can theoretically be told apart based on usage of reserved
 * registers. We walk these in the following order, starting with the newest
 * rev and walking backwards to tell things apart:
 *
 *   o v3.5 -> Check function 1, register 0x150. This was reserved prior
 *             to this point. This is actually DF_FIDMASK0_V3P5. We are
 *             supposed to check bits [7:0].
 *
 *   o v3.0 -> Check function 1, register 0x208. The low byte (7:0) was
 *             changed to indicate a component mask. This is non-zero
 *             in the 3.0 generation. This is actually DF_FIDMASK_V2.
 *
 *   o v2.0 -> This is the fallback case when neither of the above registers
 *             is populated. Presumably v1 wasn't part of the Zen generation.
 *
 * Because we don't know what version we are yet, we do not use the normal
 * versioned register accesses which would check what DF version we are and
 * would want to use the normal indirect register accesses (which also require
 * us to know the version). We instead do direct broadcast reads.
 */
static void
amdzen_determine_df_vers(amdzen_t *azn, amdzen_df_t *df)
{
	uint32_t val;
	df_reg_def_t rd = DF_FBICNT;

	/*
	 * On DFv4+ the FBICNT register carries an explicit major/minor
	 * version; on older parts both fields read as zero and we fall back
	 * to probing formerly-reserved registers as described above.
	 */
	val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
	df->adf_major = DF_FBICNT_V4_GET_MAJOR(val);
	df->adf_minor = DF_FBICNT_V4_GET_MINOR(val);
	if (df->adf_major == 0 && df->adf_minor == 0) {
		rd = DF_FIDMASK0_V3P5;
		val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg);
		if (bitx32(val, 7, 0) != 0) {
			df->adf_major = 3;
			df->adf_minor = 5;
			df->adf_rev = DF_REV_3P5;
		} else {
			rd = DF_FIDMASK_V2;
			val = amdzen_stub_get32(df->adf_funcs[rd.drd_func],
			    rd.drd_reg);
			if (bitx32(val, 7, 0) != 0) {
				df->adf_major = 3;
				df->adf_minor = 0;
				df->adf_rev = DF_REV_3;
			} else {
				df->adf_major = 2;
				df->adf_minor = 0;
				df->adf_rev = DF_REV_2;
			}
		}
	} else if (df->adf_major == 4 && df->adf_minor == 0) {
		df->adf_rev = DF_REV_4;
	} else {
		/* An explicit version we don't recognize yet. */
		df->adf_rev = DF_REV_UNKNOWN;
	}
}

/*
 * All of the different versions of the DF have different ways of getting at and
 * answering the question of how do I break a fabric ID into a corresponding
 * socket, die, and component. Importantly the goal here is to obtain, cache,
 * and normalize:
 *
 *  o The DF System Configuration
 *  o The various Mask registers
 *  o The Node ID
 */
static void
amdzen_determine_fabric_decomp(amdzen_t *azn, amdzen_df_t *df)
{
	uint32_t mask;
	df_fabric_decomp_t *decomp = &df->adf_decomp;

	switch (df->adf_rev) {
	case DF_REV_2:
		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V2);
		switch (DF_SYSCFG_V2_GET_MY_TYPE(df->adf_syscfg)) {
		case DF_DIE_TYPE_CPU:
			mask = amdzen_df_read32_bcast(azn, df,
			    DF_DIEMASK_CPU_V2);
			break;
		case DF_DIE_TYPE_APU:
			mask = amdzen_df_read32_bcast(azn, df,
			    DF_DIEMASK_APU_V2);
			break;
		default:
			panic("DF thinks we're not on a CPU!");
		}
		df->adf_mask0 = mask;

		/*
		 * DFv2 is a bit different in how the fabric mask register is
		 * phrased. Logically a fabric ID is broken into something that
		 * uniquely identifies a "node" (a particular die on a socket)
		 * and something that identifies a "component", e.g. a memory
		 * controller.
		 *
		 * Starting with DFv3, these registers logically called out how
		 * to separate the fabric ID first into a node and a component.
		 * Then the node was then broken down into a socket and die. In
		 * DFv2, there is no separate mask and shift of a node. Instead
		 * the socket and die are absolute offsets into the fabric ID
		 * rather than relative offsets into the node ID. As such, when
		 * we encounter DFv2, we fake up a node mask and shift and make
		 * it look like DFv3+.
		 */
		decomp->dfd_node_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) |
		    DF_DIEMASK_V2_GET_DIE_MASK(mask);
		decomp->dfd_node_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask);
		decomp->dfd_comp_mask = DF_DIEMASK_V2_GET_COMP_MASK(mask);
		decomp->dfd_comp_shift = 0;

		/*
		 * The socket/die masks and shifts are made relative to the
		 * synthetic node shift computed above; the die shift must come
		 * out as zero by construction (asserted below).
		 */
		decomp->dfd_sock_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) >>
		    decomp->dfd_node_shift;
		decomp->dfd_die_mask = DF_DIEMASK_V2_GET_DIE_MASK(mask) >>
		    decomp->dfd_node_shift;
		decomp->dfd_sock_shift = DF_DIEMASK_V2_GET_SOCK_SHIFT(mask) -
		    decomp->dfd_node_shift;
		decomp->dfd_die_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask) -
		    decomp->dfd_node_shift;
		ASSERT3U(decomp->dfd_die_shift, ==, 0);

		/*
		 * There is no register in the actual data fabric with the node
		 * ID in DFv2 that we have found. Instead we take the first
		 * entity's fabric ID and transform it into the node id.
		 */
		df->adf_nodeid = (df->adf_ents[0].adfe_fabric_id &
		    decomp->dfd_node_mask) >> decomp->dfd_node_shift;
		break;
	case DF_REV_3:
		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V3);
		df->adf_mask0 = amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK0_V3);
		df->adf_mask1 = amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK1_V3);

		decomp->dfd_sock_mask =
		    DF_FIDMASK1_V3_GET_SOCK_MASK(df->adf_mask1);
		decomp->dfd_sock_shift =
		    DF_FIDMASK1_V3_GET_SOCK_SHIFT(df->adf_mask1);
		decomp->dfd_die_mask =
		    DF_FIDMASK1_V3_GET_DIE_MASK(df->adf_mask1);
		decomp->dfd_die_shift = 0;
		decomp->dfd_node_mask =
		    DF_FIDMASK0_V3_GET_NODE_MASK(df->adf_mask0);
		decomp->dfd_node_shift =
		    DF_FIDMASK1_V3_GET_NODE_SHIFT(df->adf_mask1);
		decomp->dfd_comp_mask =
		    DF_FIDMASK0_V3_GET_COMP_MASK(df->adf_mask0);
		decomp->dfd_comp_shift = 0;

		df->adf_nodeid = DF_SYSCFG_V3_GET_NODE_ID(df->adf_syscfg);
		break;
	case DF_REV_3P5:
		df->adf_syscfg = amdzen_df_read32_bcast(azn, df,
		    DF_SYSCFG_V3P5);
		df->adf_mask0 = amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK0_V3P5);
		df->adf_mask1 = amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK1_V3P5);
		df->adf_mask2 = amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK2_V3P5);

		decomp->dfd_sock_mask =
		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
		decomp->dfd_sock_shift =
		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
		decomp->dfd_die_mask =
		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
		decomp->dfd_die_shift = 0;
		decomp->dfd_node_mask =
		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
		decomp->dfd_node_shift =
		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
		decomp->dfd_comp_mask =
		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
		decomp->dfd_comp_shift = 0;

		df->adf_nodeid = DF_SYSCFG_V3P5_GET_NODE_ID(df->adf_syscfg);
		break;
	case DF_REV_4:
		df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V4);
		df->adf_mask0 = amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK0_V4);
		df->adf_mask1 = amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK1_V4);
		df->adf_mask2 = amdzen_df_read32_bcast(azn, df,
		    DF_FIDMASK2_V4);

		/*
		 * The DFv4 registers are at a different location in the DF;
		 * however, the actual layout of fields is the same as DFv3.5.
		 * This is why you see V3P5 below.
		 */
		decomp->dfd_sock_mask =
		    DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2);
		decomp->dfd_sock_shift =
		    DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1);
		decomp->dfd_die_mask =
		    DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2);
		decomp->dfd_die_shift = 0;
		decomp->dfd_node_mask =
		    DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0);
		decomp->dfd_node_shift =
		    DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1);
		decomp->dfd_comp_mask =
		    DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0);
		decomp->dfd_comp_shift = 0;

		df->adf_nodeid = DF_SYSCFG_V4_GET_NODE_ID(df->adf_syscfg);
		break;
	default:
		panic("encountered suspicious, previously rejected DF "
		    "rev: 0x%x", df->adf_rev);
	}
}

/*
 * The purpose of this function is to map CCMs to the corresponding CCDs that
 * exist. This is not an obvious thing as there is no direct mapping in the data
 * fabric between these IDs.
 *
 * Prior to DFv4, a given CCM was only ever connected to at most one CCD.
 * Starting in DFv4 a given CCM may have one or two SDP (scalable data ports)
 * that connect to CCDs. These may be connected to the same CCD or a different
 * one. When both ports are enabled we must check whether or not the port is
 * considered to be in wide mode. When wide mode is enabled then the two ports
 * are connected to a single CCD. If wide mode is disabled then the two ports
 * are connected to separate CCDs.
 *
 * The physical number of a CCD, which is how we determine the SMN aperture to
 * use, is based on the CCM ID. In most sockets we have seen up to a maximum of
 * 8 CCMs. When a CCM is connected to more than one CCD we have determined based
 * on some hints from AMD's ACPI information that the numbering is assumed to be
 * that CCM's number plus the total number of CCMs.
 *
 * More concretely, the SP5 Genoa/Bergamo Zen 4 platform has 8 CCMs.
When there 1034 * are more than 8 CCDs installed then CCM 0 maps to CCDs 0 and 8. CCM 1 to CCDs 1035 * 1 and 9, etc. CCMs 4-7 map 1:1 to CCDs 4-7. However, the placement of CCDs 1036 * within the package has changed across generations. 1037 * 1038 * Notably in Rome and Milan (Zen 2/3) it appears that each quadrant had an 1039 * increasing number of CCDs. So CCDs 0/1 were together, 2/3, 4/5, and 6/7. This 1040 * meant that in cases where only a subset of CCDs were populated it'd forcibly 1041 * disable the higher CCD in a group (but with DFv3 the CCM would still be 1042 * enabled). So a 4 CCD config would generally enable CCDs 0, 2, 4, and 6 say. 1043 * This was almost certainly done to balance the NUMA config. 1044 * 1045 * Instead, starting in Genoa (Zen 4) the CCMs are round-robined around the 1046 * quadrants so CCMs (CCDs) 0 (0/8) and 4 (4) are together, 1 (1/9) and 5 (5), 1047 * etc. This is also why we more often see disabled CCMs in Genoa, but not in 1048 * Rome/Milan. 1049 * 1050 * When we're operating in wide mode and therefore both SDPs are connected to a 1051 * single CCD, we've always found that the lower CCD index will be used by the 1052 * system and the higher one is not considered present. Therefore, when 1053 * operating in wide mode, we need to make sure that whenever we have a non-zero 1054 * value for SDPs being connected that we rewrite this to only appear as a 1055 * single CCD is present. It's conceivable (though hard to imagine) that we 1056 * could get a value of 0b10 indicating that only the upper SDP link is active 1057 * for some reason. 
 */

static void
amdzen_setup_df_ccm(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *dfe,
    uint32_t ccmno)
{
	amdzen_ccm_data_t *ccm = &dfe->adfe_data.aded_ccm;
	uint32_t ccd_en;

	if (df->adf_rev >= DF_REV_4) {
		uint32_t val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
		    DF_CCD_EN_V4);
		ccd_en = DF_CCD_EN_V4_GET_CCD_EN(val);

		/*
		 * When wide mode is enabled both SDPs are connected to a
		 * single CCD, so if any port is enabled, report exactly one
		 * CCD (see the block comment above).
		 */
		val = amdzen_df_read32(azn, df, dfe->adfe_inst_id,
		    DF_CCMCFG4_V4);
		if (DF_CCMCFG4_V4_GET_WIDE_EN(val) != 0 && ccd_en != 0) {
			ccd_en = 0x1;
		}
	} else {
		/* Prior to DFv4, a CCM connects to at most one CCD. */
		ccd_en = 0x1;
	}

	for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) {
		ccm->acd_ccd_en[i] = (ccd_en & (1 << i)) != 0;
		if (ccm->acd_ccd_en[i] == 0)
			continue;
		/*
		 * Physical CCD number for a CCM's second CCD is the CCM
		 * number plus the total number of CCMs (see block comment
		 * above).
		 */
		ccm->acd_ccd_id[i] = ccmno + i * df->adf_nccm;
		ccm->acd_nccds++;
	}
}

/*
 * Initialize our knowledge about a given series of nodes on the data fabric.
 */
static void
amdzen_setup_df(amdzen_t *azn, amdzen_df_t *df)
{
	uint_t i;
	uint32_t val, ccmno;

	amdzen_determine_df_vers(azn, df);

	switch (df->adf_rev) {
	case DF_REV_2:
	case DF_REV_3:
	case DF_REV_3P5:
		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V2);
		break;
	case DF_REV_4:
		val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V4);
		break;
	default:
		dev_err(azn->azn_dip, CE_WARN, "encountered unsupported DF "
		    "revision: 0x%x", df->adf_rev);
		return;
	}
	/* First PCI bus of this node; used to find its northbridge. */
	df->adf_nb_busno = DF_CFG_ADDR_CTL_GET_BUS_NUM(val);
	val = amdzen_df_read32_bcast(azn, df, DF_FBICNT);
	df->adf_nents = DF_FBICNT_GET_COUNT(val);
	if (df->adf_nents == 0)
		return;
	df->adf_ents = kmem_zalloc(sizeof (amdzen_df_ent_t) * df->adf_nents,
	    KM_SLEEP);

	for (i = 0; i < df->adf_nents; i++) {
		amdzen_df_ent_t *dfe = &df->adf_ents[i];
		uint8_t inst = i;

		/*
		 * Unfortunately, Rome uses a discontinuous instance ID pattern
		 * while everything else we can find uses a contiguous instance
		 * ID pattern. This means that for Rome, we need to adjust the
		 * indexes that we iterate over, though the total number of
		 * entries is right. This was carried over into Milan, but not
		 * Genoa.
		 */
		if (amdzen_is_rome_style(df->adf_funcs[0]->azns_did)) {
			if (inst >= ARRAY_SIZE(amdzen_df_rome_ids)) {
				dev_err(azn->azn_dip, CE_WARN, "Rome family "
				    "processor reported more ids than the PPR, "
				    "resetting %u to instance zero", inst);
				inst = 0;
			} else {
				inst = amdzen_df_rome_ids[inst];
			}
		}

		dfe->adfe_drvid = inst;
		dfe->adfe_info0 = amdzen_df_read32(azn, df, inst, DF_FBIINFO0);
		dfe->adfe_info1 = amdzen_df_read32(azn, df, inst, DF_FBIINFO1);
		dfe->adfe_info2 = amdzen_df_read32(azn, df, inst, DF_FBIINFO2);
		dfe->adfe_info3 = amdzen_df_read32(azn, df, inst, DF_FBIINFO3);

		dfe->adfe_type = DF_FBIINFO0_GET_TYPE(dfe->adfe_info0);
		dfe->adfe_subtype = DF_FBIINFO0_GET_SUBTYPE(dfe->adfe_info0);

		/*
		 * The enabled flag was not present in Zen 1. Simulate it by
		 * checking for a non-zero register instead.
		 */
		if (DF_FBIINFO0_V3_GET_ENABLED(dfe->adfe_info0) ||
		    (df->adf_rev == DF_REV_2 && dfe->adfe_info0 != 0)) {
			dfe->adfe_flags |= AMDZEN_DFE_F_ENABLED;
		}
		if (DF_FBIINFO0_GET_HAS_MCA(dfe->adfe_info0)) {
			dfe->adfe_flags |= AMDZEN_DFE_F_MCA;
		}
		dfe->adfe_inst_id = DF_FBIINFO3_GET_INSTID(dfe->adfe_info3);
		/* The fabric ID's location in FBIINFO3 varies by DF rev. */
		switch (df->adf_rev) {
		case DF_REV_2:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V2_GET_BLOCKID(dfe->adfe_info3);
			break;
		case DF_REV_3:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V3_GET_BLOCKID(dfe->adfe_info3);
			break;
		case DF_REV_3P5:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V3P5_GET_BLOCKID(dfe->adfe_info3);
			break;
		case DF_REV_4:
			dfe->adfe_fabric_id =
			    DF_FBIINFO3_V4_GET_BLOCKID(dfe->adfe_info3);
			break;
		default:
			panic("encountered suspicious, previously rejected DF "
			    "rev: 0x%x", df->adf_rev);
		}

		/*
		 * Record information about a subset of DF entities that we've
		 * found. Currently we're tracking this only for CCMs.
		 */
		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		if (dfe->adfe_type == DF_TYPE_CCM &&
		    dfe->adfe_subtype == DF_CCM_SUBTYPE_CPU) {
			df->adf_nccm++;
		}
	}

	/*
	 * Now that we have filled in all of our info, attempt to fill in
	 * specific information about different types of instances.
	 */
	ccmno = 0;
	for (uint_t i = 0; i < df->adf_nents; i++) {
		amdzen_df_ent_t *dfe = &df->adf_ents[i];

		if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		/*
		 * Perform type and sub-type specific initialization. Currently
		 * limited to CCMs.
		 */
		switch (dfe->adfe_type) {
		case DF_TYPE_CCM:
			amdzen_setup_df_ccm(azn, df, dfe, ccmno);
			ccmno++;
			break;
		default:
			break;
		}
	}

	amdzen_determine_fabric_decomp(azn, df);
}

/*
 * Map this DF instance to its northbridge stub by matching the NB stub's PCI
 * bus number against the bus number we read out of DF_CFG_ADDR_CTL
 * (adf_nb_busno). If no stub matches, the AMDZEN_DF_F_FOUND_NB flag is simply
 * left unset.
 */
static void
amdzen_find_nb(amdzen_t *azn, amdzen_df_t *df)
{
	amdzen_stub_t *stub;

	for (stub = list_head(&azn->azn_nb_stubs); stub != NULL;
	    stub = list_next(&azn->azn_nb_stubs, stub)) {
		if (stub->azns_bus == df->adf_nb_busno) {
			df->adf_flags |= AMDZEN_DF_F_FOUND_NB;
			df->adf_nb = stub;
			return;
		}
	}
}

/*
 * Translate a core's INITPKG0/INITPKG7 register values into the global APIC ID
 * decomposition (azn_apic_decomp): the shift and mask for the thread, core,
 * CCX, CCD, die, and socket components of an APIC ID.
 */
static void
amdzen_initpkg_to_apic(amdzen_t *azn, const uint32_t pkg0, const uint32_t pkg7)
{
	uint32_t nsock, nccd, nccx, ncore, nthr, extccx;
	uint32_t nsock_bits, nccd_bits, nccx_bits, ncore_bits, nthr_bits;
	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;

	/*
	 * These are all 0 based values, meaning that we need to add one to each
	 * of them. However, we skip this because to calculate the number of
	 * bits to cover an entity we would subtract one.
	 */
	nthr = SCFCTP_PMREG_INITPKG0_GET_SMTEN(pkg0);
	ncore = SCFCTP_PMREG_INITPKG7_GET_N_CORES(pkg7);
	nccx = SCFCTP_PMREG_INITPKG7_GET_N_CCXS(pkg7);
	nccd = SCFCTP_PMREG_INITPKG7_GET_N_DIES(pkg7);
	nsock = SCFCTP_PMREG_INITPKG7_GET_N_SOCKETS(pkg7);

	if (uarchrev_uarch(cpuid_getuarchrev(CPU)) >= X86_UARCH_AMD_ZEN4) {
		extccx = SCFCTP_PMREG_INITPKG7_ZEN4_GET_16TAPIC(pkg7);
	} else {
		extccx = 0;
	}

	nthr_bits = highbit(nthr);
	ncore_bits = highbit(ncore);
	nccx_bits = highbit(nccx);
	nccd_bits = highbit(nccd);
	nsock_bits = highbit(nsock);

	apic->aad_thread_shift = 0;
	apic->aad_thread_mask = (1 << nthr_bits) - 1;

	apic->aad_core_shift = nthr_bits;
	if (ncore_bits > 0) {
		apic->aad_core_mask = (1 << ncore_bits) - 1;
		apic->aad_core_mask <<= apic->aad_core_shift;
	} else {
		apic->aad_core_mask = 0;
	}

	/*
	 * The APIC_16T_MODE bit indicates that the total shift to start the CCX
	 * should be at 4 bits if it's not. It doesn't mean that the CCX portion
	 * of the value should take up four bits. In the common Genoa case,
	 * nccx_bits will be zero.
	 */
	apic->aad_ccx_shift = apic->aad_core_shift + ncore_bits;
	if (extccx != 0 && apic->aad_ccx_shift < 4) {
		apic->aad_ccx_shift = 4;
	}
	if (nccx_bits > 0) {
		apic->aad_ccx_mask = (1 << nccx_bits) - 1;
		apic->aad_ccx_mask <<= apic->aad_ccx_shift;
	} else {
		apic->aad_ccx_mask = 0;
	}

	apic->aad_ccd_shift = apic->aad_ccx_shift + nccx_bits;
	if (nccd_bits > 0) {
		apic->aad_ccd_mask = (1 << nccd_bits) - 1;
		apic->aad_ccd_mask <<= apic->aad_ccd_shift;
	} else {
		apic->aad_ccd_mask = 0;
	}

	apic->aad_sock_shift = apic->aad_ccd_shift + nccd_bits;
	if (nsock_bits > 0) {
		apic->aad_sock_mask = (1 << nsock_bits) - 1;
		apic->aad_sock_mask <<= apic->aad_sock_shift;
	} else {
		apic->aad_sock_mask = 0;
	}

	/*
	 * Currently all supported Zen 2+ platforms only have a single die per
	 * socket as compared to Zen 1. So this is always kept at zero.
	 */
	apic->aad_die_mask = 0;
	apic->aad_die_shift = 0;
}

/*
 * We would like to determine what the logical APIC decomposition is on Zen 3
 * and newer family parts. While there is information added to CPUID in the form
 * of leaf 8X26, that isn't present in Zen 3, so instead we go to what we
 * believe is the underlying source of the CPUID data.
 *
 * Fundamentally there are a series of registers in SMN space that relate to the
 * SCFCTP. Coincidentally, there is one of these for each core and there are a
 * pair of related SMN registers. L3::SCFCTP::PMREG_INITPKG0 contains
 * information about a given's core logical and physical IDs. More interestingly
 * for this particular case, L3::SCFCTP::PMREG_INITPKG7, contains the overall
 * total number of logical entities. We've been promised that this has to be
 * the same across the fabric. That's all well and good, but this begs the
 * question of how do we actually get there.
 * The above is a core-specific register and requires that we understand
 * information about which CCDs and CCXs are actually present.
 *
 * So we are starting with a data fabric that has some CCM present. The CCM
 * entries in the data fabric may be tagged with our ENABLED flag.
 * Unfortunately, that can be true regardless of whether or not it's actually
 * present or not. As a result, we go to another chunk of SMN space registers,
 * SMU::PWR. These contain information about the CCDs, the physical cores that
 * are enabled, and related. So we will first walk the DF entities and see if we
 * can read its SMN::PWR::CCD_DIE_ID. If we get back a value of all 1s then
 * there is nothing present. Otherwise, we should get back something that
 * matches information in the data fabric.
 *
 * With that in hand, we can read the SMU::PWR::CORE_ENABLE register to
 * determine which physical cores are enabled in the CCD/CCX. That will finally
 * give us an index to get to our friend INITPKG7.
 *
 * Returns B_TRUE if a present CCD with an enabled core was found and the APIC
 * decomposition was filled in, B_FALSE otherwise.
 */
static boolean_t
amdzen_determine_apic_decomp_initpkg(amdzen_t *azn)
{
	amdzen_df_t *df = &azn->azn_dfs[0];
	uint32_t ccdno = 0;

	for (uint_t i = 0; i < df->adf_nents; i++) {
		const amdzen_df_ent_t *ent = &df->adf_ents[i];
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		if (ent->adfe_type == DF_TYPE_CCM &&
		    ent->adfe_subtype == DF_CCM_SUBTYPE_CPU) {
			uint32_t val, nccx, pkg7, pkg0;
			smn_reg_t die_reg, thrcfg_reg, core_reg;
			smn_reg_t pkg7_reg, pkg0_reg;
			int core_bit;
			uint8_t pccxno, pcoreno;

			/* All 1s means this CCD is not actually present. */
			die_reg = SMUPWR_CCD_DIE_ID(ccdno);
			val = amdzen_smn_read(azn, df, die_reg);
			if (val == SMN_EINVAL32) {
				ccdno++;
				continue;
			}

			ASSERT3U(SMUPWR_CCD_DIE_ID_GET(val), ==, ccdno);

			/*
			 * This die actually exists. Switch over to the core
			 * enable register to find one to ask about physically.
			 */
			thrcfg_reg = SMUPWR_THREAD_CFG(ccdno);
			val = amdzen_smn_read(azn, df, thrcfg_reg);
			nccx = SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1;
			core_reg = SMUPWR_CORE_EN(ccdno);
			val = amdzen_smn_read(azn, df, core_reg);
			if (val == 0) {
				ccdno++;
				continue;
			}

			/*
			 * There exists an enabled physical core. Find the first
			 * index of it and map it to the corresponding CCD and
			 * CCX. ddi_ffs is the bit index, but we want the
			 * physical core number, hence the -1.
			 */
			core_bit = ddi_ffs(val);
			ASSERT3S(core_bit, !=, 0);
			pcoreno = core_bit - 1;

			/*
			 * Unfortunately SMU::PWR::THREAD_CONFIGURATION gives us
			 * the Number of logical cores that are present in the
			 * complex, not the total number of physical cores. So
			 * here we need to encode that in Zen 3+ the number of
			 * cores per CCX is a maximum of 8. Right now we do
			 * assume that the physical and logical ccx numbering is
			 * equivalent (we have no other way of knowing if it is
			 * or isn't right now) and that we'd always have CCX0
			 * before CCX1. AMD seems to suggest we can assume this,
			 * though it is a worrisome assumption.
			 */
			pccxno = pcoreno / 8;
			ASSERT3U(pccxno, <, nccx);
			pkg7_reg = SCFCTP_PMREG_INITPKG7(ccdno, pccxno,
			    pcoreno);
			pkg7 = amdzen_smn_read(azn, df, pkg7_reg);
			pkg0_reg = SCFCTP_PMREG_INITPKG0(ccdno, pccxno,
			    pcoreno);
			pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
			amdzen_initpkg_to_apic(azn, pkg0, pkg7);
			return (B_TRUE);
		}
	}

	return (B_FALSE);
}

/*
 * We have the fun job of trying to figure out what the correct form of the APIC
 * decomposition should be and how to break that into its logical components.
 * The way that we get at this is generation-specific unfortunately.
 * Here's how it works out:
 *
 * Zen 1-2	This era of CPUs are deceptively simple. The PPR for a given
 *		family defines exactly how the APIC ID is broken into logical
 *		components and it's fixed. That is, depending on whether or
 *		not SMT is enabled. Zen 1 and Zen 2 use different schemes for
 *		constructing this. The way that we're supposed to check if SMT
 *		is enabled is to use AMD leaf 8X1E and ask how many threads per
 *		core there are. We use the x86 feature set to determine that
 *		instead.
 *
 *		More specifically the Zen 1 scheme is 7 bits long. The bits have
 *		the following meanings.
 *
 *		[6]   Socket ID
 *		[5:4] Node ID
 *		[3]   Logical CCX ID
 *		With SMT		Without SMT
 *		[2:1] Logical Core ID	[2]   hardcoded to zero
 *		[0]   Thread ID		[1:0] Logical Core ID
 *
 *		The following is the Zen 2 scheme assuming SMT. The Zen 2 scheme
 *		without SMT shifts everything to the right by one bit.
 *
 *		[7]   Socket ID
 *		[6:4] Logical CCD ID
 *		[3]   Logical CCX ID
 *		[2:1] Logical Core ID
 *		[0]   Thread ID
 *
 * Zen 3	Zen 3 CPUs moved past the fixed APIC ID format that Zen 1 and
 *		Zen 2 had, but also don't give us the nice way of discovering
 *		this via CPUID that Zen 4 did. The APIC ID uses a given
 *		number of bits for each logical component that exists, but the
 *		exact number varies based on what's actually present. To get at
 *		this we use a piece of data that is embedded in the SCFCTP
 *		(Scalable Control Fabric, Clocks, Test, Power Gating). This can
 *		be used to determine how many logical entities of each kind the
 *		system thinks exist. While we could use the various CPUID
 *		topology items to try to speed this up, they don't tell us the
 *		die information that we need to do this.
 *
 * Zen 4+	Zen 4 introduced CPUID leaf 8000_0026h which gives us a means
 *		for determining how to extract the CCD, CCX, and related pieces
 *		out of the device. One thing we have to be aware of is that when
 *		the CCD and CCX shift are the same, that means that there is
 *		only a single CCX and therefore have to take that into account
 *		appropriately. This is the case generally on Zen 4 platforms,
 *		but not on Bergamo. Until we can confirm the actual CPUID leaf
 *		values that we receive in the cases of Bergamo and others, we
 *		opt instead to use the same SCFCTP scheme as Zen 3.
 *
 * Returns B_TRUE and fills in azn_apic_decomp on success.
 */
static boolean_t
amdzen_determine_apic_decomp(amdzen_t *azn)
{
	x86_uarchrev_t uarchrev = cpuid_getuarchrev(CPU);
	amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp;
	boolean_t smt = is_x86_feature(x86_featureset, X86FSET_HTT);

	switch (uarchrev_uarch(uarchrev)) {
	case X86_UARCH_AMD_ZEN1:
	case X86_UARCH_AMD_ZENPLUS:
		/* Fixed 7-bit Zen 1/Zen+ layout described above. */
		apic->aad_sock_mask = 0x40;
		apic->aad_sock_shift = 6;
		apic->aad_die_mask = 0x30;
		apic->aad_die_shift = 4;
		apic->aad_ccd_mask = 0;
		apic->aad_ccd_shift = 0;
		apic->aad_ccx_mask = 0x08;
		apic->aad_ccx_shift = 3;

		if (smt) {
			apic->aad_core_mask = 0x06;
			apic->aad_core_shift = 1;
			apic->aad_thread_mask = 0x1;
			apic->aad_thread_shift = 0;
		} else {
			apic->aad_core_mask = 0x03;
			apic->aad_core_shift = 0;
			apic->aad_thread_mask = 0;
			apic->aad_thread_shift = 0;
		}
		break;
	case X86_UARCH_AMD_ZEN2:
		/*
		 * Fixed 8-bit Zen 2 layout; without SMT everything shifts
		 * right by one bit and the thread bit disappears.
		 */
		if (smt) {
			apic->aad_sock_mask = 0x80;
			apic->aad_sock_shift = 7;
			apic->aad_die_mask = 0;
			apic->aad_die_shift = 0;
			apic->aad_ccd_mask = 0x70;
			apic->aad_ccd_shift = 4;
			apic->aad_ccx_mask = 0x08;
			apic->aad_ccx_shift = 3;
			apic->aad_core_mask = 0x06;
			apic->aad_core_shift = 1;
			apic->aad_thread_mask = 0x01;
			apic->aad_thread_shift = 0;
		} else {
			apic->aad_sock_mask = 0x40;
			apic->aad_sock_shift = 6;
			apic->aad_die_mask = 0;
			apic->aad_die_shift = 0;
			apic->aad_ccd_mask = 0x38;
			apic->aad_ccd_shift = 3;
			apic->aad_ccx_mask = 0x04;
			apic->aad_ccx_shift = 2;
			apic->aad_core_mask = 0x3;
			apic->aad_core_shift = 0;
			apic->aad_thread_mask = 0;
			apic->aad_thread_shift = 0;
		}
		break;
	case X86_UARCH_AMD_ZEN3:
	case X86_UARCH_AMD_ZEN4:
		/* Variable layout; derive it from the SCFCTP registers. */
		return (amdzen_determine_apic_decomp_initpkg(azn));
	default:
		return (B_FALSE);
	}
	return (B_TRUE);
}

/*
 * Snapshot the number of cores that can exist in a CCX based on the Zen
 * microarchitecture revision. In Zen 1-4 this has been a constant number
 * regardless of the actual CPU Family.
 */
static void
amdzen_determine_ncore_per_ccx(amdzen_t *azn)
{
	x86_uarchrev_t uarchrev = cpuid_getuarchrev(CPU);

	switch (uarchrev_uarch(uarchrev)) {
	case X86_UARCH_AMD_ZEN1:
	case X86_UARCH_AMD_ZENPLUS:
	case X86_UARCH_AMD_ZEN2:
		azn->azn_ncore_per_ccx = 4;
		break;
	case X86_UARCH_AMD_ZEN3:
	case X86_UARCH_AMD_ZEN4:
		azn->azn_ncore_per_ccx = 8;
		break;
	default:
		panic("asked about non-Zen uarch");
	}
}

/*
 * We need to be careful using this function as different AMD generations have
 * acted in different ways when there is a missing CCD. We've found that in
 * hardware where the CCM is enabled but there is no CCD attached, it generally
 * is safe (i.e. DFv3 on Rome), but on DFv4 if we ask for a CCD that would
 * correspond to a disabled CCM then the firmware may inject a fatal error
 * (which is hopefully something missing in our RAS/MCA-X enablement).
 *
 * Put differently if this doesn't correspond to an Enabled CCM and you know the
 * number of valid CCDs on this, don't use it.
 */
static boolean_t
amdzen_ccd_present(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno)
{
	/* An all-1s read of SMU::PWR::CCD_DIE_ID means no CCD is there. */
	smn_reg_t die_reg = SMUPWR_CCD_DIE_ID(ccdno);
	uint32_t val = amdzen_smn_read(azn, df, die_reg);
	if (val == SMN_EINVAL32) {
		return (B_FALSE);
	}

	ASSERT3U(ccdno, ==, SMUPWR_CCD_DIE_ID_GET(val));
	return (B_TRUE);
}

/*
 * Attempt to determine a logical CCD number of a given CCD where we don't have
 * hardware support for L3::SCFCTP::PMREG_INITPKG* (e.g. pre-Zen 3 systems).
 * The CCD numbers that we have are in the physical space. Likely because of
 * how the orientation of CCM numbers map to physical locations and the layout
 * of them within the package, we haven't found a good way using the core DFv3
 * registers to determine if a given CCD is actually present or not as generally
 * all the CCMs are left enabled. Instead we use SMU::PWR::DIE_ID as a proxy to
 * determine CCD presence.
 */
static uint32_t
amdzen_ccd_log_id_zen2(amdzen_t *azn, amdzen_df_t *df,
    const amdzen_df_ent_t *targ)
{
	uint32_t smnid = 0;
	uint32_t logid = 0;

	for (uint_t i = 0; i < df->adf_nents; i++) {
		const amdzen_df_ent_t *ent = &df->adf_ents[i];

		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) {
			continue;
		}

		/*
		 * Found our target: logid is the count of present peers that
		 * preceded it.
		 */
		if (ent->adfe_inst_id == targ->adfe_inst_id) {
			return (logid);
		}

		if (ent->adfe_type == targ->adfe_type &&
		    ent->adfe_subtype == targ->adfe_subtype) {
			boolean_t present = amdzen_ccd_present(azn, df, smnid);
			smnid++;
			if (present) {
				logid++;
			}
		}
	}

	panic("asked to match against invalid DF entity %p in df %p", targ, df);
}

/*
 * Read this core's INITPKG0 to obtain its logical core number. The first time
 * through for a given CCX/CCD (tracked via ccx_set/ccd_set), also latch the
 * logical CCX and CCD numbers from the same register.
 */
static void
amdzen_ccd_fill_core_initpkg0(amdzen_t *azn, amdzen_df_t *df,
    amdzen_topo_ccd_t *ccd, amdzen_topo_ccx_t *ccx, amdzen_topo_core_t *core,
    boolean_t *ccd_set, boolean_t *ccx_set)
{
	smn_reg_t pkg0_reg;
	uint32_t pkg0;

	pkg0_reg = SCFCTP_PMREG_INITPKG0(ccd->atccd_phys_no, ccx->atccx_phys_no,
	    core->atcore_phys_no);
	pkg0 = amdzen_smn_read(azn, df, pkg0_reg);
	core->atcore_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CORE(pkg0);

	if (!*ccx_set) {
		ccx->atccx_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CCX(pkg0);
		*ccx_set = B_TRUE;
	}

	if (!*ccd_set) {
		ccd->atccd_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_DIE(pkg0);
		*ccd_set = B_TRUE;
	}
}

/*
 * Attempt to fill in the physical topology information for this given CCD.
 * There are a few steps to this that we undertake to perform this as follows:
 *
 * 1) First we determine whether the CCD is actually present or not by reading
 * SMU::PWR::DIE_ID. CCDs that are not installed will still have an enabled DF
 * entry it appears, but the request for the die ID will returns an invalid
 * read (all 1s). This die ID should match what we think of as the SMN number
 * below. If not, we're in trouble and the rest of this is in question.
 *
 * 2) We use the SMU::PWR registers to determine how many logical and physical
 * cores are present in this CCD and how they are split amongst the CCX. Here we
 * need to encode the CPU to CCX core size rankings. Through this process we
 * determine and fill out which threads and cores are enabled.
 *
 * 3) In Zen 3+ we then will read each core's INITPK0 values to ensure that we
 * have a proper physical to logical mapping, at which point we can fill in the
 * APIC IDs. For Zen 2, we will set the AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN to
 * indicate that we just mapped the first logical processor to the first enabled
 * core.
 *
 * 4) Once we have the logical IDs determined we will construct the APIC ID that
 * we expect this to have.
 *
 * Steps (2) - (4) are intertwined and done together.
1702 */ 1703 static void 1704 amdzen_ccd_fill_topo(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *ent, 1705 amdzen_topo_ccd_t *ccd) 1706 { 1707 uint32_t val, nccx, core_en, thread_en; 1708 uint32_t nlcore_per_ccx, nthreads_per_core; 1709 uint32_t sockid, dieid, compid; 1710 const uint32_t ccdno = ccd->atccd_phys_no; 1711 const x86_uarch_t uarch = uarchrev_uarch(cpuid_getuarchrev(CPU)); 1712 boolean_t smt, pkg0_ids, logccd_set = B_FALSE; 1713 smn_reg_t reg; 1714 1715 ASSERT(MUTEX_HELD(&azn->azn_mutex)); 1716 if (!amdzen_ccd_present(azn, df, ccdno)) { 1717 ccd->atccd_err = AMDZEN_TOPO_CCD_E_CCD_MISSING; 1718 return; 1719 } 1720 1721 reg = SMUPWR_THREAD_CFG(ccdno); 1722 val = amdzen_smn_read(azn, df, reg); 1723 nccx = SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1; 1724 nlcore_per_ccx = SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1; 1725 smt = SMUPWR_THREAD_CFG_GET_SMT_MODE(val); 1726 ASSERT3U(nccx, <=, AMDZEN_TOPO_CCD_MAX_CCX); 1727 if (smt == SMUPWR_THREAD_CFG_SMT_MODE_SMT) { 1728 nthreads_per_core = 2; 1729 } else { 1730 nthreads_per_core = 1; 1731 } 1732 1733 reg = SMUPWR_CORE_EN(ccdno); 1734 core_en = amdzen_smn_read(azn, df, reg); 1735 reg = SMUPWR_THREAD_EN(ccdno); 1736 thread_en = amdzen_smn_read(azn, df, reg); 1737 1738 /* 1739 * The BSP is never enabled in a conventional sense and therefore the 1740 * bit is reserved and left as 0. As the BSP should be in the first CCD, 1741 * we go through and OR back in the bit lest we think the thread isn't 1742 * enabled. 1743 */ 1744 if (ccdno == 0) { 1745 thread_en |= 1; 1746 } 1747 1748 ccd->atccd_phys_no = ccdno; 1749 if (uarch >= X86_UARCH_AMD_ZEN3) { 1750 pkg0_ids = B_TRUE; 1751 } else { 1752 ccd->atccd_flags |= AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN; 1753 pkg0_ids = B_FALSE; 1754 1755 /* 1756 * Determine the CCD logical ID for Zen 2 now since this doesn't 1757 * rely upon needing a valid physical core. 
1758 */ 1759 ccd->atccd_log_no = amdzen_ccd_log_id_zen2(azn, df, ent); 1760 logccd_set = B_TRUE; 1761 } 1762 1763 /* 1764 * To construct the APIC ID we need to know the socket and die (not CCD) 1765 * this is on. We deconstruct the CCD's fabric ID to determine that. 1766 */ 1767 zen_fabric_id_decompose(&df->adf_decomp, ent->adfe_fabric_id, &sockid, 1768 &dieid, &compid); 1769 1770 /* 1771 * At this point we have all the information about the CCD, the number 1772 * of CCX instances, and which physical cores and threads are enabled. 1773 * Currently we assume that if we have one CCX enabled, then it is 1774 * always CCX0. We cannot find evidence of a two CCX supporting part 1775 * that doesn't always ship with both CCXs present and enabled. 1776 */ 1777 ccd->atccd_nlog_ccx = ccd->atccd_nphys_ccx = nccx; 1778 for (uint32_t ccxno = 0; ccxno < nccx; ccxno++) { 1779 amdzen_topo_ccx_t *ccx = &ccd->atccd_ccx[ccxno]; 1780 const uint32_t core_mask = (1 << azn->azn_ncore_per_ccx) - 1; 1781 const uint32_t core_shift = ccxno * azn->azn_ncore_per_ccx; 1782 const uint32_t ccx_core_en = (core_en >> core_shift) & 1783 core_mask; 1784 boolean_t logccx_set = B_FALSE; 1785 1786 ccd->atccd_ccx_en[ccxno] = 1; 1787 ccx->atccx_phys_no = ccxno; 1788 ccx->atccx_nphys_cores = azn->azn_ncore_per_ccx; 1789 ccx->atccx_nlog_cores = nlcore_per_ccx; 1790 1791 if (!pkg0_ids) { 1792 ccx->atccx_log_no = ccx->atccx_phys_no; 1793 logccx_set = B_TRUE; 1794 } 1795 1796 for (uint32_t coreno = 0, logcorezen2 = 0; 1797 coreno < azn->azn_ncore_per_ccx; coreno++) { 1798 amdzen_topo_core_t *core = &ccx->atccx_cores[coreno]; 1799 1800 if ((ccx_core_en & (1 << coreno)) == 0) { 1801 continue; 1802 } 1803 1804 ccx->atccx_core_en[coreno] = 1; 1805 core->atcore_phys_no = coreno; 1806 1807 /* 1808 * Now that we have the physical core number present, we 1809 * must determine the logical core number and fill out 1810 * the logical CCX/CCD if it has not been set. 
				 * We must
				 * do this before we attempt to look at which threads
				 * are enabled, because that operates based upon logical
				 * core number.
				 *
				 * For Zen 2 we do not have INITPKG0 at our disposal. We
				 * currently assume (and tag for userland with the
				 * AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN flag) that we are
				 * mapping logical cores to physicals in the order of
				 * appearance.
				 */
				if (pkg0_ids) {
					amdzen_ccd_fill_core_initpkg0(azn, df, ccd, ccx,
					    core, &logccd_set, &logccx_set);
				} else {
					core->atcore_log_no = logcorezen2;
					logcorezen2++;
				}

				/*
				 * Determining which bits to use for the thread is a bit
				 * weird here. Thread IDs within a CCX are logical, but
				 * there are always physically spaced CCX sizes. See the
				 * comment at the definition for SMU::PWR::THREAD_ENABLE
				 * for more information.
				 */
				const uint32_t thread_shift = (ccx->atccx_nphys_cores *
				    ccx->atccx_log_no + core->atcore_log_no) *
				    nthreads_per_core;
				const uint32_t thread_mask = (nthreads_per_core << 1) -
				    1;
				const uint32_t core_thread_en = (thread_en >>
				    thread_shift) & thread_mask;
				core->atcore_nthreads = nthreads_per_core;
				core->atcore_thr_en[0] = core_thread_en & 0x01;
				core->atcore_thr_en[1] = core_thread_en & 0x02;
#ifdef DEBUG
				/* A 1T/core part must never report thread 1 enabled. */
				if (nthreads_per_core == 1) {
					VERIFY0(core->atcore_thr_en[1]);
				}
#endif
				for (uint32_t thrno = 0; thrno < core->atcore_nthreads;
				    thrno++) {
					ASSERT3U(core->atcore_thr_en[thrno], !=, 0);

					zen_apic_id_compose(&azn->azn_apic_decomp,
					    sockid, dieid, ccd->atccd_log_no,
					    ccx->atccx_log_no, core->atcore_log_no,
					    thrno, &core->atcore_apicids[thrno]);

				}
			}

			ASSERT3U(logccx_set, ==, B_TRUE);
			ASSERT3U(logccd_set, ==, B_TRUE);
		}
	}
}

/*
 * Taskq callback that finishes bringing up the nexus once every expected stub
 * has attached: map and validate the DF devices, set each one up and pair it
 * with its northbridge, work out the APIC ID decomposition, and finally
 * enumerate our child pseudo-devices. On exit (success or failure) we mark
 * attach complete and wake anyone waiting in amdzen_detach().
 */
static void
amdzen_nexus_init(void *arg)
{
	uint_t i;
	amdzen_t *azn = arg;

	/*
	 * First go through all of the stubs and assign the DF entries.
	 */
	mutex_enter(&azn->azn_mutex);
	if (!amdzen_map_dfs(azn) || !amdzen_check_dfs(azn)) {
		azn->azn_flags |= AMDZEN_F_MAP_ERROR;
		goto done;
	}

	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
		amdzen_df_t *df = &azn->azn_dfs[i];

		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0)
			continue;
		amdzen_setup_df(azn, df);
		amdzen_find_nb(azn, df);
	}

	if (amdzen_determine_apic_decomp(azn)) {
		azn->azn_flags |= AMDZEN_F_APIC_DECOMP_VALID;
	}

	amdzen_determine_ncore_per_ccx(azn);

	/*
	 * Not all children may be installed. As such, we do not treat the
	 * failure of a child as fatal to the driver.
	 */
	mutex_exit(&azn->azn_mutex);
	for (i = 0; i < ARRAY_SIZE(amdzen_children); i++) {
		(void) amdzen_create_child(azn, &amdzen_children[i]);
	}
	mutex_enter(&azn->azn_mutex);

done:
	azn->azn_flags &= ~AMDZEN_F_ATTACH_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_ATTACH_COMPLETE;
	azn->azn_taskqid = TASKQID_INVALID;
	cv_broadcast(&azn->azn_cv);
	mutex_exit(&azn->azn_mutex);
}

/*
 * ddi_walk_devs() callback used to count how many stub devices we expect to
 * see attach. A node counts if it is a PCI node with an AMD/Hygon vendor ID
 * and either matches a known northbridge device ID or sits at/above the first
 * DF device slot on the DF bus. Matches increment azn_nscanned.
 */
static int
amdzen_stub_scan_cb(dev_info_t *dip, void *arg)
{
	amdzen_t *azn = arg;
	uint16_t vid, did;
	int *regs;
	uint_t nregs, i;
	boolean_t match = B_FALSE;

	if (dip == ddi_root_node()) {
		return (DDI_WALK_CONTINUE);
	}

	/*
	 * If a node in question is not a pci node, then we have no interest in
	 * it as all the stubs that we care about are related to pci devices.
	 */
	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
		return (DDI_WALK_PRUNECHILD);
	}

	/*
	 * If we can't get a device or vendor ID and prove that this is an AMD
	 * part, then we don't care about it.
	 */
	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "vendor-id", PCI_EINVAL16);
	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "device-id", PCI_EINVAL16);
	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
		return (DDI_WALK_CONTINUE);
	}

	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
		return (DDI_WALK_CONTINUE);
	}

	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
		if (amdzen_nb_ids[i] == did) {
			match = B_TRUE;
		}
	}

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
		return (DDI_WALK_CONTINUE);
	}

	if (nregs == 0) {
		ddi_prop_free(regs);
		return (DDI_WALK_CONTINUE);
	}

	/* DF stubs live on the DF bus starting at the first DF device slot. */
	if (PCI_REG_BUS_G(regs[0]) == AMDZEN_DF_BUSNO &&
	    PCI_REG_DEV_G(regs[0]) >= AMDZEN_DF_FIRST_DEVICE) {
		match = B_TRUE;
	}

	ddi_prop_free(regs);
	if (match) {
		mutex_enter(&azn->azn_mutex);
		azn->azn_nscanned++;
		mutex_exit(&azn->azn_mutex);
	}

	return (DDI_WALK_CONTINUE);
}

/*
 * Taskq entry point dispatched from attach. Walk the device tree to count the
 * stubs we expect. If none exist, the platform is unsupported; if all of them
 * have already attached, kick off amdzen_nexus_init() directly. Otherwise the
 * last stub to attach will dispatch it (see amdzen_attach_stub()).
 */
static void
amdzen_stub_scan(void *arg)
{
	amdzen_t *azn = arg;

	mutex_enter(&azn->azn_mutex);
	azn->azn_nscanned = 0;
	mutex_exit(&azn->azn_mutex);

	ddi_walk_devs(ddi_root_node(), amdzen_stub_scan_cb, azn);

	mutex_enter(&azn->azn_mutex);
	azn->azn_flags &= ~AMDZEN_F_SCAN_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_SCAN_COMPLETE;

	if (azn->azn_nscanned == 0) {
		azn->azn_flags |= AMDZEN_F_UNSUPPORTED;
		azn->azn_taskqid = TASKQID_INVALID;
		cv_broadcast(&azn->azn_cv);
	} else if (azn->azn_npresent == azn->azn_nscanned) {
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}
	mutex_exit(&azn->azn_mutex);
}

/*
 * Unfortunately we can't
 * really let the stubs detach as we may need them to be
 * available for client operations. We may be able to improve this if we know
 * that the actual nexus is going away. However, as long as it's active, we need
 * all the stubs.
 */
int
amdzen_detach_stub(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd == DDI_SUSPEND) {
		return (DDI_SUCCESS);
	}

	return (DDI_FAILURE);
}

/*
 * attach(9E) entry point shared by all stub drivers. Validates that the device
 * really is one of ours (AMD/Hygon PCI ID that is either a known northbridge
 * or a DF device), maps its config space, and records it on the appropriate
 * stub list. If this was the last expected stub, dispatch amdzen_nexus_init().
 */
int
amdzen_attach_stub(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int *regs, reg;
	uint_t nregs, i;
	uint16_t vid, did;
	amdzen_stub_t *stub;
	amdzen_t *azn = amdzen_data;
	boolean_t valid = B_FALSE;
	boolean_t nb = B_FALSE;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	/*
	 * Make sure that the stub that we've been asked to attach is a pci type
	 * device. If not, then there is no reason for us to proceed.
	 */
	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
		dev_err(dip, CE_WARN, "asked to attach a bad AMD Zen nexus "
		    "stub: %s", ddi_get_name(dip));
		return (DDI_FAILURE);
	}
	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "vendor-id", PCI_EINVAL16);
	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "device-id", PCI_EINVAL16);
	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
		dev_err(dip, CE_WARN, "failed to get PCI ID properties");
		return (DDI_FAILURE);
	}

	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
		dev_err(dip, CE_WARN, "expected vendor ID (0x%x), found 0x%x",
		    cpuid_getvendor(CPU) == X86_VENDOR_HYGON ?
		    AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD, vid);
		return (DDI_FAILURE);
	}

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get 'reg' property");
		return (DDI_FAILURE);
	}

	if (nregs == 0) {
		ddi_prop_free(regs);
		dev_err(dip, CE_WARN, "missing 'reg' property values");
		return (DDI_FAILURE);
	}
	reg = *regs;
	ddi_prop_free(regs);

	/* A northbridge stub is identified purely by its device ID. */
	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
		if (amdzen_nb_ids[i] == did) {
			valid = B_TRUE;
			nb = B_TRUE;
		}
	}

	/* Otherwise, a DF stub is identified by its bus/device location. */
	if (!valid && PCI_REG_BUS_G(reg) == AMDZEN_DF_BUSNO &&
	    PCI_REG_DEV_G(reg) >= AMDZEN_DF_FIRST_DEVICE) {
		valid = B_TRUE;
		nb = B_FALSE;
	}

	if (!valid) {
		dev_err(dip, CE_WARN, "device %s didn't match the nexus list",
		    ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	stub = kmem_alloc(sizeof (amdzen_stub_t), KM_SLEEP);
	if (pci_config_setup(dip, &stub->azns_cfgspace) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to set up config space");
		kmem_free(stub, sizeof (amdzen_stub_t));
		return (DDI_FAILURE);
	}

	stub->azns_dip = dip;
	stub->azns_vid = vid;
	stub->azns_did = did;
	stub->azns_bus = PCI_REG_BUS_G(reg);
	stub->azns_dev = PCI_REG_DEV_G(reg);
	stub->azns_func = PCI_REG_FUNC_G(reg);
	ddi_set_driver_private(dip, stub);

	mutex_enter(&azn->azn_mutex);
	azn->azn_npresent++;
	if (nb) {
		list_insert_tail(&azn->azn_nb_stubs, stub);
	} else {
		list_insert_tail(&azn->azn_df_stubs, stub);
	}

	/*
	 * If the scan has already completed and we are the last stub it was
	 * waiting on, it falls to us to dispatch the nexus initialization.
	 */
	if ((azn->azn_flags & AMDZEN_F_TASKQ_MASK) == AMDZEN_F_SCAN_COMPLETE &&
	    azn->azn_nscanned == azn->azn_npresent) {
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}

mutex_exit(&azn->azn_mutex); 2132 2133 return (DDI_SUCCESS); 2134 } 2135 2136 static int 2137 amdzen_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop, 2138 void *arg, void *result) 2139 { 2140 char buf[32]; 2141 dev_info_t *child; 2142 const amdzen_child_data_t *acd; 2143 2144 switch (ctlop) { 2145 case DDI_CTLOPS_REPORTDEV: 2146 if (rdip == NULL) { 2147 return (DDI_FAILURE); 2148 } 2149 cmn_err(CE_CONT, "amdzen nexus: %s@%s, %s%d\n", 2150 ddi_node_name(rdip), ddi_get_name_addr(rdip), 2151 ddi_driver_name(rdip), ddi_get_instance(rdip)); 2152 break; 2153 case DDI_CTLOPS_INITCHILD: 2154 child = arg; 2155 if (child == NULL) { 2156 dev_err(dip, CE_WARN, "!no child passed for " 2157 "DDI_CTLOPS_INITCHILD"); 2158 } 2159 2160 acd = ddi_get_parent_data(child); 2161 if (acd == NULL) { 2162 dev_err(dip, CE_WARN, "!missing child parent data"); 2163 return (DDI_FAILURE); 2164 } 2165 2166 if (snprintf(buf, sizeof (buf), "%d", acd->acd_addr) >= 2167 sizeof (buf)) { 2168 dev_err(dip, CE_WARN, "!failed to construct device " 2169 "addr due to overflow"); 2170 return (DDI_FAILURE); 2171 } 2172 2173 ddi_set_name_addr(child, buf); 2174 break; 2175 case DDI_CTLOPS_UNINITCHILD: 2176 child = arg; 2177 if (child == NULL) { 2178 dev_err(dip, CE_WARN, "!no child passed for " 2179 "DDI_CTLOPS_UNINITCHILD"); 2180 } 2181 2182 ddi_set_name_addr(child, NULL); 2183 break; 2184 default: 2185 return (ddi_ctlops(dip, rdip, ctlop, arg, result)); 2186 } 2187 return (DDI_SUCCESS); 2188 } 2189 2190 static int 2191 amdzen_topo_open(dev_t *devp, int flag, int otyp, cred_t *credp) 2192 { 2193 minor_t m; 2194 amdzen_t *azn = amdzen_data; 2195 2196 if (crgetzoneid(credp) != GLOBAL_ZONEID || 2197 secpolicy_sys_config(credp, B_FALSE) != 0) { 2198 return (EPERM); 2199 } 2200 2201 if ((flag & (FEXCL | FNDELAY | FNONBLOCK)) != 0) { 2202 return (EINVAL); 2203 } 2204 2205 if (otyp != OTYP_CHR) { 2206 return (EINVAL); 2207 } 2208 2209 m = getminor(*devp); 2210 if (m != AMDZEN_MINOR_TOPO) { 2211 return 
(ENXIO); 2212 } 2213 2214 mutex_enter(&azn->azn_mutex); 2215 if ((azn->azn_flags & AMDZEN_F_IOCTL_MASK) != 2216 AMDZEN_F_ATTACH_COMPLETE) { 2217 mutex_exit(&azn->azn_mutex); 2218 return (ENOTSUP); 2219 } 2220 mutex_exit(&azn->azn_mutex); 2221 2222 return (0); 2223 } 2224 2225 static int 2226 amdzen_topo_ioctl_base(amdzen_t *azn, intptr_t arg, int mode) 2227 { 2228 amdzen_topo_base_t base; 2229 2230 bzero(&base, sizeof (base)); 2231 mutex_enter(&azn->azn_mutex); 2232 base.atb_ndf = azn->azn_ndfs; 2233 2234 if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) { 2235 mutex_exit(&azn->azn_mutex); 2236 return (ENOTSUP); 2237 } 2238 2239 base.atb_apic_decomp = azn->azn_apic_decomp; 2240 for (uint_t i = 0; i < azn->azn_ndfs; i++) { 2241 const amdzen_df_t *df = &azn->azn_dfs[i]; 2242 2243 base.atb_maxdfent = MAX(base.atb_maxdfent, df->adf_nents); 2244 if (i == 0) { 2245 base.atb_rev = df->adf_rev; 2246 base.atb_df_decomp = df->adf_decomp; 2247 } 2248 } 2249 mutex_exit(&azn->azn_mutex); 2250 2251 if (ddi_copyout(&base, (void *)(uintptr_t)arg, sizeof (base), 2252 mode & FKIOCTL) != 0) { 2253 return (EFAULT); 2254 } 2255 2256 return (0); 2257 } 2258 2259 /* 2260 * Fill in the peers. The way we do is this is to just fill in all the entries 2261 * and then zero out the ones that aren't valid. 
2262 */ 2263 static void 2264 amdzen_topo_ioctl_df_fill_peers(const amdzen_df_ent_t *ent, 2265 amdzen_topo_df_ent_t *topo_ent) 2266 { 2267 topo_ent->atde_npeers = DF_FBIINFO0_GET_FTI_PCNT(ent->adfe_info0); 2268 topo_ent->atde_peers[0] = DF_FBINFO1_GET_FTI0_NINSTID(ent->adfe_info1); 2269 topo_ent->atde_peers[1] = DF_FBINFO1_GET_FTI1_NINSTID(ent->adfe_info1); 2270 topo_ent->atde_peers[2] = DF_FBINFO1_GET_FTI2_NINSTID(ent->adfe_info1); 2271 topo_ent->atde_peers[3] = DF_FBINFO1_GET_FTI3_NINSTID(ent->adfe_info1); 2272 topo_ent->atde_peers[4] = DF_FBINFO2_GET_FTI4_NINSTID(ent->adfe_info2); 2273 topo_ent->atde_peers[5] = DF_FBINFO2_GET_FTI5_NINSTID(ent->adfe_info2); 2274 2275 for (uint32_t i = topo_ent->atde_npeers; i < AMDZEN_TOPO_DF_MAX_PEERS; 2276 i++) { 2277 topo_ent->atde_peers[i] = 0; 2278 } 2279 } 2280 2281 static void 2282 amdzen_topo_ioctl_df_fill_ccm(const amdzen_df_ent_t *ent, 2283 amdzen_topo_df_ent_t *topo_ent) 2284 { 2285 const amdzen_ccm_data_t *ccm = &ent->adfe_data.aded_ccm; 2286 amdzen_topo_ccm_data_t *topo_ccm = &topo_ent->atde_data.atded_ccm; 2287 2288 topo_ccm->atcd_nccds = ccm->acd_nccds; 2289 for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) { 2290 topo_ccm->atcd_ccd_en[i] = ccm->acd_ccd_en[i]; 2291 topo_ccm->atcd_ccd_ids[i] = ccm->acd_ccd_id[i]; 2292 } 2293 } 2294 2295 static int 2296 amdzen_topo_ioctl_df(amdzen_t *azn, intptr_t arg, int mode) 2297 { 2298 uint_t model; 2299 uint32_t max_ents, nwritten; 2300 const amdzen_df_t *df; 2301 amdzen_topo_df_t topo_df; 2302 #ifdef _MULTI_DATAMODEL 2303 amdzen_topo_df32_t topo_df32; 2304 #endif 2305 2306 model = ddi_model_convert_from(mode); 2307 switch (model) { 2308 #ifdef _MULTI_DATAMODEL 2309 case DDI_MODEL_ILP32: 2310 if (ddi_copyin((void *)(uintptr_t)arg, &topo_df32, 2311 sizeof (topo_df32), mode & FKIOCTL) != 0) { 2312 return (EFAULT); 2313 } 2314 bzero(&topo_df, sizeof (topo_df)); 2315 topo_df.atd_dfno = topo_df32.atd_dfno; 2316 topo_df.atd_df_buf_nents = topo_df32.atd_df_buf_nents; 2317 
topo_df.atd_df_ents = (void *)(uintptr_t)topo_df32.atd_df_ents; 2318 break; 2319 #endif 2320 case DDI_MODEL_NONE: 2321 if (ddi_copyin((void *)(uintptr_t)arg, &topo_df, 2322 sizeof (topo_df), mode & FKIOCTL) != 0) { 2323 return (EFAULT); 2324 } 2325 break; 2326 default: 2327 return (ENOTSUP); 2328 } 2329 2330 mutex_enter(&azn->azn_mutex); 2331 if (topo_df.atd_dfno >= azn->azn_ndfs) { 2332 mutex_exit(&azn->azn_mutex); 2333 return (EINVAL); 2334 } 2335 2336 df = &azn->azn_dfs[topo_df.atd_dfno]; 2337 topo_df.atd_nodeid = df->adf_nodeid; 2338 topo_df.atd_sockid = (df->adf_nodeid & df->adf_decomp.dfd_sock_mask) >> 2339 df->adf_decomp.dfd_sock_shift; 2340 topo_df.atd_dieid = (df->adf_nodeid & df->adf_decomp.dfd_die_mask) >> 2341 df->adf_decomp.dfd_die_shift; 2342 topo_df.atd_rev = df->adf_rev; 2343 topo_df.atd_df_act_nents = df->adf_nents; 2344 max_ents = MIN(topo_df.atd_df_buf_nents, df->adf_nents); 2345 2346 if (topo_df.atd_df_ents == NULL) { 2347 topo_df.atd_df_buf_nvalid = 0; 2348 mutex_exit(&azn->azn_mutex); 2349 goto copyout; 2350 } 2351 2352 nwritten = 0; 2353 for (uint32_t i = 0; i < max_ents; i++) { 2354 amdzen_topo_df_ent_t topo_ent; 2355 const amdzen_df_ent_t *ent = &df->adf_ents[i]; 2356 2357 /* 2358 * We opt not to include disabled elements right now. They 2359 * generally don't have a valid type and there isn't much useful 2360 * information we can get from them. This can be changed if we 2361 * find a use case for them for userland topo. 
		 */
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		bzero(&topo_ent, sizeof (topo_ent));
		topo_ent.atde_type = ent->adfe_type;
		topo_ent.atde_subtype = ent->adfe_subtype;
		topo_ent.atde_fabric_id = ent->adfe_fabric_id;
		topo_ent.atde_inst_id = ent->adfe_inst_id;
		amdzen_topo_ioctl_df_fill_peers(ent, &topo_ent);

		if (ent->adfe_type == DF_TYPE_CCM &&
		    ent->adfe_subtype == DF_CCM_SUBTYPE_CPU) {
			amdzen_topo_ioctl_df_fill_ccm(ent, &topo_ent);
		}

		if (ddi_copyout(&topo_ent, &topo_df.atd_df_ents[nwritten],
		    sizeof (topo_ent), mode & FKIOCTL) != 0) {
			mutex_exit(&azn->azn_mutex);
			return (EFAULT);
		}
		nwritten++;
	}
	mutex_exit(&azn->azn_mutex);

	topo_df.atd_df_buf_nvalid = nwritten;
copyout:
	switch (model) {
#ifdef _MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		topo_df32.atd_nodeid = topo_df.atd_nodeid;
		topo_df32.atd_sockid = topo_df.atd_sockid;
		topo_df32.atd_dieid = topo_df.atd_dieid;
		topo_df32.atd_rev = topo_df.atd_rev;
		topo_df32.atd_df_buf_nvalid = topo_df.atd_df_buf_nvalid;
		topo_df32.atd_df_act_nents = topo_df.atd_df_act_nents;

		if (ddi_copyout(&topo_df32, (void *)(uintptr_t)arg,
		    sizeof (topo_df32), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
#endif
	case DDI_MODEL_NONE:
		if (ddi_copyout(&topo_df, (void *)(uintptr_t)arg,
		    sizeof (topo_df), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
	default:
		break;
	}


	return (0);
}

/*
 * AMDZEN_TOPO_IOCTL_CCD: return the detailed topology of a single CCD,
 * identified by DF number, CCM instance ID, and physical CCD number. Results
 * are computed once and cached on the CCM (acd_ccd_data) under azn_mutex.
 * Errors specific to the request are reported in-band via atccd_err.
 */
static int
amdzen_topo_ioctl_ccd(amdzen_t *azn, intptr_t arg, int mode)
{
	amdzen_topo_ccd_t ccd, *ccdp;
	amdzen_df_t *df;
	amdzen_df_ent_t *ent;
	amdzen_ccm_data_t *ccm;
	uint32_t ccdno;
	size_t copyin_size = offsetof(amdzen_topo_ccd_t, atccd_err);

	/*
	 * Only copy in the identifying information so that way we can ensure
	 * the rest of the structure we return to the user doesn't contain
	 * anything unexpected in it.
	 */
	bzero(&ccd, sizeof (ccd));
	if (ddi_copyin((void *)(uintptr_t)arg, &ccd, copyin_size,
	    mode & FKIOCTL) != 0) {
		return (EFAULT);
	}

	mutex_enter(&azn->azn_mutex);
	if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NO_APIC_DECOMP;
		goto copyout;
	}

	df = amdzen_df_find(azn, ccd.atccd_dfno);
	if (df == NULL) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_DFNO;
		goto copyout;
	}

	/*
	 * We don't have enough information to know how to construct this
	 * information in Zen 1 at this time, so refuse.
	 */
	if (df->adf_rev <= DF_REV_2) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_SOC_UNSUPPORTED;
		goto copyout;
	}

	ent = amdzen_df_ent_find_by_instid(df, ccd.atccd_instid);
	if (ent == NULL) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_INSTID;
		goto copyout;
	}

	if (ent->adfe_type != DF_TYPE_CCM ||
	    ent->adfe_subtype != DF_CCM_SUBTYPE_CPU) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
		goto copyout;
	}

	/* Locate the requested physical CCD among this CCM's enabled CCDs. */
	ccm = &ent->adfe_data.aded_ccm;
	for (ccdno = 0; ccdno < DF_MAX_CCDS_PER_CCM; ccdno++) {
		if (ccm->acd_ccd_en[ccdno] != 0 &&
		    ccm->acd_ccd_id[ccdno] == ccd.atccd_phys_no) {
			break;
		}
	}

	if (ccdno == DF_MAX_CCDS_PER_CCM) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
		goto copyout;
	}

	if (ccm->acd_ccd_data[ccdno] == NULL) {
		/*
		 * We don't actually have this data. Go fill it out and save it
		 * for future use.
		 */
		ccdp = kmem_zalloc(sizeof (amdzen_topo_ccd_t), KM_NOSLEEP_LAZY);
		if (ccdp == NULL) {
			mutex_exit(&azn->azn_mutex);
			return (ENOMEM);
		}

		ccdp->atccd_dfno = ccd.atccd_dfno;
		ccdp->atccd_instid = ccd.atccd_instid;
		ccdp->atccd_phys_no = ccd.atccd_phys_no;
		amdzen_ccd_fill_topo(azn, df, ent, ccdp);
		ccm->acd_ccd_data[ccdno] = ccdp;
	}
	ASSERT3P(ccm->acd_ccd_data[ccdno], !=, NULL);
	bcopy(ccm->acd_ccd_data[ccdno], &ccd, sizeof (ccd));

copyout:
	mutex_exit(&azn->azn_mutex);
	if (ddi_copyout(&ccd, (void *)(uintptr_t)arg, sizeof (ccd),
	    mode & FKIOCTL) != 0) {
		return (EFAULT);
	}

	return (0);
}

/*
 * ioctl(9E) dispatch for the topology minor. All commands require at least
 * read access.
 */
static int
amdzen_topo_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *credp, int *rvalp)
{
	int ret;
	amdzen_t *azn = amdzen_data;

	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
		return (ENXIO);
	}

	if ((mode & FREAD) == 0) {
		return (EBADF);
	}

	switch (cmd) {
	case AMDZEN_TOPO_IOCTL_BASE:
		ret = amdzen_topo_ioctl_base(azn, arg, mode);
		break;
	case AMDZEN_TOPO_IOCTL_DF:
		ret = amdzen_topo_ioctl_df(azn, arg, mode);
		break;
	case AMDZEN_TOPO_IOCTL_CCD:
		ret = amdzen_topo_ioctl_ccd(azn, arg, mode);
		break;
	default:
		ret = ENOTTY;
		break;
	}

	return (ret);
}

/*
 * close(9E) for the topology minor. No per-open state to tear down.
 */
static int
amdzen_topo_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	if (otyp != OTYP_CHR) {
		return (EINVAL);
	}

	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
		return (ENXIO);
	}

	return (0);
}

/*
 * attach(9E) for the nexus itself: create the topo minor node and dispatch
 * the device-tree stub scan to the system taskq.
 */
static int
amdzen_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	amdzen_t *azn = amdzen_data;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	mutex_enter(&azn->azn_mutex);
	if (azn->azn_dip
	    != NULL) {
		dev_err(dip, CE_WARN, "driver is already attached!");
		mutex_exit(&azn->azn_mutex);
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(dip, "topo", S_IFCHR, AMDZEN_MINOR_TOPO,
	    DDI_PSEUDO, 0) != 0) {
		dev_err(dip, CE_WARN, "failed to create topo minor node!");
		mutex_exit(&azn->azn_mutex);
		return (DDI_FAILURE);
	}

	azn->azn_dip = dip;
	azn->azn_taskqid = taskq_dispatch(system_taskq, amdzen_stub_scan,
	    azn, TQ_SLEEP);
	azn->azn_flags |= AMDZEN_F_SCAN_DISPATCHED;
	mutex_exit(&azn->azn_mutex);

	return (DDI_SUCCESS);
}

/*
 * detach(9E): wait for any outstanding scan/init taskq work to finish, then
 * only allow detach if no stubs ever attached (i.e. this platform doesn't
 * actually use us).
 */
static int
amdzen_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	amdzen_t *azn = amdzen_data;

	if (cmd == DDI_SUSPEND) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}

	mutex_enter(&azn->azn_mutex);
	while (azn->azn_taskqid != TASKQID_INVALID) {
		cv_wait(&azn->azn_cv, &azn->azn_mutex);
	}

	/*
	 * If we've attached any stub drivers, e.g. this platform is important
	 * for us, then we fail detach.
	 */
	if (!list_is_empty(&azn->azn_df_stubs) ||
	    !list_is_empty(&azn->azn_nb_stubs)) {
		mutex_exit(&azn->azn_mutex);
		return (DDI_FAILURE);
	}

	ddi_remove_minor_node(azn->azn_dip, NULL);
	azn->azn_dip = NULL;
	mutex_exit(&azn->azn_mutex);

	return (DDI_SUCCESS);
}

/*
 * Tear down the global driver state; the stub lists must already be empty.
 */
static void
amdzen_free(void)
{
	if (amdzen_data == NULL) {
		return;
	}

	VERIFY(list_is_empty(&amdzen_data->azn_df_stubs));
	list_destroy(&amdzen_data->azn_df_stubs);
	VERIFY(list_is_empty(&amdzen_data->azn_nb_stubs));
	list_destroy(&amdzen_data->azn_nb_stubs);
	cv_destroy(&amdzen_data->azn_cv);
	mutex_destroy(&amdzen_data->azn_mutex);
	kmem_free(amdzen_data, sizeof (amdzen_t));
	amdzen_data = NULL;
}

/*
 * Allocate and initialize the single global amdzen_t shared by the nexus and
 * all stub instances.
 */
static void
amdzen_alloc(void)
{
	amdzen_data = kmem_zalloc(sizeof (amdzen_t), KM_SLEEP);
	mutex_init(&amdzen_data->azn_mutex, NULL, MUTEX_DRIVER, NULL);
	list_create(&amdzen_data->azn_df_stubs, sizeof (amdzen_stub_t),
	    offsetof(amdzen_stub_t, azns_link));
	list_create(&amdzen_data->azn_nb_stubs, sizeof (amdzen_stub_t),
	    offsetof(amdzen_stub_t, azns_link));
	cv_init(&amdzen_data->azn_cv, NULL, CV_DRIVER, NULL);
}

static struct cb_ops amdzen_topo_cb_ops = {
	.cb_open = amdzen_topo_open,
	.cb_close = amdzen_topo_close,
	.cb_strategy = nodev,
	.cb_print = nodev,
	.cb_dump = nodev,
	.cb_read = nodev,
	.cb_write = nodev,
	.cb_ioctl = amdzen_topo_ioctl,
	.cb_devmap = nodev,
	.cb_mmap = nodev,
	.cb_segmap = nodev,
	.cb_chpoll = nochpoll,
	.cb_prop_op = ddi_prop_op,
	.cb_flag = D_MP,
	.cb_rev = CB_REV,
	.cb_aread = nodev,
	.cb_awrite = nodev
};

struct bus_ops amdzen_bus_ops = {
	.busops_rev = BUSO_REV,
	.bus_map = nullbusmap,
	.bus_dma_map = ddi_no_dma_map,
	.bus_dma_allochdl = ddi_no_dma_allochdl,
	.bus_dma_freehdl = ddi_no_dma_freehdl,
	.bus_dma_bindhdl = ddi_no_dma_bindhdl,
	.bus_dma_unbindhdl = ddi_no_dma_unbindhdl,
	.bus_dma_flush = ddi_no_dma_flush,
	.bus_dma_win = ddi_no_dma_win,
	.bus_dma_ctl = ddi_no_dma_mctl,
	.bus_prop_op = ddi_bus_prop_op,
	.bus_ctl = amdzen_bus_ctl
};

static struct dev_ops amdzen_dev_ops = {
	.devo_rev = DEVO_REV,
	.devo_refcnt = 0,
	.devo_getinfo = nodev,
	.devo_identify = nulldev,
	.devo_probe = nulldev,
	.devo_attach = amdzen_attach,
	.devo_detach = amdzen_detach,
	.devo_reset = nodev,
	.devo_quiesce = ddi_quiesce_not_needed,
	.devo_bus_ops = &amdzen_bus_ops,
	.devo_cb_ops = &amdzen_topo_cb_ops
};

static struct modldrv amdzen_modldrv = {
	.drv_modops = &mod_driverops,
	.drv_linkinfo = "AMD Zen Nexus Driver",
	.drv_dev_ops = &amdzen_dev_ops
};

static struct modlinkage amdzen_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &amdzen_modldrv, NULL }
};

int
_init(void)
{
	int ret;

	/* This driver only makes sense on AMD and Hygon processors. */
	if (cpuid_getvendor(CPU) != X86_VENDOR_AMD &&
	    cpuid_getvendor(CPU) != X86_VENDOR_HYGON) {
		return (ENOTSUP);
	}

	/*
	 * NOTE(review): amdzen_data is allocated only after mod_install()
	 * succeeds; attach entry points reference amdzen_data -- verify that
	 * no attach can race _init here.
	 */
	if ((ret = mod_install(&amdzen_modlinkage)) == 0) {
		amdzen_alloc();
	}

	return (ret);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&amdzen_modlinkage, modinfop));
}

int
_fini(void)
{
	int ret;

	if ((ret = mod_remove(&amdzen_modlinkage)) == 0) {
		amdzen_free();
	}

	return (ret);
}