1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2025 Oxide Computer Company 14 */ 15 16 /* 17 * AMD Zen Unified Memory Controller Driver 18 * 19 * This file forms the core logic around transforming a physical address that 20 * we're used to using into a specific location on a DIMM. This has support for 21 * a wide range of AMD CPUs and APUs ranging from Zen 1 - Zen 4. 22 * 23 * The goal of this driver is to implement the infrastructure and support 24 * necessary to understand how DRAM requests are being routed in the system and 25 * to be able to map those to particular channels and then DIMMs. This is used 26 * as part of RAS (reliability, availability, and serviceability) to enable 27 * aspects around understanding ECC errors, hardware topology, and more. Like 28 * with any software project, there is more to do here. Please see the Future 29 * Work section at the end of this big theory statement for more information. 30 * 31 * ------------------- 32 * Driver Organization 33 * ------------------- 34 * 35 * This driver is organized into two major pieces: 36 * 37 * 1. Logic to interface with hardware, discover the data fabric, memory 38 * controller configuration, and transform that into a normalized fashion 39 * that can be used across all different Zen family CPUs. This is 40 * implemented generally in this file, and is designed to assume it is in 41 * the kernel (as it requires access to the SMN, DF PCI registers, and the 42 * amdzen nexus driver client services). 43 * 44 * 2. Logic that can take the above normalized memory information and perform 45 * decoding (e.g. physical address to DIMM information). This generally 46 * lives in common/mc/zen_uc/zen_umc_decode.c. This file is in common/, 47 * meaning it is designed to be shared by userland and the kernel. Even 48 * more so, it is designed to operate on a const version of our primary 49 * data structure (zen_umc_t), not allowing it to be modified. This allows 50 * us to more easily unit test the decoding logic and utilize it in other 51 * circumstances such as with the mcdecode utility. 52 * 53 * There is corresponding traditional dev_ops(9S) and cb_ops(9S) logic in the 54 * driver (currently this file) which take care of interfacing with the broader 55 * operating system environment. 56 * 57 * There is only ever one instance of this driver, e.g. it is a singleton in 58 * design pattern parlance. There is a single struct, the zen_umc_t found in the 59 * global (albeit static) variable zen_umc. This structure itself contains a 60 * hierarchical set of structures that describe the system. To make management 61 * of memory simpler, all of the nested structures that we discover from 62 * hardware are allocated in the same structure. The only exception to this rule 63 * is when we cache serialized nvlists for dumping. 64 * 65 * The organization of the structures inside the zen_umc_t, generally mimics the 66 * hardware organization and is structured as follows: 67 * 68 * +-----------+ 69 * | zen_umc_t | 70 * +-----------+ 71 * | 72 * +-------------------------------+ 73 * v v 74 * +--------------+ +--------------+ One instance of the 75 * | zen_umc_df_t | ... | zen_umc_df_t | zen_umc_df_t per 76 * +--------------+ +--------------+ discovered DF. 77 * ||| 78 * ||| 79 * ||| +----------------+ +----------------+ Global DRAM 80 * ||+--->| df_dram_rule_t | ... | df_dram_rule_t | rules for the 81 * || +----------------+ +----------------+ platform. 82 * || 83 * || +--------------------+ +--------------------+ UMC remap 84 * |+--->| zen_umc_cs_remap_t | ... | zen_umc_cs_remap_t | rule arrays. 85 * | +--------------------+ +--------------------+ 86 * | 87 * v 88 * +----------------+ +----------------+ One structure per 89 * | zen_umc_chan_t | ... | zen_umc_chan_t | discovered DDR4/5 90 * +----------------+ +----------------+ memory channel. 91 * |||| 92 * |||| 93 * |||| +----------------+ +----------------+ Channel specific 94 * |||+--->| df_dram_rule_t | ... | df_dram_rule_t | copy of DRAM rules. 95 * ||| +----------------+ +----------------+ Less than global. 96 * ||| 97 * ||| +---------------+ +---------------+ Per-Channel DRAM 98 * ||+---->| chan_offset_t | ... | chan_offset_t | offset that is used 99 * || +---------------+ +---------------+ for normalization. 100 * || 101 * || +-----------------+ Channel-specific 102 * |+----->| umc_chan_hash_t | hashing rules. 103 * | +-----------------+ 104 * | 105 * | +------------+ +------------+ One structure for 106 * +------>| umc_dimm_t | ... | umc_dimm_t | each DIMM in the 107 * +------------+ +------------+ channel. Always two. 108 * | 109 * | +----------+ +----------+ Per chip-select 110 * +---> | umc_cs_t | ... | umc_cs_t | data. Always two. 111 * +----------+ +----------+ 112 * 113 * In the data structures themselves you'll often find several pieces of data 114 * that have the term 'raw' in their name. The point of these is to basically 115 * capture the original value that we read from the register before processing 116 * it. These are generally used either for debugging or to help answer future 117 * curiosity with resorting to the udf and usmn tooling, which hopefully aren't 118 * actually installed on systems. 119 * 120 * With the exception of some of the members in the zen_umc_t that are around 121 * management of state for userland ioctls, everything in the structure is 122 * basically write-once and from that point on should be treated as read-only. 123 * 124 * --------------- 125 * Memory Decoding 126 * --------------- 127 * 128 * To understand the process of memory decoding, it's worth going through and 129 * understanding a bunch of the terminology that is used in this process. As an 130 * additional reference when understanding this, you may want to turn to either 131 * an older generation AMD BIOS and Kernel Developer's Guide or the more current 132 * Processor Programming Reference. In addition, the imc driver, which is the 133 * Intel equivalent, also provides an additional bit of reference. 134 * 135 * SYSTEM ADDRESS 136 * 137 * This is a physical address and is the way that the operating system 138 * normally thinks of memory. System addresses can refer to many different 139 * things. For example, you have traditional DRAM, memory-mapped PCIe 140 * devices, peripherals that the processor exposes such as the xAPIC, data 141 * from the FCH (Fusion Controller Hub), etc. 142 * 143 * TOM, TOM2, and the DRAM HOLE 144 * 145 * Physical memory has a complicated layout on x86 in part because of 146 * support for traditional 16-bit and 32-bit systems. As a result, contrary 147 * to popular belief, DRAM is not at a consistent address range in the 148 * processor. AMD processors have a few different ranges. There is a 32-bit 149 * region that starts at effectively physical address zero and goes to the 150 * TOM MSR (top of memory -- Core::X86::Msr::TOP_MEM). This indicates a 151 * limit below 4 GiB, generally around 2 GiB. 152 * 153 * From there, the next region of DRAM starts at 4 GiB and goes to TOM2 154 * (top of memory 2 -- Core::X86::Msr::TOM2). The region between TOM and 155 * 4 GiB is called the DRAM hole. Physical addresses in this region are 156 * used for memory mapped I/O. This breaks up contiguous physical 157 * addresses being used for DRAM, creating a "hole". 158 * 159 * DATA FABRIC 160 * 161 * The data fabric (DF) is the primary interface that different parts of 162 * the system use to communicate with one another. This includes the I/O 163 * engines (where PCIe traffic goes), CPU caches and their cores, memory 164 * channels, cross-socket communication, and a whole lot more. The first 165 * part of decoding addresses and figuring out which DRAM channel an 166 * address should be directed to all come from the data fabric. 167 * 168 * The data fabric is comprised of instances. So there is one instance for 169 * each group of cores, each memory channel, etc. Each instance has its own 170 * independent set of register information. As the data fabric is a series 171 * of devices exposed over PCI, if you do a normal PCI configuration space 172 * read or write that'll end up broadcasting the I/O. Instead, to access a 173 * particular instance's register information there is an indirect access 174 * mechanism. The primary way that this driver accesses data fabric 175 * registers is via these indirect reads. 176 * 177 * There is one instance of the Data Fabric per socket starting with Zen 2. 178 * In Zen 1, there was one instance of the data fabric per CCD -- core 179 * complex die (see cpuid.c's big theory statement for more information). 180 * 181 * DF INSTANCE ID 182 * 183 * A DF instance ID is an identifier for a single entity or component in a 184 * data fabric. The set of instance IDs is unique only with a single data 185 * fabric. So for example, each memory channel, I/O endpoint (e.g. PCIe 186 * logic), group of cores, has its own instance ID. Anything within the 187 * same data fabric (e.g. the same die) can be reached via its instance ID. 188 * The instance ID is used to indicate which instance to contact when 189 * performing indirect accesses. 190 * 191 * Not everything that has an instance ID will be globally routable (e.g. 192 * between multiple sockets). For things that are, such as the memory 193 * channels and coherent core initiators, there is a second ID called a 194 * fabric ID. 195 * 196 * DF FABRIC ID 197 * 198 * A DF fabric ID is an identifier that combines information to indicate 199 * both which instance of the data fabric a component is on and a component 200 * itself. So with this number you can distinguish between a memory channel 201 * on one of two sockets. A Fabric ID is made up of two parts. The upper 202 * part indicates which DF we are talking to and is referred to as a Node 203 * ID. The Node ID is itself broken into two parts: one that identifies a 204 * socket, and one that identifies a die. The lower part of a fabric ID is 205 * called a component ID and indicates which component in a particular data 206 * fabric that we are talking to. While only a subset of the total 207 * components in the data fabric are routable, for everything that is, its 208 * component ID matches its instance ID. 209 * 210 * Put differently, the component portion of a fabric ID and a component's 211 * instance ID are always the same for routable entities. For things which 212 * cannot be routed, they only have an instance ID and no fabric ID. 213 * Because this code is always interacting with data fabric components that 214 * are routable, sometimes instance ID and the component ID portion of the 215 * data fabric ID may be used interchangeably. 216 * 217 * Finally, it's worth calling out that the number of bits that are used to 218 * indicate the socket, die, and component in a fabric ID changes from 219 * hardware generation to hardware generation. 220 * 221 * Inside the code here, the socket and die decomposition information is 222 * always relative to the node ID. AMD phrases the decomposition 223 * information in terms of a series of masks and shifts. This is 224 * information that can be retrieved from the data fabric itself, allowing 225 * us to avoid hardcoding too much information other than which registers 226 * actually have which fields. With both masks and shifts, it's important 227 * to establish which comes first. We follow AMD's convention and always 228 * apply masks before shifts. With that, let's look at an example of a 229 * made up bit set: 230 * 231 * Assumptions (to make this example simple): 232 * o The fabric ID is 16 bits 233 * o The component ID is 8 bits 234 * o The node ID is 8 bits 235 * o The socket and die ID are both 4 bits 236 * 237 * Here, let's say that we have the ID 0x2106. This decomposes into a 238 * socket 0x2, die 0x1, and component 0x6. Here is how that works in more 239 * detail: 240 * 241 * 0x21 0x06 242 * |------| |------| 243 * Node ID Component ID 244 * Mask: 0xff00 0x00ff 245 * Shift: 8 0 246 * 247 * Next we would decompose the Node ID as: 248 * 0x2 0x1 249 * |------| |------| 250 * Sock ID Die ID 251 * Mask: 0xf0 0x0f 252 * Shift: 4 0 253 * 254 * Composing a fabric ID from its parts would work in a similar way by 255 * applying masks and shifts. 256 * 257 * NORMAL ADDRESS 258 * 259 * A normal address is one of the primary address types that AMD uses in 260 * memory decoding. It takes into account the DRAM hole, interleave 261 * settings, and is basically the address that is dispatched to the broader 262 * data fabric towards a particular DRAM channel. 263 * 264 * Often, phrases like 'normalizing the address' or normalization refer to 265 * the process of transforming a system address into the channel address. 266 * 267 * INTERLEAVING 268 * 269 * The idea of interleaving is to take a contiguous range and weave it 270 * between multiple different actual entities. Generally certain bits in 271 * the range are used to select one of several smaller regions. For 272 * example, if you have 8 regions each that are 4 GiB in size, that creates 273 * a single 32 GiB region. You can use three bits in that 32 GiB space to 274 * select one of the 8 regions. For a more visual example, see the 275 * definition of this in uts/intel/io/imc/imc.c. 276 * 277 * CHANNEL 278 * 279 * A channel is used to refer to a single memory channel. This is sometimes 280 * called a DRAM channel as well. A channel operates in a specific mode 281 * based on the JEDEC DRAM standards (e.g. DDR4, LPDDR5, etc.). A 282 * (LP)DDR4/5 channel may support up to two DIMMs inside the channel. The 283 * number of slots is platform dependent and from there the number of DIMMs 284 * installed can vary. Generally speaking, a DRAM channel defines a set 285 * number of signals, most of which go to all DIMMs in the channel, what 286 * varies is which "chip-select" is activated which causes a given DIMM to 287 * pay attention or not. 288 * 289 * DIMM 290 * 291 * A DIMM refers to a physical hardware component that is installed into a 292 * computer to provide access to dynamic memory. Originally this stood for 293 * dual-inline memory module, though the DIMM itself has evolved beyond 294 * that. A DIMM is organized into various pages, which are addressed by 295 * a combination of rows, columns, banks, bank groups, and ranks. How this 296 * fits together changes from generation to generation and is standardized 297 * in something like DDR4, LPDDR4, DDR5, LPDDR5, etc. These standards 298 * define the general individual modules that are assembled into a DIMM. 299 * There are slightly different standards for combined memory modules 300 * (which is what we use the term DIMM for). Examples of those include 301 * things like registered DIMMs (RDIMMs). 302 * 303 * A DDR4 DIMM contains a single channel that is 64-bits wide with 8 check 304 * bits. A DDR5 DIMM has a notable change in this scheme from earlier DDR 305 * standards. It breaks a single DDR5 DIMM into two sub-channels. Each 306 * sub-channel is independently addressed and contains 32-bits of data and 307 * 8-bits of check data. 308 * 309 * ROW AND COLUMN 310 * 311 * The most basic building block of a DIMM is a die. A DIMM consists of 312 * multiple dies that are organized together (we'll discuss the 313 * organization next). A given die is organized into a series of rows and 314 * columns. First, one selects a row. At which point one is able to select 315 * a specific column. It is more expensive to change rows than columns, 316 * leading a given row to contain approximately 1 KiB of data spread across 317 * its columns. The exact size depends on the device. Each row/column is a 318 * series of capacitors and transistors. The transistor is used to select 319 * data from the capacitor and the capacitor actually contains the logical 320 * 0/1 value. 321 * 322 * BANKS AND BANK GROUPS 323 * 324 * An individual DRAM die is organized in something called a bank. A DIMM 325 * has a number of banks that sit in series. These are then grouped into 326 * larger bank groups. Generally speaking, each bank group has the same 327 * number of banks. Let's take a look at an example of a system with 4 328 * bank groups, each with 4 banks. 329 * 330 * +-----------------------+ +-----------------------+ 331 * | Bank Group 0 | | Bank Group 1 | 332 * | +--------+ +--------+ | | +--------+ +--------+ | 333 * | | Bank 0 | | Bank 1 | | | | Bank 0 | | Bank 1 | | 334 * | +--------+ +--------+ | | +--------+ +--------+ | 335 * | +--------+ +--------+ | | +--------+ +--------+ | 336 * | | Bank 2 | | Bank 3 | | | | Bank 2 | | Bank 3 | | 337 * | +--------+ +--------+ | | +--------+ +--------+ | 338 * +-----------------------+ +-----------------------+ 339 * 340 * +-----------------------+ +-----------------------+ 341 * | Bank Group 2 | | Bank Group 3 | 342 * | +--------+ +--------+ | | +--------+ +--------+ | 343 * | | Bank 0 | | Bank 1 | | | | Bank 0 | | Bank 1 | | 344 * | +--------+ +--------+ | | +--------+ +--------+ | 345 * | +--------+ +--------+ | | +--------+ +--------+ | 346 * | | Bank 2 | | Bank 3 | | | | Bank 2 | | Bank 3 | | 347 * | +--------+ +--------+ | | +--------+ +--------+ | 348 * +-----------------------+ +-----------------------+ 349 * 350 * On a DIMM, only a single bank and bank group can be active at a time for 351 * reading or writing an 8 byte chunk of data. However, these are still 352 * pretty important and useful because of the time involved to switch 353 * between them. It is much cheaper to switch between bank groups than 354 * between banks and that time can be cheaper than activating a new row. 355 * This allows memory controllers to pipeline this substantially. 356 * 357 * RANK AND CHIP-SELECT 358 * 359 * The next level of organization is a rank. A rank is effectively an 360 * independent copy of all the bank and bank groups on a DIMM. That is, 361 * there are additional copies of the DIMM's organization, but not the data 362 * itself. Originally a 363 * single or dual rank DIMM was built such that one copy of everything was 364 * on each physical side of the DIMM. As the number of ranks has increased 365 * this has changed as well. Generally speaking, the contents of the rank 366 * are equivalent. That is, you have the same number of bank groups, banks, 367 * and each bank has the same number of rows and columns. 368 * 369 * Ranks are selected by what's called a chip-select, often abbreviated as 370 * CS_L in the various DRAM standards. AMD also often abbreviates this as a 371 * CS (which is not to be confused with the DF class of device called a 372 * CS). These signals are used to select a rank to activate on a DIMM. 373 * There are some number of these for each DIMM which is how the memory 374 * controller chooses which of the DIMMs it's actually going to activate in 375 * the system. 376 * 377 * One interesting gotcha here is how AMD organizes things. Each DIMM 378 * logically is broken into two chip-selects in hardware. Between DIMMs 379 * with more than 2 ranks and 3D stacked RDIMMs, there are ways to 380 * potentially activate more bits. Ultimately these are mapped to a series 381 * of rank multiplication logic internally. These ultimately then control 382 * some of these extra pins, though the exact method isn't 100% clear at 383 * this time. 384 * 385 * ----------------------- 386 * Rough Hardware Process 387 * ----------------------- 388 * 389 * To better understand how everything is implemented and structured, it's worth 390 * briefly describing what happens when hardware wants to read a given physical 391 * address. This is roughly summarized in the following chart. In the left hand 392 * side is the type of address, which is transformed and generally shrinks along 393 * the way. Next to it is the actor that is taking action and the type of 394 * address that it starts with. 395 * 396 * +---------+ +------+ 397 * | Virtual | | CPU | 398 * | Address | | Core | 399 * +---------+ +------+ 400 * | | The CPU core receives a memory request and then 401 * | * . . . . determines whether this request is DRAM or MMIO 402 * | | (memory-mapped I/O) and then sends it to the data 403 * v v fabric. 404 * +----------+ +--------+ 405 * | Physical | | Data | 406 * | Address | | Fabric | 407 * +----------+ +--------+ 408 * | | The data fabric instance in the CCX/D uses the 409 * | * . . . . programmed DRAM rules to determine what DRAM 410 * | | channel to direct a request to and what the 411 * | | channel-relative address is. It then sends the 412 * | | request through the fabric. Note, the number of 413 * | | DRAM rules varies based on the processor SoC. 414 * | | Server parts like Milan have many more rules than 415 * | | an APU like Cezanne. The DRAM rules tell us both 416 * v v how to find and normalize the physical address. 417 * +---------+ +---------+ 418 * | Channel | | DRAM | 419 * | Address | | Channel | 420 * +---------+ +---------+ 421 * | | The UMC (unified memory controller) receives the 422 * | * . . . . DRAM request and determines which DIMM to send 423 * | | the request to along with the rank, banks, row, 424 * | | column, etc. It initiates a DRAM transaction and 425 * | | then sends the results back through the data 426 * v v fabric to the CPU core. 427 * +---------+ +--------+ 428 * | DIMM | | Target | 429 * | Address | | DIMM | 430 * +---------+ +--------+ 431 * 432 * The above is all generally done in hardware. There are multiple steps 433 * internal to this that we end up mimicking in software. This includes things 434 * like, applying hashing logic, address transformations, and related. 435 * Thankfully the hardware is fairly generic and programmed with enough 436 * information that we can pull out to figure this out. The rest of this theory 437 * statement covers the major parts of this: interleaving, the act of 438 * determining which memory channel to actually go to, and normalization, the 439 * act of removing some portion of the physical address bits to determine the 440 * address relative to a channel. 441 * 442 * ------------------------ 443 * Data Fabric Interleaving 444 * ------------------------ 445 * 446 * One of the major parts of address decoding is to understand how the 447 * interleaving features work in the data fabric. This is used to allow an 448 * address range to be spread out between multiple memory channels and then, 449 * later on, when normalizing the address. As mentioned above, a system address 450 * matches a rule which has information on interleaving. Interleaving comes in 451 * many different flavors. It can be used to just switch between channels, 452 * sockets, and dies. It can also end up involving some straightforward and some 453 * fairly complex hashing operations. 454 * 455 * Each DRAM rule has instructions on how to perform this interleaving. The way 456 * this works is that the rule first says to start at a given address bit, 457 * generally ranging from bit 8-12. These influence the granularity of the 458 * interleaving going on. From there, the rules determine how many bits to use 459 * from the address to determine the die, socket, and channel. In the simplest 460 * form, these perform a log2 of the actual number of things you're interleaving 461 * across (we'll come back to non-powers of two). So let's work a few common 462 * examples: 463 * 464 * o 8-channel interleave, 1-die interleave, 2-socket interleave 465 * Start at bit 9 466 * 467 * In this case we have 3 bits that determine the channel to use, 0 bits 468 * for the die, 1 bit for the socket. Here we would then use the following 469 * bits to determine what the channel, die, and socket IDs are: 470 * 471 * [12] - Socket ID 472 * [11:9] - Channel ID 473 * 474 * You'll note that there was no die-interleave, which means the die ID is 475 * always zero. This is the general thing you expect to see in Zen 2 and 3 476 * based systems as they only have one die or a Zen 1 APU. 477 * 478 * o 2-channel interleave, 4-die interleave, 2-socket interleave 479 * Start at bit 10 480 * 481 * In this case we have 1 bit for the channel and socket interleave. We 482 * have 2 bits for the die. This is something you might see on a Zen 1 483 * system. This results in the following bits: 484 * 485 * [13] - Socket ID 486 * [12:11] - Die ID 487 * [10] - Channel ID 488 * 489 * 490 * COD, NPS, and MI3H HASHING 491 * 492 * However, this isn't the only primary extraction rule of the above values. The 493 * other primary method is using a hash. While the exact hash methods vary 494 * between Zen 2/3 and Zen 4 based systems, they follow a general scheme. In the 495 * system there are three interleaving configurations that are either global or 496 * enabled on a per-rule basis. These indicate whether one should perform the 497 * XOR computation using addresses at: 498 * 499 * o 64 KiB (starting at bit 16) 500 * o 2 MiB (starting at bit 21) 501 * o 1 GiB (starting at bit 30) 502 * 503 * In this world, you take the starting address bit defined by the rule and XOR 504 * it with each enabled interleave address. If you have more than one bit to 505 * select (e.g. because you are hashing across more than 2 channels), then you 506 * continue taking subsequent bits from each enabled region. So the second bit 507 * would use 17, 21, and 31 if all three ranges were enabled while the third bit 508 * would use 18, 22, and 32. While these are straightforward, there is a catch. 509 * 510 * While the DRAM rule contains what the starting address bit, you don't 511 * actually use subsequent bits in the same way. Instead subsequent bits are 512 * deterministic and use bits 12 and 13 from the address. This is not the same 513 * consecutive thing that one might expect. Let's look at a Rome/Milan based 514 * example: 515 * 516 * o 8-channel "COD" hashing, starting at address 9. All three ranges enabled. 517 * 1-die and 1-socket interleaving. 518 * 519 * In this model we are using 3 bits for the channel, 0 bits for the socket 520 * and die. 521 * 522 * Channel ID[0] = addr[9] ^ addr[16] ^ addr[21] ^ addr[30] 523 * Channel ID[1] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31] 524 * Channel ID[2] = addr[13] ^ addr[18] ^ addr[23] ^ addr[32] 525 * 526 * So through this scheme we'd have a socket/die of 0, and then the channel 527 * ID is computed based on that. The number of bits that we use here 528 * depends on how many channels the hash is going across. 529 * 530 * The Genoa and related variants, termed "NPS", has a few wrinkles. First, 531 * rather than 3 bits being used for the channel, up to 4 bits are. Second, 532 * while the Rome/Milan "COD" hash above does not support socket or die 533 * interleaving, the "NPS" hash actually supports socket interleaving. However, 534 * unlike the straightforward non-hashing scheme, the first bit is used to 535 * determine the socket when enabled as opposed to the last one. In addition, if 536 * we're not performing socket interleaving, then we end up throwing address bit 537 * 14 into the mix here. Let's look at examples: 538 * 539 * o 4-channel "NPS" hashing, starting at address 8. All three ranges enabled. 540 * 1-die and 1-socket interleaving. 541 * 542 * In this model we are using 2 bits for the channel, 0 bits for the socket 543 * and die. Because socket interleaving is not being used, bit 14 ends up 544 * being added into the first bit of the channel selection. Presumably this 545 * is to improve the address distribution in some form. 546 * 547 * Channel ID[0] = addr[8] ^ addr[16] ^ addr[21] ^ addr[30] ^ addr[14] 548 * Channel ID[1] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31] 549 * 550 * o 8-channel "NPS" hashing, starting at address 9. All three ranges enabled. 551 * 1-die and 2-socket interleaving. 552 * 553 * In this model we are using 3 bits for the channel and 1 for the socket. 554 * The die is always set to 0. Unlike the above, address bit 14 is not used 555 * because it ends up being required for the 4th address bit. 556 * 557 * Socket ID[0] = addr[9] ^ addr[16] ^ addr[21] ^ addr[30] 558 * Channel ID[0] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31] 559 * Channel ID[1] = addr[13] ^ addr[18] ^ addr[23] ^ addr[32] 560 * Channel ID[2] = addr[14] ^ addr[19] ^ addr[24] ^ addr[33] 561 * 562 * DF 4D2 NPS 1K/2K 563 * 564 * In our DF 4D2 variant, the interleave controls were changed and the way that 565 * hashes work is different. There are two main families here, a variant on the 566 * prior NPS hashing that is either NPS 1K or NPS 2K and the MI300 variant that 567 * we call MI3H. First, there are two additional address ranges that have been 568 * added: 569 * 570 * o 4 KiB (starting at bit 12) 571 * o 1 TiB (starting at bit 40) 572 * 573 * Of these, our understanding is that the 4 KiB range is only used for MI3H 574 * based hashing. When it is used, only bits 12-14 will be used, but that's 575 * because the hash algorithm for the MI3H series is, well, unique. The 1T 576 * otherwise works somewhat as normal. Currently we don't support the MI3H 577 * decoding, but know that it exists in the code so we can provide a better 578 * error code. 579 * 580 * The NPS 1K/2K hashes use a similar style. These are designed to support up to 581 * 32 channel hashes, which causes up to 5 bits to be used. The 5 bit form is 582 * only supported in the 1K variant. It starts at bit 8 (the nominally required 583 * starting interleave address) and then uses bit 9, before jumping up to bits 584 * 12-14 as required. The XOR addresses count up in a similar fashion. So the 64 585 * KiB interleave would use up to bits 16-20 in this scheme (corresponding to 586 * result bits 0-4). 587 * 588 * When the 2K form is used, only 4 bits are supported and the entire bit 9 row 589 * is ignored. This looks very similar to the NPS form; however, the gap is also 590 * there in the XOR bits and there is no longer the question of using bit 14 or 591 * not with socket interleaving. It is only ever used if we need the 5th channel 592 * bit. To see the difference let's look at two examples where the only 593 * difference between the two is whether we are using 1 or 2K hashing. 594 * 595 * o 8-channel "NPS" 1K hashing, starting at address 8. 64 KiB, 2 MiB, 1 GiB, 596 * and 1 TiB are enabled. 1-die and 1-socket. 597 * 598 * In this model, there are always 3 bits for the channel. This means that 599 * we only will use bits 8, 9, and 12 from the address to start with. 600 * 601 * Channel ID[0] = addr[8] ^ addr[16] ^ addr[21] ^ addr[30] 602 * Channel ID[1] = addr[9] ^ addr[17] ^ addr[22] ^ addr[31] 603 * Channel ID[2] = addr[12] ^ addr[18] ^ addr[23] ^ addr[32] 604 * 605 * o 8-channel "NPS" 2K hashing, starting at address 8. 64 KiB, 2 MiB, 1 GiB, 606 * and 1 TiB are enabled. 1-die and 1-socket. 607 * 608 * In this model, we also use 3 bits for the channel. However, we no longer 609 * use bit 9, which is the 1K mode only. Similarly, you'll see that the bits 610 * from the hash that would have been used for determining interleaving with 611 * bit 9 are skipped entirely. This is why the 1K/2K variants are 612 * incompatible with the original NPS hashing. 613 * 614 * Channel ID[0] = addr[8] ^ addr[16] ^ addr[21] ^ addr[30] 615 * Channel ID[1] = addr[12] ^ addr[18] ^ addr[23] ^ addr[32] 616 * Channel ID[2] = addr[13] ^ addr[19] ^ addr[24] ^ addr[33] 617 * 618 * ZEN 3 6-CHANNEL 619 * 620 * These were the simple cases. Things get more complex when we move to 621 * non-power of 2 based hashes between channels. There are two different sets of 622 * these schemes. The first of these is 6-channel hashing that was added in Zen 623 * 3. The second of these is a more complex and general form that was added in 624 * Zen 4. Let's start with the Zen 3 case. The Zen 3 6-channel hash requires 625 * starting at address bits 11 or 12 and varies its logic somewhat from there. 626 * In the 6-channel world, the socket and die interleaving must be disabled. 627 * Let's walk through an example: 628 * 629 * o 6-channel Zen 3, starting at address 11. 2M and 1G range enabled. 630 * 1-die and 1-socket interleaving. 631 * 632 * Regardless of the starting address, we will always use three bits to 633 * determine a channel address. However, it's worth calling out that the 634 * 64K range is not considered for this at all. Another oddity is that when 635 * calculating the hash bits the order of the extracted 2M and 1G addresses 636 * are different. 637 * 638 * This flow starts by calculating the three hash bits. This is defined 639 * below. In the following, all bits marked with an '@' are ones that will 640 * change when starting at address bit 12. In those cases the value will 641 * increase by 1. Here's how we calculate the hash bits: 642 * 643 * hash[0] = addr[11@] ^ addr[14@] ^ addr[23] ^ addr[32] 644 * hash[1] = addr[12@] ^ addr[21] ^ addr[30] 645 * hash[2] = addr[13@] ^ addr[22] ^ addr[31] 646 * 647 * With this calculated, we always assign the first bit of the channel 648 * based on the hash. The other bits are more complicated as we have to 649 * deal with that gnarly power of two problem. We determine whether or not 650 * to use the hash bits directly in the channel based on their value. If 651 * they are not equal to 3, then we use it, otherwise if they are, then we 652 * need to go back to the physical address and we take its modulus. 653 * Basically: 654 * 655 * Channel Id[0] = hash[0] 656 * if (hash[2:1] == 3) 657 * Channel ID[2:1] = (addr >> [11@+3]) % 3 658 * else 659 * Channel ID[2:1] = hash[2:1] 660 * 661 * 662 * ZEN 4 NON-POWER OF 2 663 * 664 * I hope you like modulus calculations, because things get even more complex 665 * here now in Zen 4 which has many more modulus variations. These function in a 666 * similar way to the older 6-channel hash in Milan. They require one to start 667 * at address bit 8, they require that there is no die interleaving, and they 668 * support socket interleaving. The different channel arrangements end up in one 669 * of two sets of modulus values: a mod % 3 and a mod % 5 based on the number 670 * of channels used. Unlike the Milan form, all three address ranges (64 KiB, 2 671 * MiB, 1 GiB) are allowed to be used. 672 * 673 * o 6-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled. 674 * 1-die and 2-socket interleaving. 675 * 676 * We start by calculating the following set of hash bits regardless of 677 * the number of channels that exist. The set of hash bits that is actually 678 * used in various computations ends up varying based upon the number of 679 * channels used. In 3-5 configs, only hash[0] is used. 6-10, both hash[0] 680 * and hash[2] (yes, not hash[1]). The 12 channel config uses all three. 681 * 682 * hash[0] = addr[8] ^ addr[16] ^ addr[21] ^ addr[30] ^ addr[14] 683 * hash[1] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31] 684 * hash[2] = addr[13] ^ addr[18] ^ addr[23] ^ addr[32] 685 * 686 * Unlike other schemes where bits directly map here, they instead are used 687 * to seed the overall value. Depending on whether hash[0] is a 0 or 1, the 688 * system goes through two different calculations entirely. Though all of 689 * them end up involving the remainder of the system address going through 690 * the modulus. In the following, a '3@' indicates the modulus value would 691 * be swapped to 5 in a different scenario. 692 * 693 * Channel ID = addr[63:14] % 3@ 694 * if (hash[0] == 1) 695 * Channel ID = (Channel ID + 1) % 3@ 696 * 697 * Once this base has for the channel ID has been calculated, additional 698 * portions are added in. As this is the 6-channel form, we say: 699 * 700 * Channel ID = Channel ID + (hash[2] * 3@) 701 * 702 * Finally the socket is deterministic and always comes from hash[0]. 703 * Basically: 704 * 705 * Socket ID = hash[0] 706 * 707 * o 12-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled. 708 * 1-die and 1-socket interleaving. 709 * 710 * This is a variant of the above. The hash is calculated the same way. 711 * The base Channel ID is the same and if socket interleaving were enabled 712 * it would also be hash[0]. What instead differs is how we use hash[1] 713 * and hash[2]. The following logic is used instead of the final 714 * calculation above. 715 * 716 * Channel ID = Channel ID + (hash[2:1] * 3@) 717 * 718 * NPS 1K/2K NON-POWER of 2 719 * 720 * Just as the normal hashing changed with the introduction of the 1K/2K 721 * variants, so does the non-power of 2 hashing. This NP2 scheme is rather 722 * different than the base Zen 4 one. This uses the 64 KiB, 2 MiB, 1 GiB, and 1 723 * TiB ranges for hashing. Logically there are both 3 and 5 channel hashes again 724 * like Zen 4 and when socket interleaving is enabled, address bit 8 is always 725 * going to the socket. 726 * 727 * The 1K and 2K modes change which addresses are used and considered just like 728 * the non-NP2 case. The same interleave bit skipping for 2K still applies, 729 * meaning bit 9 will not be used for hashing and will instead be part of the 730 * normal address calculations that we have. 731 * 732 * Like in the Zen 4 case, we are going to be constructing our normalized 733 * address from three regions of bits. The low region which is everything that 734 * is used before the hashing, the bits skipped in the middle, and then the 735 * upper bits that have been untouched. These are not rearranged, rather its 736 * best to think of it as bits are removed from this, causing shifts and 737 * shrinks. 738 * 739 * Another important difference to call out before we get to examples is that 740 * each variant here uses a different address range as the upper portion to use. 741 * Unfortunately, where as for Zen 4 we had some regular rules, each of these 742 * cases seems rather different. However, there is some general logic which is 743 * that in each case we calculate some modulus value from different addresses 744 * which we use to determine the channel, sometimes mixed with other hash bits. 745 * Then we calculate a new normalized address by taking the divisor as the high 746 * portion. Let's look at some examples here: 747 * 748 * o 12 Channel 1K Zen 5, starting at address 8. 64K, 2M, 1G, and 1T ranges 749 * enabled. 1-die and 1-socket interleaving. 750 * 751 * This 12 channel mode is a modulus 3 case. This particular case needs two 752 * hash bits. Because it is a 1K mode it uses bits 8 and 9. If we were in a 753 * 2K mode, we'd use bits 8 and 12. Bit 8 always also hashes in bit 14 just 754 * like the Zen 4 case. 755 * 756 * hash[0] = addr[8] ^ addr[16] ^ addr[21] ^ addr[30] ^ addr[40] ^ 757 * addr[14] 758 * hash[1] = addr[9] ^ addr[17] ^ addr[22] ^ addr[31] ^ addr[41] 759 * 760 * Now that we have that, it's time to calculate the address we need to 761 * take the modulus of to stick into the channel. For this particular case, 762 * we construct an address as PA >> 12 | 0b00. In other words we take bits 763 * [48+, 12] and move them to bit 2. Once we have that, we can go ahead and 764 * construct the value modulus 3. Symbolically: 765 * 766 * modAddr = (addr[64:12] & ~3) | 0b00 (or (addr >> 12) << 2) 767 * modVal = modAddr % 3 768 * 769 * Channel ID[0] = hash[0] 770 * Channel ID[1] = hash[1] 771 * Channel ID[2] = modval[0] 772 * Channel ID[3] = modval[1] 773 * 774 * In the 2K version we use (addr[64:13] & ~7) | 0b000 and hash[1] is based 775 * on addr[12] rather than addr[9]. 776 * 777 * o 5 Channel 2K Zen 5, starting at address 8. 64K, 2M, 1G, and 1T ranges 778 * enabled. 1-die and 1-socket interleaving. 779 * 780 * With the 5-channel based mode we now will working modulus five rather 781 * than three. In this case, we have similar logic, except the way the 782 * address is constructed to take the mod of is different. We can think of 783 * this as: 784 * 785 * modAddr = addr[64:12] | addr[8] | 0b0 786 * modVal = modAddr % 5 787 * 788 * Channel ID[0] = modVal[0] 789 * Channel ID[1] = modVal[1] 790 * Channel ID[2] = modVal[2] 791 * 792 * Basically this ends up using a rather similar logical construction; 793 * however, the values that it plugs in are different. Note, that there was 794 * no use of the hash in this case. 795 * 796 * POST BIT EXTRACTION 797 * 798 * Now, all of this was done to concoct up a series of indexes used. However, 799 * you'll note that a given DRAM rule actually already has a fabric target. So 800 * what do we do here? We add them together. 801 * 802 * The data fabric has registers that describe which bits in a fabric ID 803 * correspond to a socket, die, and channel. Taking the channel, die, and socket 804 * IDs above, one can construct a fabric ID. From there, we add the two data 805 * fabric IDs together and can then get to the fabric ID of the actual logical 806 * target. This is why all of the socket and die interleaving examples with no 807 * interleaving are OK to result in a zero. The idea here is that the base 808 * fabric ID in the DRAM rule will take care of indicating those other things as 809 * required. 810 * 811 * You'll note the use of the term "logical target" up above. That's because 812 * some platforms have the ability to remap logical targets to physical targets 813 * (identified by the use of the ZEN_UMC_FAM_F_TARG_REMAP flag in the family 814 * data or the DF::DfCapability register once we're at the DF 4D2 variant). The 815 * way that remapping works changes based on the hardware generation. This was 816 * first added in Milan (Zen 3) CPUs. In that model, you would use the socket 817 * and component information from the target ID to identify which remapping 818 * rules to use. On Genoa (Zen 4) CPUs, you would instead use information in the 819 * rule itself to determine which of the remap rule sets to use and then uses 820 * the component ID to select which rewrite rule to use. 821 * 822 * Finally, there's one small wrinkle with this whole scheme that we haven't 823 * discussed: what actually is the address that we plug into this calculation. 824 * While you might think it actually is just the system address itself, that 825 * isn't actually always the case. Sometimes rather than using the address 826 * itself, it gets normalized based on the DRAM rule, which involves subtracting 827 * out the base address and potentially subtracting out the size of the DRAM 828 * hole (if the address is above the hole and hoisting is active for that 829 * range). When this is performed appears to tie to the DF generation. The 830 * following table relates the DF generation to our behavior: 831 * 832 * o DF 2 (Zen 1): Use the raw address 833 * o DF 3 (Zen 2-3): Use the raw address if it's not a power of 2 834 * o DF 3.5: Use the adjusted address 835 * o DF 4 (Zen 4): Use the adjusted address 836 * o DF 4D2 (Zen 4/5): Use the raw address 837 * 838 * -------------------------------------------- 839 * Data Fabric Interleave Address Normalization 840 * -------------------------------------------- 841 * 842 * While you may have thought that we were actually done with the normalization 843 * fun in the last section, there's still a bit more here that we need to 844 * consider. In particular, there's a secondary transformation beyond 845 * interleaving that occurs as part of constructing the channel normalized 846 * address. Effectively, we need to account for all the bits that were used in 847 * the interleaving and generally speaking remove them from our normalized 848 * address. 849 * 850 * While this may sound weird on paper, the way to think about it is that 851 * interleaving at some granularity means that each device is grabbing the same 852 * set of addresses, the interleave just is used to direct it to its own 853 * location. When working with a channel normalized address, we're effectively 854 * creating a new region of addresses that have meaning within the DIMMs 855 * themselves. The channel doesn't care about what got it there, mainly just 856 * what it is now. So with that in mind, we need to discuss how we remove all 857 * the interleaving information in our different modes. 858 * 859 * Just to make sure it's clear, we are _removing_ all bits that were used for 860 * interleaving. This causes all bits above the removed ones to be shifted 861 * right. 862 * 863 * First, we have the case of standard power of 2 interleaving that applies to 864 * the 1, 2, 4, 8, 16, and 32 channel configurations. Here, we need to account 865 * for the total number of bits that are used for the channel, die, and socket 866 * interleaving and we simply remove all those bits starting from the starting 867 * address. 868 * 869 * o 8-channel interleave, 1-die interleave, 2-socket interleave 870 * Start at bit 9 871 * 872 * If we look at this example, we are using 3 bits for the channel, 1 for 873 * the socket, for a total of 4 bits. Because this is starting at bit 9, 874 * this means that interleaving covers the bit range [12:9]. In this case 875 * our new address would be (orig[63:13] >> 4) | orig[8:0]. 876 * 877 * 878 * COD and NPS HASHING 879 * 880 * That was the simple case, next we have the COD/NPS hashing case that we need 881 * to consider. If we look at these, the way that they work is that they split 882 * which bits they use for determining the channel address and then hash others 883 * in. Here, we need to extract the starting address bit, then continue at bit 884 * 12 based on the number of bits in use and whether or not socket interleaving 885 * is at play for the NPS variant. Let's look at an example here: 886 * 887 * o 8-channel "COD" hashing, starting at address 9. All three ranges enabled. 888 * 1-die and 1-socket interleaving. 889 * 890 * Here we have three total bits being used. Because we start at bit 9, this 891 * means we need to drop bits [13:12], [9]. So our new address would be: 892 * 893 * orig[63:14] >> 3 | orig[11:10] >> 1 | orig[8:0] 894 * | | +-> stays the same 895 * | +-> relocated to bit 9 -- shifted by 1 because we 896 * | removed bit 9. 897 * +--> Relocated to bit 11 -- shifted by 3 because we removed bits, 9, 12, 898 * and 13. 899 * 900 * o 8-channel "NPS" hashing, starting at address 8. All three ranges enabled. 901 * 1-die and 2-socket interleaving. 902 * 903 * Here we need to remove bits [14:12], [8]. We're removing an extra bit 904 * because we have 2-socket interleaving. This results in a new address of: 905 * 906 * orig[63:15] >> 4 | orig[11:9] >> 1 | orig[7:0] 907 * | | +-> stays the same 908 * | +-> relocated to bit 8 -- shifted by 1 because we 909 * | removed bit 8. 910 * +--> Relocated to bit 11 -- shifted by 4 because we removed bits, 8, 12, 911 * 13, and 14. 912 * 913 * NPS 1K/2K Hashing 914 * 915 * This case is a fairly straightforward variant on what we just discussed. In 916 * fact, 2K hashing looks just like what we've done before. The only difference 917 * with 1K hashing is that we'll consider bit 9 also for removal before we jump 918 * up to bit 12. Let's look at an example: 919 * 920 * o 8-channel "NPS" 1K hashing, starting at address 8. All three ranges 921 * enabled. 1-die and 2-socket interleaving. 922 * 923 * Here we need to remove a total of 4 bits, which is now broken into 924 * [13:12] and [9:8]. This results in a new address of: 925 * 926 * orig[63:14] >> 4 | orig[11:10] >> 2 | orig[7:0] 927 * | | +-> stays the same 928 * | +-> relocated to bit 8 -- shifted by 2 because we 929 * | removed bits 8 and 9. 930 * +--> Relocated to bit 11 -- shifted by 4 because we removed bits, 8, 9, 931 * 12, and 13. 932 * 933 * ZEN 3 6-CHANNEL 934 * 935 * Now, to the real fun stuff, our non-powers of two. First, let's start with 936 * our friend, the Zen 3 6-channel hash. So, the first thing that we need to do 937 * here is start by recomputing our hash again based on the current normalized 938 * address. Regardless of the hash value, this first removes all three bits from 939 * the starting address, so that's removing either [14:12] or [13:11]. 940 * 941 * The rest of the normalization process here is quite complex and somewhat mind 942 * bending. Let's start working through an example here and build this up. 943 * First, let's assume that each channel has a single 16 GiB RDIMM. This would 944 * mean that the channel itself has 96 GiB RDIMM. However, by removing 3 bits 945 * worth, that technically corresponds to an 8-channel configuration that 946 * normally suggest a 128 GiB configuration. The processor requires us to record 947 * this fact in the DF::Np2ChannelConfig register. The value that it wants us a 948 * bit weird. We believe it's calculated by the following: 949 * 950 * 1. Round the channel size up to the next power of 2. 951 * 2. Divide this total size by 64 KiB. 952 * 3. Determine the log base 2 that satisfies this value. 953 * 954 * In our particular example above. We have a 96 GiB channel, so for (1) we end 955 * up with 128 GiB (2^37). We now divide that by 64 KiB (2^16), so this becomes 956 * 2^(37 - 16) or 2^21. Because we want the log base 2 of 2^21 from (2), this 957 * simply becomes 21. The DF::Np2ChannelConfig has two members, a 'space 0' and 958 * 'space 1'. Near as we can tell, in this mode only 'space 0' is used. 959 * 960 * Before we get into the actual normalization scheme, we have to ask ourselves 961 * how do we actually interleave data 6 ways. The scheme here is involved. 962 * First, it's important to remember like with other normalization schemes, we 963 * do adjust for the address for the base address in the DRAM rule and then also 964 * take into account the DRAM hole if present. 965 * 966 * If we delete 3 bits, let's take a sample address and see where it would end 967 * up in the above scheme. We're going to take our 3 address bits and say that 968 * they start at bit 12, so this means that the bits removed are [14:12]. So the 969 * following are the 8 addresses that we have here and where they end up 970 * starting with 1ff: 971 * 972 * o 0x01ff -> 0x1ff, Channel 0 (hash 0b000) 973 * o 0x11ff -> 0x1ff, Channel 1 (hash 0b001) 974 * o 0x21ff -> 0x1ff, Channel 2 (hash 0b010) 975 * o 0x31ff -> 0x1ff, Channel 3 (hash 0b011) 976 * o 0x41ff -> 0x1ff, Channel 4 (hash 0b100) 977 * o 0x51ff -> 0x1ff, Channel 5 (hash 0b101) 978 * o 0x61ff -> 0x3000001ff, Channel 0 (hash 0b110) 979 * o 0x71ff -> 0x3000001ff, Channel 1 (hash 0b111) 980 * 981 * Yes, we did just jump to near the top of what is a 16 GiB DIMM's range for 982 * those last two. The way we determine when to do this jump is based on our 983 * hash. Effectively we ask what is hash[2:1]. If it is 0b11, then we need to 984 * do something different and enter this special case, basically jumping to the 985 * top of the range. If we think about a 6-channel configuration for a moment, 986 * the thing that doesn't exist are the traditional 8-channel hash DIMMs 0b110 987 * and 0b111. 988 * 989 * If you go back to the interleave this kind of meshes, that tried to handle 990 * the case of the hash being 0, 1, and 2, normally, and then did special things 991 * with the case of the hash being in this upper quadrant. The hash then 992 * determined where it went by shifting over the upper address and doing a mod 993 * 3 and using that to determine the upper two bits. With that weird address at 994 * the top of the range, let's go through and see what else actually goes to 995 * those weird addresses: 996 * 997 * o 0x08000061ff -> 0x3000001ff, Channel 2 (hash 0b110) 998 * o 0x08000071ff -> 0x3000001ff, Channel 3 (hash 0b111) 999 * o 0x10000061ff -> 0x3000001ff, Channel 4 (hash 0b110) 1000 * o 0x10000071ff -> 0x3000001ff, Channel 5 (hash 0b111) 1001 * 1002 * Based on the above you can see that we've split the 16 GiB DIMM into a 12 GiB 1003 * region (e.g. [ 0x0, 0x300000000 ), and a 4 GiB region [ 0x300000000, 1004 * 0x400000000 ). What seems to happen is that the CPU algorithmically is going 1005 * to put things in this upper range. To perform that action it goes back to the 1006 * register information that we stored in DF::Np2ChannelConfig. The way this 1007 * seems to be thought of is it wants to set the upper two bits of a 64 KiB 1008 * chunk (e.g. bits [15:14]) to 0b11 and then shift that over based on the DIMM 1009 * size. 1010 * 1011 * Our 16 GiB DIMM has 34 bits, so effectively we want to set bits [33:32] in 1012 * this case. The channel is 37 bits wide, which the CPU again knows as 2^21 * 1013 * 2^16. So it constructs the 64 KiB value of [15:14] = 0b11 and fills the rest 1014 * with zeros. It then multiplies it by 2^(21 - 3), or 2^18. The - 3 comes from 1015 * the fact that we removed 3 address bits. This when added to the above gets 1016 * us bits [33,32] = 0b11. 1017 * 1018 * While this appears to be the logic, I don't have a proof that this scheme 1019 * actually evenly covers the entire range, but a few examples appear to work 1020 * out. 1021 * 1022 * With this, the standard example flow that we give, results in something like: 1023 * 1024 * o 6-channel Zen 3, starting at address 11. 2M and 1G range enabled. Here, 1025 * we assume that the value of the NP2 space0 is 21 bits. This example 1026 * assumes we have 96 GiB total memory, which means rounding up to 128 GiB. 1027 * 1028 * Step 1 here is to adjust our address to remove the three bits indicated. 1029 * So we simply always set our new address to: 1030 * 1031 * orig[63:14] >> 3 | orig[10:0] 1032 * | +-> stays the same 1033 * +--> Relocated to bit 11 because a 6-channel config always uses 3 bits to 1034 * perform interleaving. 1035 * 1036 * At this step, one would need to consult the hash of the normalized 1037 * address before removing bits (but after adjusting for the base / DRAM 1038 * hole). If hash[2:1] == 3, then we would say that the address is actually: 1039 * 1040 * 0b11 << 32 | orig[63:14] >> 3 | orig[10:0] 1041 * 1042 * 1043 * ZEN 4 NON-POWER OF 2 1044 * 1045 * Next, we have the DFv4 versions of the 3, 5, 6, 10, and 12 channel hashing. 1046 * An important part of this is whether or not there is any socket hashing going 1047 * on. Recall there, that if socket hashing was going on, then it is part of the 1048 * interleave logic; however, if it is not, then its hash actually becomes 1049 * part of the normalized address, but not in the same spot! 1050 * 1051 * In this mode, we always remove the bits that are actually used by the hash. 1052 * Recall that some modes use hash[0], others hash[0] and hash[2], and then only 1053 * the 12-channel config uses hash[2:0]. This means we need to be careful in how 1054 * we actually remove address bits. All other bits in this lower range we end up 1055 * keeping and using. The top bits, e.g. addr[63:14] are kept and divided by the 1056 * actual channel-modulus. If we're not performing socket interleaving and 1057 * therefore need to keep the value of hash[0], then it is appended as the least 1058 * significant bit of that calculation. 1059 * 1060 * Let's look at an example of this to try to make sense of it all. 1061 * 1062 * o 6-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled. 1063 * 1-die and 2-socket interleaving. 1064 * 1065 * Here we'd start by calculating hash[2:0] as described in the earlier 1066 * interleaving situation. Because we're using a socket interleave, we will 1067 * not opt to include hash[0] in the higher-level address calculation. 1068 * Because this is a 6-channel calculation, our modulus is 3. Here, we will 1069 * strip out bits 8 and 13 (recall in the interleaving 6-channel example we 1070 * ignored hash[1], thus no bit 12 here). Our new address will be: 1071 * 1072 * (orig[63:14] / 3) >> 2 | orig[12:9] >> 1 | orig[7:0] 1073 * | | +-> stays the same 1074 * | +-> relocated to bit 8 -- shifted by 1 because 1075 * | we removed bit 8. 1076 * +--> Relocated to bit 12 -- shifted by 2 because we removed bits 8 and 1077 * 13. 1078 * 1079 * o 12-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled. 1080 * 1-die and 1-socket interleaving. 1081 * 1082 * This is a slightly different case from the above in two ways. First, we 1083 * will end up removing bits 8, 12, and 13, but then we'll also reuse 1084 * hash[0]. Our new address will be: 1085 * 1086 * ((orig[63:14] / 3) << 1 | hash[0]) >> 3 | orig[11:9] >> 1 | orig[7:0] 1087 * | | +-> stays the 1088 * | | same 1089 * | +-> relocated to bit 8 -- shifted by 1090 * | 1 because we removed bit 8. 1091 * +--> Relocated to bit 11 -- shifted by 3 because we removed bits 8, 12, 1092 * and 13. 1093 * 1094 * DF 4D2 NPS 1K/2K NON-POWER OF 2 1095 * 1096 * Unsurprisingly, if you've followed to this point, there is a slightly 1097 * different normalization scheme that is used here. Like in the other cases we 1098 * end up breaking the address into the three parts that are used: a lower 1099 * portion that remains the same, a middle portion that is from bits that were 1100 * not used as part of the interleaving process, and the upper portion which is 1101 * where we end up with our division (like the Zen 4 case above). To add to the 1102 * fun, the upper portion that gets divided sometimes has some lower parts of 1103 * the address tossed up there. 1104 * 1105 * Because each case is unique, we have created a data table in the decoder: 1106 * zen_umc_np2_k_rules. This structure has a number of pieces that describe how 1107 * to transform the address. Logically this computation looks like: 1108 * 1109 * [ upper address / modulus ] | middle bits | low bits 1110 * | | | 1111 * | | +-> Always bits (rule start, 0] 1112 * | | 1113 * | +-> The starting bit is zukr_norm_addr. There 1114 * | are zukr_norm_naddr bits. This is: 1115 * | (zukr_norm_addr + zukr_norm_naddr, 1116 * | zukr_norm_addr]. 1117 * | 1118 * +--> This has two portions everything from (64, zukr_high] and then the 1119 * optional bonus region, which is indicated by zukr_div_addr and 1120 * zukr_div_naddr. These bits are always the low bit. Meaning that the 1121 * initial bits will be shifted over by zukr_div_naddr before we 1122 * perform the division. 1123 * 1124 * Once each of these three pieces has been calculated, all the resulting pieces 1125 * will be shifted so they are contiguous like the other cases as though the 1126 * removed bits didn't exist. 1127 * 1128 * 1129 * That's most of the normalization process for the time being. We will have to 1130 * revisit this when we have to transform a normal address into a system address 1131 * and undo all this. 1132 * 1133 * ------------------------------------- 1134 * Selecting a DIMM and UMC Organization 1135 * ------------------------------------- 1136 * 1137 * One of the more nuanced things in decoding and encoding is the question of 1138 * where do we send a channel normalized address. That is, now that we've gotten 1139 * to a given channel, we need to transform the address into something 1140 * meaningful for a DIMM, and select a DIMM as well. The UMC SMN space contains 1141 * a number of Base Address and Mask registers which they describe as activating 1142 * a chip-select. A given UMC has up to four primary chip-selects (we'll come 1143 * back to DDR5 sub-channels later). The first two always go to the first DIMM 1144 * in the channel and the latter two always go to the second DIMM in the 1145 * channel. Put another way, you can always determine which DIMM you are 1146 * referring to by taking the chip-select and shifting it by 1. 1147 * 1148 * The UMC Channel registers are organized a bit differently in different 1149 * hardware generations. In a DDR5 based UMC, almost all of our settings are on 1150 * a per-chip-select basis while as in a DDR4 based system only the bases and 1151 * masks are. While gathering data we normalize this such that each logical 1152 * chip-select (umc_cs_t) that we have in the system has the same data so that 1153 * way DDR4 and DDR5 based systems are the same to the decoding logic. There is 1154 * also channel-wide data such as hash configurations and related. 1155 * 1156 * Each channel has a set of base and mask registers (and secondary ones as 1157 * well). To determine if we activate a given one, we first check if the 1158 * enabled bit is set. The enabled bit is set on a per-base basis, so both the 1159 * primary and secondary registers have separate enables. As there are four of 1160 * each base, mask, secondary base, and secondary mask, we say that if a 1161 * normalized address matches either a given indexes primary or secondary index, 1162 * then it activates that given UMC index. The basic formula for an enabled 1163 * selection is: 1164 * 1165 * NormAddr & ~Mask[i] == Base[i] & ~Mask[i] 1166 * 1167 * Once this is selected, this index in the UMC is what it always used to derive 1168 * the rest of the information that is specific to a given chip-select or DIMM. 1169 * An important thing to remember is that from this point onwards, while there 1170 * is a bunch of hashing and interleaving logic it doesn't change which UMC 1171 * channel we read the data from. Though the particular DIMM, rank, and address 1172 * we access will change as we go through hashing and interleaving. 1173 * 1174 * ------------------------ 1175 * Row and Column Selection 1176 * ------------------------ 1177 * 1178 * The number of bits that are used for the row and column address of a DIMM 1179 * varies based on the type of module itself. These depend on the density of a 1180 * DIMM module, e.g. how large an individual DRAM block is, a value such as 16 1181 * Gbit, and the number of these wide it is, which is generally phrased as X4, 1182 * X8, and X16. The memory controller encodes the number of bits (derived from 1183 * the DIMM's SPD data) and then determines which bits are used for addresses. 1184 * 1185 * Based on this information we can initially construct a row and a column 1186 * address by leveraging the information about the number of bits and then 1187 * extracting the correct bits out of the normalized channel address. 1188 * 1189 * If you've made it this far, you know nothing is quite this simple, despite it 1190 * seeming so. Importantly, not all DIMMs actually have storage that is a power 1191 * of 2. As such, there's another bit that we have to consult to transform the 1192 * actual value that we have for a row, remarkably the column somehow has no 1193 * transformations applied to it. 1194 * 1195 * The hardware gives us information on inverting the two 'most significant 1196 * bits' of the row address which we store in 'ucs_inv_msbs'. First, we have the 1197 * question of what are our most significant bits here. This is basically 1198 * determined by the number of low and high row bits. In this case higher 1199 * actually is what we want. Note, the high row bits only exist in DDR4. Next, 1200 * we need to know whether we used the primary or secondary base/mask pair for 1201 * this as there is a primary and secondary inversion bits. The higher bit of 1202 * the inversion register (e.g ucs_inv_msbs[1]) corresponds to the highest row 1203 * bit. A zero in the bit position indicates that we should not perform an 1204 * inversion where as a one says that we should invert this. 1205 * 1206 * To actually make this happen we can take advantage of the fact that the 1207 * meaning of a 0/1 above means that this can be implemented with a binary 1208 * exclusive-OR (XOR). Logically speaking if we have a don't invert setting 1209 * present, a 0, then x ^ 0 is always x. However, if we have a 1 present, then 1210 * we know that (for a single bit) x ^ 1 = ~x. We take advantage of this fact in 1211 * the row logic. 1212 * 1213 * --------------------- 1214 * Banks and Bank Groups 1215 * --------------------- 1216 * 1217 * While addressing within a given module is done by the use of a row and column 1218 * address, to increase storage density a module generally has a number of 1219 * banks, which may be organized into one or more bank groups. While a given 1220 * DDR4/5 access happens in some prefetched chunk of say 64 bytes (what do you 1221 * know, that's a cacheline), that all occurs within a single bank. The addition 1222 * of bank groups makes it easier to access data in parallel -- it is often 1223 * faster to read from another bank group than to read another region inside a 1224 * bank group. 1225 * 1226 * Based on the DIMMs internal configuration, there will be a specified number 1227 * of bits used for the overall bank address (including bank group bits) 1228 * followed by a number of bits actually used for bank groups. There are 1229 * separately an array of bits used to concoct the actual address. It appears, 1230 * mostly through experimental evidence, that the bank group bits occur first 1231 * and then are followed by the bank selection itself. This makes some sense if 1232 * you assume that switching bank groups is faster than switching banks. 1233 * 1234 * So if we see the UMC noting 4 bank bits and 2 bank groups bits, that means 1235 * that the umc_cs_t's ucs_bank_bits[1:0] correspond to bank_group[1:0] and 1236 * ucs_bank_bits[3:2] correspond to bank_address[1:0]. However, if there were no 1237 * bank bits indicated, then all of the address bits would correspond to the 1238 * bank address. 1239 * 1240 * Now, this would all be straightforward if not for hashing, our favorite. 1241 * There are five bank hashing registers per channel (UMC_BANK_HASH_DDR4, 1242 * UMC_BANK_HASH_DDR5), one that corresponds to the five possible bank bits. To 1243 * do this we need to use the calculated row and column that we previously 1244 * determined. This calculation happens in a few steps: 1245 * 1246 * 1) First check if the enable bit is set in the rule. If not, just use the 1247 * normal bank address bit and we're done. 1248 * 2) Take a bitwise-AND of the calculated row and hash register's row value. 1249 * Next do the same thing for the column. 1250 * 3) For each bit in the row, progressively XOR it, e.g. row[0] ^ row[1] ^ 1251 * row[2] ^ ... to calculate a net bit value for the row. This then 1252 * repeats itself for the column. What basically has happened is that we're 1253 * using the hash register to select which bits to impact our decision. 1254 * Think of this as a traditional bitwise functional reduce. 1255 * 4) XOR the combined rank bit with the column bit and the actual bank 1256 * address bit from the normalized address. So if this were bank bit 0, 1257 * which indicated we should use bit 15 for bank[0], then we would 1258 * ultimately say our new bit is norm_addr[15] ^ row_xor ^ col_xor 1259 * 1260 * An important caveat is that we would only consult all this if we actually 1261 * were told that the bank bit was being used. For example if we had 3 bank 1262 * bits, then we'd only check the first 3 hash registers. The latter two would 1263 * be ignored. 1264 * 1265 * Once this process is done, then we can go back and split the activated bank 1266 * into the actual bank used and the bank group used based on the first bits 1267 * going to the bank group. 1268 * 1269 * --------------- 1270 * DDR5 Sub-channel 1271 * --------------- 1272 * 1273 * As described in the definitions section, DDR5 has the notion of a 1274 * sub-channel. Here, a single bit is used to determine which of the 1275 * sub-channels to actually operate and utilize. Importantly the same 1276 * chip-select seems to apply to both halves of a given sub-channel. 1277 * 1278 * There is also a hash that is used here. The hash here utilizes the calculated 1279 * bank, column, and row and follows the same pattern used in the bank 1280 * calculation where we do a bunch of running exclusive-ORs and then do that 1281 * with the original value we found to get the new value. Because there's only 1282 * one bit for the sub-channel, we only have a single hash to consider. 1283 * 1284 * ------------------------------------------- 1285 * Ranks, Chip-Select, and Rank Multiplication 1286 * ------------------------------------------- 1287 * 1288 * The notion of ranks and the chip-select are interwoven. From a strict DDR4 1289 * RDIMM perspective, there are two lines that are dedicated for chip-selects 1290 * and then another two that are shared with three 'chip-id' bits that are used 1291 * in 3DS RDIMMs. In all cases the controller starts with two logical chip 1292 * selects and then uses something called rank multiplication to figure out how 1293 * to multiplex that and map to the broader set of things. Basically, in 1294 * reality, DDR4 RDIMMs allow for 4 bits to determine a rank and then 3DS RDIMMs 1295 * use 2 bits for a rank and 3 bits to select a stacked chip. In DDR5 this is 1296 * different and you just have 2 bits for a rank. 1297 * 1298 * It's not entirely clear from what we know from AMD, but it seems that we use 1299 * the RM bits as a way to basically go beyond the basic 2 bits of chip-select 1300 * which is determined based on which channel we logically activate. Initially 1301 * we treat this as two distinct things, here as that's what we get from the 1302 * hardware. There are two hashes here a chip-select and rank-multiplication 1303 * hash. Unlike the others, which rely on the bank, row, and column addresses, 1304 * this hash relies on the normalized address. So we calculate that mask and do 1305 * our same xor dance. 1306 * 1307 * There is one hash for each rank multiplication bit and chip-select bit. The 1308 * number of rank multiplication bits is given to us. The number of chip-select 1309 * bits is fixed, it's simply two because there are four base/mask registers and 1310 * logical chip-selects in a given UMC channel. The chip-select on some DDR5 1311 * platforms has a secondary exclusive-OR hash that can be applied. As this only 1312 * exists in some families, for any where it does exist, we seed it to be zero 1313 * so that it becomes a no-op. 1314 * 1315 * ----------- 1316 * Future Work 1317 * ----------- 1318 * 1319 * As the road goes ever on and on, down from the door where it began, there are 1320 * still some stops on the journey for this driver. In particular, here are the 1321 * major open areas that could be implemented to extend what this can do: 1322 * 1323 * o The ability to transform a normalized channel address back to a system 1324 * address. This is required for MCA/MCA-X error handling as those generally 1325 * work in terms of channel addresses. 1326 * o Integrating with the MCA/MCA-X error handling paths so that way we can 1327 * take correct action in the face of ECC errors and allowing recovery from 1328 * uncorrectable errors. 1329 * o Providing memory controller information to FMA so that way it can opt to 1330 * do predictive failure or give us more information about what is fault 1331 * with ECC errors. 1332 * o Figuring out if we will get MCEs for privileged address decoding and if 1333 * so mapping those back to system addresses and related. 1334 * o 3DS RDIMMs likely will need a little bit of work to ensure we're handling 1335 * the resulting combination of the RM bits and CS and reporting it 1336 * intelligently. 1337 * o Support for the MI300-specific interleave decoding. 1338 * o Understanding the error flow for CXL related address decoding and if we 1339 * should support it in this driver. 1340 */ 1341 1342 #include <sys/types.h> 1343 #include <sys/file.h> 1344 #include <sys/errno.h> 1345 #include <sys/open.h> 1346 #include <sys/cred.h> 1347 #include <sys/ddi.h> 1348 #include <sys/sunddi.h> 1349 #include <sys/stat.h> 1350 #include <sys/conf.h> 1351 #include <sys/devops.h> 1352 #include <sys/cmn_err.h> 1353 #include <sys/x86_archext.h> 1354 #include <sys/sysmacros.h> 1355 #include <sys/mc.h> 1356 1357 #include <zen_umc.h> 1358 #include <sys/amdzen/df.h> 1359 #include <sys/amdzen/umc.h> 1360 1361 static zen_umc_t *zen_umc; 1362 1363 /* 1364 * Per-CPU family information that describes the set of capabilities that they 1365 * implement. When adding support for new CPU generations, you must go through 1366 * what documentation you have and validate these. The best bet is to find a 1367 * similar processor and see what has changed. Unfortunately, there really isn't 1368 * a substitute for just basically checking every register. The family name 1369 * comes from the amdzen_c_family(). One additional note for new CPUs, if our 1370 * parent amdzen nexus driver does not attach (because the DF has changed PCI 1371 * IDs or more), then just adding something here will not be sufficient to make 1372 * it work. 1373 */ 1374 static const zen_umc_fam_data_t zen_umc_fam_data[] = { 1375 { 1376 .zufd_family = X86_PF_AMD_NAPLES, 1377 .zufd_dram_nrules = 16, 1378 .zufd_cs_nrules = 2, 1379 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4, 1380 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS, 1381 .zufd_base_instid = 0 1382 }, { 1383 .zufd_family = X86_PF_HYGON_DHYANA, 1384 .zufd_dram_nrules = 16, 1385 .zufd_cs_nrules = 2, 1386 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4, 1387 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS, 1388 .zufd_base_instid = 0 1389 }, { 1390 .zufd_family = X86_PF_AMD_DALI, 1391 .zufd_dram_nrules = 2, 1392 .zufd_cs_nrules = 2, 1393 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4_APU, 1394 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS, 1395 .zufd_base_instid = 0 1396 }, { 1397 .zufd_family = X86_PF_AMD_ROME, 1398 .zufd_flags = ZEN_UMC_FAM_F_NP2 | ZEN_UMC_FAM_F_NORM_HASH | 1399 ZEN_UMC_FAM_F_UMC_HASH, 1400 .zufd_dram_nrules = 16, 1401 .zufd_cs_nrules = 2, 1402 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4, 1403 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1404 UMC_CHAN_HASH_F_CS, 1405 .zufd_base_instid = 0 1406 }, { 1407 .zufd_family = X86_PF_AMD_RENOIR, 1408 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH, 1409 .zufd_dram_nrules = 2, 1410 .zufd_cs_nrules = 2, 1411 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4_APU, 1412 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_PC | 1413 UMC_CHAN_HASH_F_CS, 1414 .zufd_base_instid = 0 1415 }, { 1416 .zufd_family = X86_PF_AMD_MATISSE, 1417 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH | ZEN_UMC_FAM_F_UMC_HASH, 1418 .zufd_dram_nrules = 16, 1419 .zufd_cs_nrules = 2, 1420 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4, 1421 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1422 UMC_CHAN_HASH_F_CS, 1423 .zufd_base_instid = 0 1424 }, { 1425 .zufd_family = X86_PF_AMD_VAN_GOGH, 1426 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH, 1427 .zufd_dram_nrules = 2, 1428 .zufd_cs_nrules = 2, 1429 .zufd_umc_style = ZEN_UMC_UMC_S_HYBRID_LPDDR5, 1430 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS, 1431 .zufd_base_instid = 0 1432 }, { 1433 .zufd_family = X86_PF_AMD_MENDOCINO, 1434 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH, 1435 .zufd_dram_nrules = 2, 1436 .zufd_cs_nrules = 2, 1437 .zufd_umc_style = ZEN_UMC_UMC_S_HYBRID_LPDDR5, 1438 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS, 1439 .zufd_base_instid = 0 1440 }, { 1441 .zufd_family = X86_PF_AMD_MILAN, 1442 .zufd_flags = ZEN_UMC_FAM_F_TARG_REMAP | ZEN_UMC_FAM_F_NP2 | 1443 ZEN_UMC_FAM_F_NORM_HASH | ZEN_UMC_FAM_F_UMC_HASH, 1444 .zufd_dram_nrules = 16, 1445 .zufd_cs_nrules = 2, 1446 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4, 1447 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1448 UMC_CHAN_HASH_F_CS, 1449 .zufd_base_instid = 0 1450 }, { 1451 .zufd_family = X86_PF_AMD_GENOA, 1452 .zufd_flags = ZEN_UMC_FAM_F_TARG_REMAP | 1453 ZEN_UMC_FAM_F_UMC_HASH | ZEN_UMC_FAM_F_UMC_EADDR | 1454 ZEN_UMC_FAM_F_CS_XOR, 1455 .zufd_dram_nrules = 20, 1456 .zufd_cs_nrules = 4, 1457 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5, 1458 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1459 UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS, 1460 .zufd_base_instid = 0 1461 }, { 1462 .zufd_family = X86_PF_AMD_VERMEER, 1463 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH | ZEN_UMC_FAM_F_UMC_HASH, 1464 .zufd_dram_nrules = 16, 1465 .zufd_cs_nrules = 2, 1466 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4, 1467 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1468 UMC_CHAN_HASH_F_CS, 1469 .zufd_base_instid = 0 1470 }, { 1471 .zufd_family = X86_PF_AMD_REMBRANDT, 1472 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH, 1473 .zufd_dram_nrules = 2, 1474 .zufd_cs_nrules = 2, 1475 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU, 1476 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS, 1477 .zufd_base_instid = 0 1478 }, { 1479 .zufd_family = X86_PF_AMD_CEZANNE, 1480 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH, 1481 .zufd_dram_nrules = 2, 1482 .zufd_cs_nrules = 2, 1483 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4_APU, 1484 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_PC | 1485 UMC_CHAN_HASH_F_CS, 1486 .zufd_base_instid = 0 1487 }, { 1488 .zufd_family = X86_PF_AMD_RAPHAEL, 1489 .zufd_flags = ZEN_UMC_FAM_F_CS_XOR, 1490 .zufd_dram_nrules = 2, 1491 .zufd_cs_nrules = 2, 1492 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5, 1493 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1494 UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS, 1495 .zufd_base_instid = 0 1496 }, { 1497 .zufd_family = X86_PF_AMD_BERGAMO, 1498 .zufd_flags = ZEN_UMC_FAM_F_TARG_REMAP | 1499 ZEN_UMC_FAM_F_UMC_HASH | ZEN_UMC_FAM_F_UMC_EADDR | 1500 ZEN_UMC_FAM_F_CS_XOR, 1501 .zufd_dram_nrules = 20, 1502 .zufd_cs_nrules = 4, 1503 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5, 1504 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1505 UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS, 1506 .zufd_base_instid = 0 1507 }, { 1508 .zufd_family = X86_PF_AMD_PHOENIX, 1509 .zufd_flags = ZEN_UMC_FAM_F_CS_XOR, 1510 .zufd_dram_nrules = 2, 1511 .zufd_cs_nrules = 2, 1512 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU, 1513 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS, 1514 .zufd_base_instid = 0 1515 }, { 1516 .zufd_family = X86_PF_AMD_STRIX, 1517 .zufd_flags = ZEN_UMC_FAM_F_CS_XOR, 1518 .zufd_dram_nrules = 2, 1519 .zufd_cs_nrules = 2, 1520 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU, 1521 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS, 1522 .zufd_base_instid = 0 1523 }, { 1524 .zufd_family = X86_PF_AMD_KRACKAN, 1525 .zufd_flags = ZEN_UMC_FAM_F_CS_XOR, 1526 .zufd_dram_nrules = 2, 1527 .zufd_cs_nrules = 2, 1528 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU, 1529 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS, 1530 .zufd_base_instid = 0 1531 }, { 1532 .zufd_family = X86_PF_AMD_STRIX_HALO, 1533 .zufd_flags = ZEN_UMC_FAM_F_CS_XOR, 1534 .zufd_dram_nrules = 3, 1535 .zufd_cs_nrules = 3, 1536 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU, 1537 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS, 1538 .zufd_base_instid = 0 1539 }, { 1540 .zufd_family = X86_PF_AMD_GRANITE_RIDGE, 1541 .zufd_flags = ZEN_UMC_FAM_F_CS_XOR, 1542 .zufd_dram_nrules = 2, 1543 .zufd_cs_nrules = 2, 1544 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5, 1545 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1546 UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS, 1547 .zufd_base_instid = 0 1548 }, { 1549 .zufd_family = X86_PF_AMD_TURIN, 1550 .zufd_flags = ZEN_UMC_FAM_F_UMC_HASH | ZEN_UMC_FAM_F_UMC_EADDR | 1551 ZEN_UMC_FAM_F_CS_XOR, 1552 .zufd_dram_nrules = 20, 1553 .zufd_cs_nrules = 4, 1554 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5, 1555 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1556 UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS, 1557 .zufd_base_instid = 0 1558 }, { 1559 .zufd_family = X86_PF_AMD_DENSE_TURIN, 1560 .zufd_flags = ZEN_UMC_FAM_F_UMC_HASH | ZEN_UMC_FAM_F_UMC_EADDR | 1561 ZEN_UMC_FAM_F_CS_XOR, 1562 .zufd_dram_nrules = 20, 1563 .zufd_cs_nrules = 4, 1564 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5, 1565 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1566 UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS, 1567 .zufd_base_instid = 0 1568 } 1569 }; 1570 1571 /* 1572 * We use this for the DDR4 and Hybrid DDR4 + LPDDR5 tables to map between the 1573 * specific enumerated speeds which are encoded values and the corresponding 1574 * memory clock and speed. For all DDR4 and LPDDR5 items we assume a a 1:2 ratio 1575 * between them. This is not used for the pure DDR5 / LPDDR5 entries because of 1576 * how the register just encodes the raw value in MHz. 1577 */ 1578 typedef struct zen_umc_freq_map { 1579 uint32_t zufm_reg; 1580 uint32_t zufm_mhz; 1581 uint32_t zufm_mts2; 1582 uint32_t zufm_mts4; 1583 } zen_umc_freq_map_t; 1584 1585 static const zen_umc_freq_map_t zen_umc_ddr4_map[] = { 1586 { UMC_DRAMCFG_DDR4_MEMCLK_667, 667, 1333, 0 }, 1587 { UMC_DRAMCFG_DDR4_MEMCLK_800, 800, 1600, 0 }, 1588 { UMC_DRAMCFG_DDR4_MEMCLK_933, 933, 1866, 0 }, 1589 { UMC_DRAMCFG_DDR4_MEMCLK_1067, 1067, 2133, 0 }, 1590 { UMC_DRAMCFG_DDR4_MEMCLK_1200, 1200, 2400, 0 }, 1591 { UMC_DRAMCFG_DDR4_MEMCLK_1333, 1333, 2666, 0 }, 1592 { UMC_DRAMCFG_DDR4_MEMCLK_1467, 1467, 2933, 0 }, 1593 { UMC_DRAMCFG_DDR4_MEMCLK_1600, 1600, 3200, 0 } 1594 }; 1595 1596 static const zen_umc_freq_map_t zen_umc_lpddr5_map[] = { 1597 { UMC_DRAMCFG_HYB_MEMCLK_333, 333, 667, 1333 }, 1598 { UMC_DRAMCFG_HYB_MEMCLK_400, 400, 800, 1600 }, 1599 { UMC_DRAMCFG_HYB_MEMCLK_533, 533, 1066, 2133 }, 1600 { UMC_DRAMCFG_HYB_MEMCLK_687, 687, 1375, 2750 }, 1601 { UMC_DRAMCFG_HYB_MEMCLK_750, 750, 1500, 3000 }, 1602 { UMC_DRAMCFG_HYB_MEMCLK_800, 800, 1600, 3200 }, 1603 { UMC_DRAMCFG_HYB_MEMCLK_933, 933, 1866, 3733 }, 1604 { UMC_DRAMCFG_HYB_MEMCLK_1066, 1066, 2133, 4267 }, 1605 { UMC_DRAMCFG_HYB_MEMCLK_1200, 1200, 2400, 4800 }, 1606 { UMC_DRAMCFG_HYB_MEMCLK_1375, 1375, 2750, 5500 }, 1607 { UMC_DRAMCFG_HYB_MEMCLK_1500, 1500, 3000, 6000 }, 1608 { UMC_DRAMCFG_HYB_MEMCLK_1600, 1600, 3200, 6400 } 1609 1610 }; 1611 1612 static boolean_t 1613 zen_umc_identify(zen_umc_t *umc) 1614 { 1615 for (uint_t i = 0; i < ARRAY_SIZE(zen_umc_fam_data); i++) { 1616 if (zen_umc_fam_data[i].zufd_family == umc->umc_family) { 1617 umc->umc_fdata = &zen_umc_fam_data[i]; 1618 return (B_TRUE); 1619 } 1620 } 1621 1622 return (B_FALSE); 1623 } 1624 1625 /* 1626 * This operates on DFv2, DFv3, and DFv3.5 DRAM rules, which generally speaking 1627 * are in similar register locations and meanings, but the size of bits in 1628 * memory is not consistent. 1629 */ 1630 static int 1631 zen_umc_read_dram_rule_df_23(zen_umc_t *umc, const uint_t dfno, 1632 const uint_t inst, const uint_t ruleno, df_dram_rule_t *rule) 1633 { 1634 int ret; 1635 uint32_t base, limit; 1636 uint64_t dbase, dlimit; 1637 uint16_t addr_ileave, chan_ileave, sock_ileave, die_ileave, dest; 1638 boolean_t hash = B_FALSE; 1639 zen_umc_df_t *df = &umc->umc_dfs[dfno]; 1640 1641 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_BASE_V2(ruleno), 1642 &base)) != 0) { 1643 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM base " 1644 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret); 1645 return (ret); 1646 } 1647 1648 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_LIMIT_V2(ruleno), 1649 &limit)) != 0) { 1650 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM limit " 1651 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret); 1652 return (ret); 1653 } 1654 1655 1656 rule->ddr_raw_base = base; 1657 rule->ddr_raw_limit = limit; 1658 rule->ddr_raw_ileave = rule->ddr_raw_ctrl = 0; 1659 1660 if (!DF_DRAM_BASE_V2_GET_VALID(base)) { 1661 return (0); 1662 } 1663 1664 /* 1665 * Extract all values from the registers and then normalize. While there 1666 * are often different bit patterns for the values, the interpretation 1667 * is the same across all the Zen 1-3 parts. That is while which bits 1668 * may be used for say channel interleave vary, the values of them are 1669 * consistent. 1670 */ 1671 rule->ddr_flags |= DF_DRAM_F_VALID; 1672 if (DF_DRAM_BASE_V2_GET_HOLE_EN(base)) { 1673 rule->ddr_flags |= DF_DRAM_F_HOLE; 1674 } 1675 1676 dbase = DF_DRAM_BASE_V2_GET_BASE(base); 1677 dlimit = DF_DRAM_LIMIT_V2_GET_LIMIT(limit); 1678 switch (umc->umc_df_rev) { 1679 case DF_REV_2: 1680 addr_ileave = DF_DRAM_BASE_V2_GET_ILV_ADDR(base); 1681 chan_ileave = DF_DRAM_BASE_V2_GET_ILV_CHAN(base); 1682 die_ileave = DF_DRAM_LIMIT_V2_GET_ILV_DIE(limit); 1683 sock_ileave = DF_DRAM_LIMIT_V2_GET_ILV_SOCK(limit); 1684 dest = DF_DRAM_LIMIT_V2_GET_DEST_ID(limit); 1685 break; 1686 case DF_REV_3: 1687 addr_ileave = DF_DRAM_BASE_V3_GET_ILV_ADDR(base); 1688 sock_ileave = DF_DRAM_BASE_V3_GET_ILV_SOCK(base); 1689 die_ileave = DF_DRAM_BASE_V3_GET_ILV_DIE(base); 1690 chan_ileave = DF_DRAM_BASE_V3_GET_ILV_CHAN(base); 1691 dest = DF_DRAM_LIMIT_V3_GET_DEST_ID(limit); 1692 break; 1693 case DF_REV_3P5: 1694 addr_ileave = DF_DRAM_BASE_V3P5_GET_ILV_ADDR(base); 1695 sock_ileave = DF_DRAM_BASE_V3P5_GET_ILV_SOCK(base); 1696 die_ileave = DF_DRAM_BASE_V3P5_GET_ILV_DIE(base); 1697 chan_ileave = DF_DRAM_BASE_V3P5_GET_ILV_CHAN(base); 1698 dest = DF_DRAM_LIMIT_V3P5_GET_DEST_ID(limit); 1699 break; 1700 default: 1701 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported " 1702 "DF revision processing DRAM rules: 0x%x", umc->umc_df_rev); 1703 return (-1); 1704 } 1705 1706 rule->ddr_base = dbase << DF_DRAM_BASE_V2_BASE_SHIFT; 1707 rule->ddr_sock_ileave_bits = sock_ileave; 1708 rule->ddr_die_ileave_bits = die_ileave; 1709 switch (addr_ileave) { 1710 case DF_DRAM_ILV_ADDR_8: 1711 case DF_DRAM_ILV_ADDR_9: 1712 case DF_DRAM_ILV_ADDR_10: 1713 case DF_DRAM_ILV_ADDR_11: 1714 case DF_DRAM_ILV_ADDR_12: 1715 break; 1716 default: 1717 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid address " 1718 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno, 1719 dfno, inst, addr_ileave); 1720 return (EINVAL); 1721 } 1722 rule->ddr_addr_start = DF_DRAM_ILV_ADDR_BASE + addr_ileave; 1723 1724 switch (chan_ileave) { 1725 case DF_DRAM_BASE_V2_ILV_CHAN_1: 1726 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_1CH; 1727 break; 1728 case DF_DRAM_BASE_V2_ILV_CHAN_2: 1729 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_2CH; 1730 break; 1731 case DF_DRAM_BASE_V2_ILV_CHAN_4: 1732 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_4CH; 1733 break; 1734 case DF_DRAM_BASE_V2_ILV_CHAN_8: 1735 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_8CH; 1736 break; 1737 case DF_DRAM_BASE_V2_ILV_CHAN_6: 1738 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_6CH; 1739 break; 1740 case DF_DRAM_BASE_V2_ILV_CHAN_COD4_2: 1741 hash = B_TRUE; 1742 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_COD4_2CH; 1743 break; 1744 case DF_DRAM_BASE_V2_ILV_CHAN_COD2_4: 1745 hash = B_TRUE; 1746 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_COD2_4CH; 1747 break; 1748 case DF_DRAM_BASE_V2_ILV_CHAN_COD1_8: 1749 hash = B_TRUE; 1750 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_COD1_8CH; 1751 break; 1752 default: 1753 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid channel " 1754 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno, 1755 dfno, inst, chan_ileave); 1756 return (EINVAL); 1757 } 1758 1759 /* 1760 * If hashing is enabled, note which hashing rules apply to this 1761 * address. This is done to smooth over the differences between DFv3 and 1762 * DFv4, where the flags are in the rules themselves in the latter, but 1763 * global today. 1764 */ 1765 if (hash) { 1766 if ((df->zud_flags & ZEN_UMC_DF_F_HASH_16_18) != 0) { 1767 rule->ddr_flags |= DF_DRAM_F_HASH_16_18; 1768 } 1769 1770 if ((df->zud_flags & ZEN_UMC_DF_F_HASH_21_23) != 0) { 1771 rule->ddr_flags |= DF_DRAM_F_HASH_21_23; 1772 } 1773 1774 if ((df->zud_flags & ZEN_UMC_DF_F_HASH_30_32) != 0) { 1775 rule->ddr_flags |= DF_DRAM_F_HASH_30_32; 1776 } 1777 } 1778 1779 /* 1780 * While DFv4 makes remapping explicit, it is basically always enabled 1781 * and used on supported platforms prior to that point. So flag such 1782 * supported platforms as ones that need to do this. On those systems 1783 * there is only one set of remap rules for an entire DF that are 1784 * determined based on the target socket. To indicate that we use the 1785 * DF_DRAM_F_REMAP_SOCK flag below and skip setting a remap target. 1786 */ 1787 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_TARG_REMAP) != 0) { 1788 rule->ddr_flags |= DF_DRAM_F_REMAP_EN | DF_DRAM_F_REMAP_SOCK; 1789 } 1790 1791 rule->ddr_limit = (dlimit << DF_DRAM_LIMIT_V2_LIMIT_SHIFT) + 1792 DF_DRAM_LIMIT_V2_LIMIT_EXCL; 1793 rule->ddr_dest_fabid = dest; 1794 1795 return (0); 1796 } 1797 1798 static int 1799 zen_umc_read_dram_rule_df_4(zen_umc_t *umc, const uint_t dfno, 1800 const uint_t inst, const uint_t ruleno, df_dram_rule_t *rule) 1801 { 1802 int ret; 1803 uint16_t addr_ileave; 1804 uint32_t base, limit, ilv, ctl; 1805 1806 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_BASE_V4(ruleno), 1807 &base)) != 0) { 1808 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM base " 1809 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret); 1810 return (ret); 1811 } 1812 1813 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_LIMIT_V4(ruleno), 1814 &limit)) != 0) { 1815 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM limit " 1816 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret); 1817 return (ret); 1818 } 1819 1820 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_ILV_V4(ruleno), 1821 &ilv)) != 0) { 1822 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM " 1823 "interleave register %u on 0x%x/0x%x: %d", ruleno, dfno, 1824 inst, ret); 1825 return (ret); 1826 } 1827 1828 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_CTL_V4(ruleno), 1829 &ctl)) != 0) { 1830 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM control " 1831 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret); 1832 return (ret); 1833 } 1834 1835 rule->ddr_raw_base = base; 1836 rule->ddr_raw_limit = limit; 1837 rule->ddr_raw_ileave = ilv; 1838 rule->ddr_raw_ctrl = ctl; 1839 1840 if (!DF_DRAM_CTL_V4_GET_VALID(ctl)) { 1841 return (0); 1842 } 1843 1844 rule->ddr_flags |= DF_DRAM_F_VALID; 1845 rule->ddr_base = DF_DRAM_BASE_V4_GET_ADDR(base); 1846 rule->ddr_base = rule->ddr_base << DF_DRAM_BASE_V4_BASE_SHIFT; 1847 rule->ddr_limit = DF_DRAM_LIMIT_V4_GET_ADDR(limit); 1848 rule->ddr_limit = (rule->ddr_limit << DF_DRAM_LIMIT_V4_LIMIT_SHIFT) + 1849 DF_DRAM_LIMIT_V4_LIMIT_EXCL; 1850 rule->ddr_dest_fabid = DF_DRAM_CTL_V4_GET_DEST_ID(ctl); 1851 1852 if (DF_DRAM_CTL_V4_GET_HASH_1G(ctl) != 0) { 1853 rule->ddr_flags |= DF_DRAM_F_HASH_30_32; 1854 } 1855 1856 if (DF_DRAM_CTL_V4_GET_HASH_2M(ctl) != 0) { 1857 rule->ddr_flags |= DF_DRAM_F_HASH_21_23; 1858 } 1859 1860 if (DF_DRAM_CTL_V4_GET_HASH_64K(ctl) != 0) { 1861 rule->ddr_flags |= DF_DRAM_F_HASH_16_18; 1862 } 1863 1864 if (DF_DRAM_CTL_V4_GET_REMAP_EN(ctl) != 0) { 1865 rule->ddr_flags |= DF_DRAM_F_REMAP_EN; 1866 rule->ddr_remap_ent = DF_DRAM_CTL_V4_GET_REMAP_SEL(ctl); 1867 } 1868 1869 if (DF_DRAM_CTL_V4_GET_HOLE_EN(ctl) != 0) { 1870 rule->ddr_flags |= DF_DRAM_F_HOLE; 1871 } 1872 1873 if (DF_DRAM_CTL_V4_GET_SCM(ctl) != 0) { 1874 rule->ddr_flags |= DF_DRAM_F_SCM; 1875 } 1876 1877 rule->ddr_sock_ileave_bits = DF_DRAM_ILV_V4_GET_SOCK(ilv); 1878 rule->ddr_die_ileave_bits = DF_DRAM_ILV_V4_GET_DIE(ilv); 1879 switch (DF_DRAM_ILV_V4_GET_CHAN(ilv)) { 1880 case DF_DRAM_ILV_V4_CHAN_1: 1881 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_1CH; 1882 break; 1883 case DF_DRAM_ILV_V4_CHAN_2: 1884 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_2CH; 1885 break; 1886 case DF_DRAM_ILV_V4_CHAN_4: 1887 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_4CH; 1888 break; 1889 case DF_DRAM_ILV_V4_CHAN_8: 1890 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_8CH; 1891 break; 1892 case DF_DRAM_ILV_V4_CHAN_16: 1893 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_16CH; 1894 break; 1895 case DF_DRAM_ILV_V4_CHAN_32: 1896 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_32CH; 1897 break; 1898 case DF_DRAM_ILV_V4_CHAN_NPS4_2CH: 1899 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_2CH; 1900 break; 1901 case DF_DRAM_ILV_V4_CHAN_NPS2_4CH: 1902 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH; 1903 break; 1904 case DF_DRAM_ILV_V4_CHAN_NPS1_8CH: 1905 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH; 1906 break; 1907 case DF_DRAM_ILV_V4_CHAN_NPS4_3CH: 1908 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_3CH; 1909 break; 1910 case DF_DRAM_ILV_V4_CHAN_NPS2_6CH: 1911 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_6CH; 1912 break; 1913 case DF_DRAM_ILV_V4_CHAN_NPS1_12CH: 1914 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_12CH; 1915 break; 1916 case DF_DRAM_ILV_V4_CHAN_NPS2_5CH: 1917 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_5CH; 1918 break; 1919 case DF_DRAM_ILV_V4_CHAN_NPS1_10CH: 1920 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_10CH; 1921 break; 1922 default: 1923 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid channel " 1924 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno, 1925 dfno, inst, DF_DRAM_ILV_V4_GET_CHAN(ilv)); 1926 1927 break; 1928 } 1929 1930 addr_ileave = DF_DRAM_ILV_V4_GET_ADDR(ilv); 1931 switch (addr_ileave) { 1932 case DF_DRAM_ILV_ADDR_8: 1933 case DF_DRAM_ILV_ADDR_9: 1934 case DF_DRAM_ILV_ADDR_10: 1935 case DF_DRAM_ILV_ADDR_11: 1936 case DF_DRAM_ILV_ADDR_12: 1937 break; 1938 default: 1939 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid address " 1940 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno, 1941 dfno, inst, addr_ileave); 1942 return (EINVAL); 1943 } 1944 rule->ddr_addr_start = DF_DRAM_ILV_ADDR_BASE + addr_ileave; 1945 1946 return (0); 1947 } 1948 1949 static int 1950 zen_umc_read_dram_rule_df_4d2(zen_umc_t *umc, const uint_t dfno, 1951 const uint_t inst, const uint_t ruleno, df_dram_rule_t *rule) 1952 { 1953 int ret; 1954 uint16_t addr_ileave; 1955 uint32_t base, limit, ilv, ctl; 1956 1957 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_BASE_V4D2(ruleno), 1958 &base)) != 0) { 1959 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM base " 1960 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret); 1961 return (ret); 1962 } 1963 1964 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_LIMIT_V4D2(ruleno), 1965 &limit)) != 0) { 1966 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM limit " 1967 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret); 1968 return (ret); 1969 } 1970 1971 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_ILV_V4D2(ruleno), 1972 &ilv)) != 0) { 1973 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM " 1974 "interleave register %u on 0x%x/0x%x: %d", ruleno, dfno, 1975 inst, ret); 1976 return (ret); 1977 } 1978 1979 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_CTL_V4D2(ruleno), 1980 &ctl)) != 0) { 1981 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM control " 1982 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret); 1983 return (ret); 1984 } 1985 1986 rule->ddr_raw_base = base; 1987 rule->ddr_raw_limit = limit; 1988 rule->ddr_raw_ileave = ilv; 1989 rule->ddr_raw_ctrl = ctl; 1990 1991 if (!DF_DRAM_CTL_V4_GET_VALID(ctl)) { 1992 return (0); 1993 } 1994 1995 rule->ddr_flags |= DF_DRAM_F_VALID; 1996 rule->ddr_base = DF_DRAM_BASE_V4_GET_ADDR(base); 1997 rule->ddr_base = rule->ddr_base << DF_DRAM_BASE_V4_BASE_SHIFT; 1998 rule->ddr_limit = DF_DRAM_LIMIT_V4_GET_ADDR(limit); 1999 rule->ddr_limit = (rule->ddr_limit << DF_DRAM_LIMIT_V4_LIMIT_SHIFT) + 2000 DF_DRAM_LIMIT_V4_LIMIT_EXCL; 2001 rule->ddr_dest_fabid = DF_DRAM_CTL_V4D2_GET_DEST_ID(ctl); 2002 2003 if (DF_DRAM_CTL_V4D2_GET_HASH_1T(ctl) != 0) { 2004 rule->ddr_flags |= DF_DRAM_F_HASH_40_42; 2005 } 2006 2007 if (DF_DRAM_CTL_V4_GET_HASH_1G(ctl) != 0) { 2008 rule->ddr_flags |= DF_DRAM_F_HASH_30_32; 2009 } 2010 2011 if (DF_DRAM_CTL_V4_GET_HASH_2M(ctl) != 0) { 2012 rule->ddr_flags |= DF_DRAM_F_HASH_21_23; 2013 } 2014 2015 if (DF_DRAM_CTL_V4_GET_HASH_64K(ctl) != 0) { 2016 rule->ddr_flags |= DF_DRAM_F_HASH_16_18; 2017 } 2018 2019 if (DF_DRAM_CTL_V4D2_GET_HASH_4K(ctl) != 0) { 2020 rule->ddr_flags |= DF_DRAM_F_HASH_12_14; 2021 } 2022 2023 if (DF_DRAM_CTL_V4_GET_REMAP_EN(ctl) != 0) { 2024 rule->ddr_flags |= DF_DRAM_F_REMAP_EN; 2025 rule->ddr_remap_ent = DF_DRAM_CTL_V4D2_GET_REMAP_SEL(ctl); 2026 } 2027 2028 if (DF_DRAM_CTL_V4_GET_HOLE_EN(ctl) != 0) { 2029 rule->ddr_flags |= DF_DRAM_F_HOLE; 2030 } 2031 2032 if (DF_DRAM_CTL_V4_GET_SCM(ctl) != 0) { 2033 rule->ddr_flags |= DF_DRAM_F_SCM; 2034 } 2035 2036 rule->ddr_sock_ileave_bits = DF_DRAM_ILV_V4_GET_SOCK(ilv); 2037 rule->ddr_die_ileave_bits = DF_DRAM_ILV_V4_GET_DIE(ilv); 2038 switch (DF_DRAM_ILV_V4D2_GET_CHAN(ilv)) { 2039 case DF_DRAM_ILV_V4D2_CHAN_1: 2040 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_1CH; 2041 break; 2042 case DF_DRAM_ILV_V4D2_CHAN_2: 2043 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_2CH; 2044 break; 2045 case DF_DRAM_ILV_V4D2_CHAN_4: 2046 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_4CH; 2047 break; 2048 case DF_DRAM_ILV_V4D2_CHAN_8: 2049 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_8CH; 2050 break; 2051 case DF_DRAM_ILV_V4D2_CHAN_16: 2052 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_16CH; 2053 break; 2054 case DF_DRAM_ILV_V4D2_CHAN_32: 2055 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_32CH; 2056 break; 2057 case DF_DRAM_ILV_V4D2_CHAN_NPS1_16S8CH_1K: 2058 if (rule->ddr_sock_ileave_bits == 0) { 2059 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_16CH_1K; 2060 } else { 2061 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH_1K; 2062 } 2063 break; 2064 case DF_DRAM_ILV_V4D2_CHAN_NPS0_24CH_1K: 2065 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS0_24CH_1K; 2066 break; 2067 case DF_DRAM_ILV_V4D2_CHAN_NPS4_2CH_1K: 2068 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_2CH_1K; 2069 break; 2070 case DF_DRAM_ILV_V4D2_CHAN_NPS2_4CH_1K: 2071 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH_1K; 2072 break; 2073 case DF_DRAM_ILV_V4D2_CHAN_NPS1_8S4CH_1K: 2074 if (rule->ddr_sock_ileave_bits == 0) { 2075 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH_1K; 2076 } else { 2077 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH_1K; 2078 } 2079 break; 2080 case DF_DRAM_ILV_V4D2_CHAN_NPS4_3CH_1K: 2081 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_3CH_1K; 2082 break; 2083 case DF_DRAM_ILV_V4D2_CHAN_NPS2_6CH_1K: 2084 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_6CH_1K; 2085 break; 2086 case DF_DRAM_ILV_V4D2_CHAN_NPS1_12CH_1K: 2087 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_12CH_1K; 2088 break; 2089 case DF_DRAM_ILV_V4D2_CHAN_NPS2_5CH_1K: 2090 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_5CH_1K; 2091 break; 2092 case DF_DRAM_ILV_V4D2_CHAN_NPS1_10CH_1K: 2093 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_10CH_1K; 2094 break; 2095 case DF_DRAM_ILV_V4D2_CHAN_MI3H_8CH: 2096 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_MI3H_8CH; 2097 break; 2098 case DF_DRAM_ILV_V4D2_CHAN_MI3H_16CH: 2099 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_MI3H_16CH; 2100 break; 2101 case DF_DRAM_ILV_V4D2_CHAN_MI3H_32CH: 2102 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_MI3H_32CH; 2103 break; 2104 case DF_DRAM_ILV_V4D2_CHAN_NPS4_2CH_2K: 2105 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_2CH_2K; 2106 break; 2107 case DF_DRAM_ILV_V4D2_CHAN_NPS2_4CH_2K: 2108 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH_2K; 2109 break; 2110 case DF_DRAM_ILV_V4D2_CHAN_NPS1_8S4CH_2K: 2111 if (rule->ddr_sock_ileave_bits == 0) { 2112 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH_2K; 2113 } else { 2114 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_4CH_2K; 2115 } 2116 break; 2117 case DF_DRAM_ILV_V4D2_CHAN_NPS1_16S8CH_2K: 2118 if (rule->ddr_sock_ileave_bits == 0) { 2119 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_16CH_2K; 2120 } else { 2121 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH_2K; 2122 } 2123 break; 2124 case DF_DRAM_ILV_V4D2_CHAN_NPS4_3CH_2K: 2125 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_3CH_2K; 2126 break; 2127 case DF_DRAM_ILV_V4D2_CHAN_NPS2_6CH_2K: 2128 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_6CH_2K; 2129 break; 2130 case DF_DRAM_ILV_V4D2_CHAN_NPS1_12CH_2K: 2131 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_12CH_2K; 2132 break; 2133 case DF_DRAM_ILV_V4D2_CHAN_NPS0_24CH_2K: 2134 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS0_24CH_2K; 2135 break; 2136 case DF_DRAM_ILV_V4D2_CHAN_NPS2_5CH_2K: 2137 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_5CH_2K; 2138 break; 2139 case DF_DRAM_ILV_V4D2_CHAN_NPS2_10CH_2K: 2140 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_10CH_2K; 2141 break; 2142 default: 2143 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid channel " 2144 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno, 2145 dfno, inst, DF_DRAM_ILV_V4_GET_CHAN(ilv)); 2146 break; 2147 } 2148 2149 addr_ileave = DF_DRAM_ILV_V4_GET_ADDR(ilv); 2150 switch (addr_ileave) { 2151 case DF_DRAM_ILV_ADDR_8: 2152 case DF_DRAM_ILV_ADDR_9: 2153 case DF_DRAM_ILV_ADDR_10: 2154 case DF_DRAM_ILV_ADDR_11: 2155 case DF_DRAM_ILV_ADDR_12: 2156 break; 2157 default: 2158 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid address " 2159 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno, 2160 dfno, inst, addr_ileave); 2161 return (EINVAL); 2162 } 2163 rule->ddr_addr_start = DF_DRAM_ILV_ADDR_BASE + addr_ileave; 2164 2165 return (0); 2166 } 2167 2168 static int 2169 zen_umc_read_dram_rule(zen_umc_t *umc, const uint_t dfno, const uint_t instid, 2170 const uint_t ruleno, df_dram_rule_t *rule) 2171 { 2172 int ret; 2173 2174 switch (umc->umc_df_rev) { 2175 case DF_REV_2: 2176 case DF_REV_3: 2177 case DF_REV_3P5: 2178 ret = zen_umc_read_dram_rule_df_23(umc, dfno, instid, ruleno, 2179 rule); 2180 break; 2181 case DF_REV_4: 2182 ret = zen_umc_read_dram_rule_df_4(umc, dfno, instid, ruleno, 2183 rule); 2184 break; 2185 case DF_REV_4D2: 2186 ret = zen_umc_read_dram_rule_df_4d2(umc, dfno, instid, ruleno, 2187 rule); 2188 break; 2189 default: 2190 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported " 2191 "DF revision processing DRAM rules: 0x%x", umc->umc_df_rev); 2192 return (-1); 2193 } 2194 2195 if (ret != 0) { 2196 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM " 2197 "rule %u on df/inst 0x%x/0x%x: %d", ruleno, 2198 dfno, instid, ret); 2199 return (-1); 2200 } 2201 2202 return (0); 2203 } 2204 2205 /* 2206 * The Extended remapper has up to 4 remap rule sets. Each set addresses up to 2207 * 16 remap rules (ala DFv4), but the width of the targets is larger so they are 2208 * all split up amongst 3 registers instead. CPUs indicate support for this in 2209 * the DF::DfCapability register. Not all CPUs actually use all such entries. We 2210 * will read all entries, even if they are not in the PPR with the assumption 2211 * that a CPU DRAM rule will only ever refer to the ones that exist for the 2212 * moment. Our expectation is that these reserved registers are all 0s or all 2213 * 1s, but that has yet to be proven. 2214 */ 2215 static int 2216 zen_umc_read_extremap(zen_umc_t *umc, zen_umc_df_t *df, const uint_t instid) 2217 { 2218 const uint_t dfno = df->zud_dfno; 2219 const df_reg_def_t remapA[ZEN_UMC_MAX_CS_REMAPS] = { 2220 DF_CS_REMAP0A_V4D2, DF_CS_REMAP1A_V4D2, DF_CS_REMAP2A_V4D2, 2221 DF_CS_REMAP3A_V4D2 }; 2222 const df_reg_def_t remapB[ZEN_UMC_MAX_CS_REMAPS] = { 2223 DF_CS_REMAP0B_V4D2, DF_CS_REMAP1B_V4D2, DF_CS_REMAP2B_V4D2, 2224 DF_CS_REMAP3B_V4D2 }; 2225 const df_reg_def_t remapC[ZEN_UMC_MAX_CS_REMAPS] = { 2226 DF_CS_REMAP0C_V4D2, DF_CS_REMAP1C_V4D2, DF_CS_REMAP2C_V4D2, 2227 DF_CS_REMAP3C_V4D2 }; 2228 2229 df->zud_cs_nremap = ZEN_UMC_MAX_CS_REMAPS; 2230 for (uint_t i = 0; i < df->zud_cs_nremap; i++) { 2231 int ret; 2232 uint32_t rm[3]; 2233 zen_umc_cs_remap_t *remap = &df->zud_remap[i]; 2234 2235 if ((ret = amdzen_c_df_read32(dfno, instid, remapA[i], 2236 &rm[0])) != 0) { 2237 dev_err(umc->umc_dip, CE_WARN, "!failed to read " 2238 "df/inst 0x%x/0x%x remap rule %uA: %d", dfno, 2239 instid, i, ret); 2240 return (-1); 2241 } 2242 2243 if ((ret = amdzen_c_df_read32(dfno, instid, remapB[i], 2244 &rm[1])) != 0) { 2245 dev_err(umc->umc_dip, CE_WARN, "!failed to read " 2246 "df/inst 0x%x/0x%x remap rule %uB: %d", dfno, 2247 instid, i, ret); 2248 return (-1); 2249 } 2250 2251 if ((ret = amdzen_c_df_read32(dfno, instid, remapC[i], 2252 &rm[2])) != 0) { 2253 dev_err(umc->umc_dip, CE_WARN, "!failed to read " 2254 "df/inst 0x%x/0x%x remap rule %uC: %d", dfno, 2255 instid, i, ret); 2256 return (-1); 2257 } 2258 2259 /* 2260 * Remap rule A has CS 0-5, B 6-11, C 12-15 2261 */ 2262 remap->csr_nremaps = ZEN_UMC_MAX_REMAP_ENTS; 2263 for (uint_t ent = 0; ent < remap->csr_nremaps; ent++) { 2264 uint_t reg = ent / ZEN_UMC_REMAP_PER_REG_4D2; 2265 uint_t idx = ent % ZEN_UMC_REMAP_PER_REG_4D2; 2266 remap->csr_remaps[ent] = 2267 DF_CS_REMAP_GET_CSX_V4B(rm[reg], idx); 2268 } 2269 } 2270 return (0); 2271 } 2272 2273 static int 2274 zen_umc_read_remap(zen_umc_t *umc, zen_umc_df_t *df, const uint_t instid) 2275 { 2276 uint_t nremaps, nents; 2277 const uint_t dfno = df->zud_dfno; 2278 const df_reg_def_t milan_remap0[ZEN_UMC_MILAN_CS_NREMAPS] = { 2279 DF_SKT0_CS_REMAP0_V3, DF_SKT1_CS_REMAP0_V3 }; 2280 const df_reg_def_t milan_remap1[ZEN_UMC_MILAN_CS_NREMAPS] = { 2281 DF_SKT0_CS_REMAP1_V3, DF_SKT1_CS_REMAP1_V3 }; 2282 const df_reg_def_t dfv4_remapA[ZEN_UMC_MAX_CS_REMAPS] = { 2283 DF_CS_REMAP0A_V4, DF_CS_REMAP1A_V4, DF_CS_REMAP2A_V4, 2284 DF_CS_REMAP3A_V4 }; 2285 const df_reg_def_t dfv4_remapB[ZEN_UMC_MAX_CS_REMAPS] = { 2286 DF_CS_REMAP0B_V4, DF_CS_REMAP1B_V4, DF_CS_REMAP2B_V4, 2287 DF_CS_REMAP3B_V4 }; 2288 const df_reg_def_t *remapA, *remapB; 2289 2290 2291 switch (umc->umc_df_rev) { 2292 case DF_REV_3: 2293 nremaps = ZEN_UMC_MILAN_CS_NREMAPS; 2294 nents = ZEN_UMC_MILAN_REMAP_ENTS; 2295 remapA = milan_remap0; 2296 remapB = milan_remap1; 2297 break; 2298 case DF_REV_4: 2299 nremaps = ZEN_UMC_MAX_CS_REMAPS; 2300 nents = ZEN_UMC_MAX_REMAP_ENTS; 2301 remapA = dfv4_remapA; 2302 remapB = dfv4_remapB; 2303 break; 2304 case DF_REV_4D2: 2305 return (zen_umc_read_extremap(umc, df, instid)); 2306 default: 2307 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported DF " 2308 "revision processing remap rules: 0x%x", umc->umc_df_rev); 2309 return (-1); 2310 } 2311 2312 df->zud_cs_nremap = nremaps; 2313 for (uint_t i = 0; i < nremaps; i++) { 2314 int ret; 2315 uint32_t rm[2]; 2316 zen_umc_cs_remap_t *remap = &df->zud_remap[i]; 2317 2318 if ((ret = amdzen_c_df_read32(dfno, instid, remapA[i], 2319 &rm[0])) != 0) { 2320 dev_err(umc->umc_dip, CE_WARN, "!failed to read " 2321 "df/inst 0x%x/0x%x remap socket %u-0/A: %d", dfno, 2322 instid, i, ret); 2323 return (-1); 2324 } 2325 2326 if ((ret = amdzen_c_df_read32(dfno, instid, remapB[i], 2327 &rm[1])) != 0) { 2328 dev_err(umc->umc_dip, CE_WARN, "!failed to read " 2329 "df/inst 0x%x/0x%x remap socket %u-1/B: %d", dfno, 2330 instid, i, ret); 2331 return (-1); 2332 } 2333 2334 remap->csr_nremaps = nents; 2335 for (uint_t ent = 0; ent < remap->csr_nremaps; ent++) { 2336 uint_t reg = ent / ZEN_UMC_REMAP_PER_REG; 2337 uint_t idx = ent % ZEN_UMC_REMAP_PER_REG; 2338 remap->csr_remaps[ent] = DF_CS_REMAP_GET_CSX(rm[reg], 2339 idx); 2340 } 2341 } 2342 2343 return (0); 2344 } 2345 2346 /* 2347 * Now that we have a CCM, we have several different tasks ahead of us: 2348 * 2349 * o Determine whether or not the DRAM hole is valid. 2350 * o Snapshot all of the system address rules and translate them into our 2351 * generic format. 2352 * o Determine if there are any rules to retarget things (currently 2353 * Milan/Genoa). 2354 * o Determine if there are any other hashing rules enabled. 2355 * 2356 * We only require this from a single CCM as these are currently required to be 2357 * the same across all of them. 2358 */ 2359 static int 2360 zen_umc_fill_ccm_cb(const uint_t dfno, const uint32_t fabid, 2361 const uint32_t instid, void *arg) 2362 { 2363 zen_umc_t *umc = arg; 2364 zen_umc_df_t *df = &umc->umc_dfs[dfno]; 2365 df_reg_def_t hole; 2366 int ret; 2367 uint32_t val; 2368 2369 df->zud_dfno = dfno; 2370 df->zud_ccm_inst = instid; 2371 2372 /* 2373 * Read the DF::DfCapability register. This is not instance specific. 2374 */ 2375 if ((ret = amdzen_c_df_read32_bcast(dfno, DF_CAPAB, &df->zud_capab)) != 2376 0) { 2377 dev_err(umc->umc_dip, CE_WARN, "!failed to read DF Capability " 2378 "register: %d", ret); 2379 return (-1); 2380 } 2381 2382 /* 2383 * Next get the DRAM hole. This has the same layout, albeit different 2384 * registers across our different platforms. 2385 */ 2386 switch (umc->umc_df_rev) { 2387 case DF_REV_2: 2388 case DF_REV_3: 2389 case DF_REV_3P5: 2390 hole = DF_DRAM_HOLE_V2; 2391 break; 2392 case DF_REV_4: 2393 case DF_REV_4D2: 2394 hole = DF_DRAM_HOLE_V4; 2395 break; 2396 default: 2397 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported " 2398 "DF version: 0x%x", umc->umc_df_rev); 2399 return (-1); 2400 } 2401 2402 if ((ret = amdzen_c_df_read32(dfno, instid, hole, &val)) != 0) { 2403 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM Hole: %d", 2404 ret); 2405 return (-1); 2406 } 2407 2408 df->zud_hole_raw = val; 2409 if (DF_DRAM_HOLE_GET_VALID(val)) { 2410 uint64_t t; 2411 2412 df->zud_flags |= ZEN_UMC_DF_F_HOLE_VALID; 2413 t = DF_DRAM_HOLE_GET_BASE(val); 2414 df->zud_hole_base = t << DF_DRAM_HOLE_BASE_SHIFT; 2415 } 2416 2417 /* 2418 * Prior to Zen 4, the hash information was global and applied to all 2419 * COD rules globally. Check if we're on such a system and snapshot this 2420 * so we can use it during the rule application. Note, this was added in 2421 * DFv3. 2422 */ 2423 if (umc->umc_df_rev == DF_REV_3 || umc->umc_df_rev == DF_REV_3P5) { 2424 uint32_t globctl; 2425 2426 if ((ret = amdzen_c_df_read32(dfno, instid, DF_GLOB_CTL_V3, 2427 &globctl)) != 0) { 2428 dev_err(umc->umc_dip, CE_WARN, "!failed to read global " 2429 "control: %d", ret); 2430 return (-1); 2431 } 2432 2433 df->zud_glob_ctl_raw = globctl; 2434 if (DF_GLOB_CTL_V3_GET_HASH_1G(globctl) != 0) { 2435 df->zud_flags |= ZEN_UMC_DF_F_HASH_30_32; 2436 } 2437 2438 if (DF_GLOB_CTL_V3_GET_HASH_2M(globctl) != 0) { 2439 df->zud_flags |= ZEN_UMC_DF_F_HASH_21_23; 2440 } 2441 2442 if (DF_GLOB_CTL_V3_GET_HASH_64K(globctl) != 0) { 2443 df->zud_flags |= ZEN_UMC_DF_F_HASH_16_18; 2444 } 2445 } 2446 2447 df->zud_dram_nrules = umc->umc_fdata->zufd_dram_nrules; 2448 for (uint_t i = 0; i < umc->umc_fdata->zufd_dram_nrules; i++) { 2449 if (zen_umc_read_dram_rule(umc, dfno, instid, i, 2450 &df->zud_rules[i]) != 0) { 2451 return (-1); 2452 } 2453 } 2454 2455 /* 2456 * Once AMD got past DF v4.0 there was a feature bit that indicates 2457 * support for the remapping engine in the DF_CAPAB (DF::DfCapability) 2458 * register. Prior to that we must use our table. 2459 */ 2460 if ((umc->umc_df_rev >= DF_REV_4D2 && 2461 DF_CAPAB_GET_EXTCSREMAP(df->zud_capab) != 0) || 2462 (umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_TARG_REMAP) != 0) { 2463 if (zen_umc_read_remap(umc, df, instid) != 0) { 2464 return (-1); 2465 } 2466 } 2467 2468 /* 2469 * We only want a single entry, so always return 1 to terminate us 2470 * early. 2471 */ 2472 return (1); 2473 } 2474 2475 /* 2476 * At this point we can go through and calculate the size of the DIMM that we've 2477 * found. While it would be nice to determine this from the SPD data, we can 2478 * figure this out entirely based upon the information in the memory controller. 2479 * 2480 * This works by first noting that DDR4, LPDDR4, DDR5, and LPDDR5 are all built 2481 * around 64-bit data channels. This means that each row and column provides up 2482 * 64-bits (ignoring ECC) of data. There are a number of banks and bank groups. 2483 * The memory controller tracks the total number of bits that are used for each. 2484 * While DDR5 introduces sub-channels, we don't need to worry about those here, 2485 * because ultimately the sub-channel just splits the 64-bit bus we're assuming 2486 * into 2x 32-bit buses. While they can be independently selected, they should 2487 * have equivalent capacities. 2488 * 2489 * The most confusing part of this is that there is one of these related to each 2490 * rank on the device. The UMC natively has two 'chip-selects', each of which is 2491 * used to correspond to a rank. There are then separately multiple rm bits in 2492 * each chip-select. As far as we can tell the PSP or SMU programs the number of 2493 * rm bits to be zero when you have a dual-rank device. 2494 * 2495 * We end up summing each chip-select rather than assuming that the chip-selects 2496 * are identical. In theory some amount of asymmetric DIMMs exist in the wild, 2497 * but we don't know of many systems using them. 2498 */ 2499 static void 2500 zen_umc_calc_dimm_size(umc_dimm_t *dimm) 2501 { 2502 dimm->ud_dimm_size = 0; 2503 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_BASE; i++) { 2504 uint64_t nrc; 2505 const umc_cs_t *cs = &dimm->ud_cs[i]; 2506 2507 if ((cs->ucs_flags & UMC_CS_F_DECODE_EN) == 0) { 2508 continue; 2509 } 2510 2511 nrc = cs->ucs_nrow_lo + cs->ucs_nrow_hi + cs->ucs_ncol; 2512 dimm->ud_dimm_size += (8ULL << nrc) * (1 << cs->ucs_nbanks) * 2513 (1 << cs->ucs_nrm); 2514 } 2515 } 2516 2517 /* 2518 * This is used to fill in the common properties about a DIMM. This should occur 2519 * after the rank information has been filled out. The information used is the 2520 * same between DDR4 and DDR5 DIMMs. The only major difference is the register 2521 * offset. 2522 */ 2523 static boolean_t 2524 zen_umc_fill_dimm_common(zen_umc_t *umc, zen_umc_df_t *df, zen_umc_chan_t *chan, 2525 const uint_t dimmno, boolean_t ddr4_style) 2526 { 2527 umc_dimm_t *dimm; 2528 int ret; 2529 smn_reg_t reg; 2530 uint32_t val; 2531 const uint32_t id = chan->chan_logid; 2532 2533 dimm = &chan->chan_dimms[dimmno]; 2534 dimm->ud_dimmno = dimmno; 2535 2536 if (ddr4_style) { 2537 reg = UMC_DIMMCFG_DDR4(id, dimmno); 2538 } else { 2539 reg = UMC_DIMMCFG_DDR5(id, dimmno); 2540 } 2541 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2542 dev_err(umc->umc_dip, CE_WARN, "failed to read DIMM " 2543 "configuration register %x: %d", SMN_REG_ADDR(reg), ret); 2544 return (B_FALSE); 2545 } 2546 dimm->ud_dimmcfg_raw = val; 2547 2548 if (UMC_DIMMCFG_GET_X16(val) != 0) { 2549 dimm->ud_width = UMC_DIMM_W_X16; 2550 } else if (UMC_DIMMCFG_GET_X4(val) != 0) { 2551 dimm->ud_width = UMC_DIMM_W_X4; 2552 } else { 2553 dimm->ud_width = UMC_DIMM_W_X8; 2554 } 2555 2556 if (UMC_DIMMCFG_GET_3DS(val) != 0) { 2557 dimm->ud_kind = UMC_DIMM_K_3DS_RDIMM; 2558 } else if (UMC_DIMMCFG_GET_LRDIMM(val) != 0) { 2559 dimm->ud_kind = UMC_DIMM_K_LRDIMM; 2560 } else if (UMC_DIMMCFG_GET_RDIMM(val) != 0) { 2561 dimm->ud_kind = UMC_DIMM_K_RDIMM; 2562 } else { 2563 dimm->ud_kind = UMC_DIMM_K_UDIMM; 2564 } 2565 2566 /* 2567 * DIMM information in a UMC can be somewhat confusing. There are quite 2568 * a number of non-zero reset values that are here. Flag whether or not 2569 * we think this entry should be usable based on enabled chip-selects. 2570 */ 2571 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_BASE; i++) { 2572 if ((dimm->ud_cs[i].ucs_flags & UMC_CS_F_DECODE_EN) != 0) { 2573 dimm->ud_flags |= UMC_DIMM_F_VALID; 2574 break; 2575 } 2576 } 2577 2578 /* 2579 * The remaining calculations we only want to perform if we have actual 2580 * data for a DIMM. 2581 */ 2582 if ((dimm->ud_flags & UMC_DIMM_F_VALID) == 0) { 2583 return (B_TRUE); 2584 } 2585 2586 zen_umc_calc_dimm_size(dimm); 2587 2588 return (B_TRUE); 2589 } 2590 2591 /* 2592 * Fill all the information about a DDR4 DIMM. In the DDR4 UMC, some of this 2593 * information is on a per-chip select basis while at other times it is on a 2594 * per-DIMM basis. In general, chip-selects 0/1 correspond to DIMM 0, and 2595 * chip-selects 2/3 correspond to DIMM 1. To normalize things with the DDR5 UMC 2596 * which generally has things stored on a per-rank/chips-select basis, we 2597 * duplicate information that is DIMM-wide into the chip-select data structure 2598 * (umc_cs_t). 2599 */ 2600 static boolean_t 2601 zen_umc_fill_chan_dimm_ddr4(zen_umc_t *umc, zen_umc_df_t *df, 2602 zen_umc_chan_t *chan, const uint_t dimmno) 2603 { 2604 umc_dimm_t *dimm; 2605 umc_cs_t *cs0, *cs1; 2606 const uint32_t id = chan->chan_logid; 2607 int ret; 2608 uint32_t val; 2609 smn_reg_t reg; 2610 2611 ASSERT3U(dimmno, <, ZEN_UMC_MAX_DIMMS); 2612 dimm = &chan->chan_dimms[dimmno]; 2613 cs0 = &dimm->ud_cs[0]; 2614 cs1 = &dimm->ud_cs[1]; 2615 2616 /* 2617 * DDR4 organization has initial data that exists on a per-chip select 2618 * basis. The rest of it is on a per-DIMM basis. First we grab the 2619 * per-chip-select data. After this for loop, we will always duplicate 2620 * all data that we gather into both chip-selects. 2621 */ 2622 for (uint_t i = 0; i < ZEN_UMC_MAX_CS_PER_DIMM; i++) { 2623 uint64_t addr; 2624 const uint16_t reginst = i + dimmno * 2; 2625 reg = UMC_BASE(id, reginst); 2626 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2627 dev_err(umc->umc_dip, CE_WARN, "failed to read base " 2628 "register %x: %d", SMN_REG_ADDR(reg), ret); 2629 return (B_FALSE); 2630 } 2631 2632 addr = (uint64_t)UMC_BASE_GET_ADDR(val) << UMC_BASE_ADDR_SHIFT; 2633 dimm->ud_cs[i].ucs_base.udb_base = addr; 2634 dimm->ud_cs[i].ucs_base.udb_valid = UMC_BASE_GET_EN(val); 2635 2636 reg = UMC_BASE_SEC(id, reginst); 2637 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2638 dev_err(umc->umc_dip, CE_WARN, "failed to read " 2639 "secondary base register %x: %d", SMN_REG_ADDR(reg), 2640 ret); 2641 return (B_FALSE); 2642 } 2643 2644 addr = (uint64_t)UMC_BASE_GET_ADDR(val) << UMC_BASE_ADDR_SHIFT; 2645 dimm->ud_cs[i].ucs_sec.udb_base = addr; 2646 dimm->ud_cs[i].ucs_sec.udb_valid = UMC_BASE_GET_EN(val); 2647 2648 if (dimm->ud_cs[i].ucs_base.udb_valid || 2649 dimm->ud_cs[i].ucs_sec.udb_valid) { 2650 dimm->ud_cs[i].ucs_flags |= UMC_CS_F_DECODE_EN; 2651 } 2652 } 2653 2654 reg = UMC_MASK_DDR4(id, dimmno); 2655 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2656 dev_err(umc->umc_dip, CE_WARN, "failed to read mask register " 2657 "%x: %d", SMN_REG_ADDR(reg), ret); 2658 return (B_FALSE); 2659 } 2660 2661 /* 2662 * When we extract the masks, hardware only checks a limited range of 2663 * bits. Therefore we need to always OR in those lower order bits. 2664 */ 2665 cs0->ucs_base_mask = (uint64_t)UMC_MASK_GET_ADDR(val) << 2666 UMC_MASK_ADDR_SHIFT; 2667 cs0->ucs_base_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1; 2668 cs1->ucs_base_mask = cs0->ucs_base_mask; 2669 2670 reg = UMC_MASK_SEC_DDR4(id, dimmno); 2671 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2672 dev_err(umc->umc_dip, CE_WARN, "failed to read secondary mask " 2673 "register %x: %d", SMN_REG_ADDR(reg), ret); 2674 return (B_FALSE); 2675 } 2676 cs0->ucs_sec_mask = (uint64_t)UMC_MASK_GET_ADDR(val) << 2677 UMC_MASK_ADDR_SHIFT; 2678 cs0->ucs_sec_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1; 2679 cs1->ucs_sec_mask = cs0->ucs_sec_mask; 2680 2681 reg = UMC_ADDRCFG_DDR4(id, dimmno); 2682 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2683 dev_err(umc->umc_dip, CE_WARN, "failed to read address config " 2684 "register %x: %d", SMN_REG_ADDR(reg), ret); 2685 return (B_FALSE); 2686 } 2687 2688 cs0->ucs_nbanks = UMC_ADDRCFG_GET_NBANK_BITS(val) + 2689 UMC_ADDRCFG_NBANK_BITS_BASE; 2690 cs1->ucs_nbanks = cs0->ucs_nbanks; 2691 cs0->ucs_ncol = UMC_ADDRCFG_GET_NCOL_BITS(val) + 2692 UMC_ADDRCFG_NCOL_BITS_BASE; 2693 cs1->ucs_ncol = cs0->ucs_ncol; 2694 cs0->ucs_nrow_hi = UMC_ADDRCFG_DDR4_GET_NROW_BITS_HI(val); 2695 cs1->ucs_nrow_hi = cs0->ucs_nrow_hi; 2696 cs0->ucs_nrow_lo = UMC_ADDRCFG_GET_NROW_BITS_LO(val) + 2697 UMC_ADDRCFG_NROW_BITS_LO_BASE; 2698 cs1->ucs_nrow_lo = cs0->ucs_nrow_lo; 2699 cs0->ucs_nbank_groups = UMC_ADDRCFG_GET_NBANKGRP_BITS(val); 2700 cs1->ucs_nbank_groups = cs0->ucs_nbank_groups; 2701 /* 2702 * As the chip-select XORs don't always show up, use a dummy value 2703 * that'll result in no change occurring here. 2704 */ 2705 cs0->ucs_cs_xor = cs1->ucs_cs_xor = 0; 2706 2707 /* 2708 * APUs don't seem to support various rank select bits. 2709 */ 2710 if (umc->umc_fdata->zufd_umc_style == ZEN_UMC_UMC_S_DDR4) { 2711 cs0->ucs_nrm = UMC_ADDRCFG_DDR4_GET_NRM_BITS(val); 2712 cs1->ucs_nrm = cs0->ucs_nrm; 2713 } else { 2714 cs0->ucs_nrm = cs1->ucs_nrm = 0; 2715 } 2716 2717 reg = UMC_ADDRSEL_DDR4(id, dimmno); 2718 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2719 dev_err(umc->umc_dip, CE_WARN, "failed to read bank address " 2720 "select register %x: %d", SMN_REG_ADDR(reg), ret); 2721 return (B_FALSE); 2722 } 2723 cs0->ucs_row_hi_bit = UMC_ADDRSEL_DDR4_GET_ROW_HI(val) + 2724 UMC_ADDRSEL_DDR4_ROW_HI_BASE; 2725 cs1->ucs_row_hi_bit = cs0->ucs_row_hi_bit; 2726 cs0->ucs_row_low_bit = UMC_ADDRSEL_GET_ROW_LO(val) + 2727 UMC_ADDRSEL_ROW_LO_BASE; 2728 cs1->ucs_row_low_bit = cs0->ucs_row_low_bit; 2729 cs0->ucs_bank_bits[0] = UMC_ADDRSEL_GET_BANK0(val) + 2730 UMC_ADDRSEL_BANK_BASE; 2731 cs0->ucs_bank_bits[1] = UMC_ADDRSEL_GET_BANK1(val) + 2732 UMC_ADDRSEL_BANK_BASE; 2733 cs0->ucs_bank_bits[2] = UMC_ADDRSEL_GET_BANK2(val) + 2734 UMC_ADDRSEL_BANK_BASE; 2735 cs0->ucs_bank_bits[3] = UMC_ADDRSEL_GET_BANK3(val) + 2736 UMC_ADDRSEL_BANK_BASE; 2737 cs0->ucs_bank_bits[4] = UMC_ADDRSEL_GET_BANK4(val) + 2738 UMC_ADDRSEL_BANK_BASE; 2739 bcopy(cs0->ucs_bank_bits, cs1->ucs_bank_bits, 2740 sizeof (cs0->ucs_bank_bits)); 2741 2742 reg = UMC_COLSEL_LO_DDR4(id, dimmno); 2743 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2744 dev_err(umc->umc_dip, CE_WARN, "failed to read column address " 2745 "select low register %x: %d", SMN_REG_ADDR(reg), ret); 2746 return (B_FALSE); 2747 } 2748 for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) { 2749 cs0->ucs_col_bits[i] = UMC_COLSEL_REMAP_GET_COL(val, i) + 2750 UMC_COLSEL_LO_BASE; 2751 } 2752 2753 reg = UMC_COLSEL_HI_DDR4(id, dimmno); 2754 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2755 dev_err(umc->umc_dip, CE_WARN, "failed to read column address " 2756 "select high register %x: %d", SMN_REG_ADDR(reg), ret); 2757 return (B_FALSE); 2758 } 2759 for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) { 2760 cs0->ucs_col_bits[i + ZEN_UMC_MAX_COLSEL_PER_REG] = 2761 UMC_COLSEL_REMAP_GET_COL(val, i) + UMC_COLSEL_HI_BASE; 2762 } 2763 bcopy(cs0->ucs_col_bits, cs1->ucs_col_bits, sizeof (cs0->ucs_col_bits)); 2764 2765 /* 2766 * The next two registers give us information about a given rank select. 2767 * In the APUs, the inversion bits are there; however, the actual bit 2768 * selects are not. In this case we read the reserved bits regardless. 2769 * They should be ignored due to the fact that the number of banks is 2770 * zero. 2771 */ 2772 reg = UMC_RMSEL_DDR4(id, dimmno); 2773 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2774 dev_err(umc->umc_dip, CE_WARN, "failed to read rank address " 2775 "select register %x: %d", SMN_REG_ADDR(reg), ret); 2776 return (B_FALSE); 2777 } 2778 cs0->ucs_inv_msbs = UMC_RMSEL_DDR4_GET_INV_MSBE(val); 2779 cs1->ucs_inv_msbs = UMC_RMSEL_DDR4_GET_INV_MSBO(val); 2780 cs0->ucs_rm_bits[0] = UMC_RMSEL_DDR4_GET_RM0(val) + 2781 UMC_RMSEL_BASE; 2782 cs0->ucs_rm_bits[1] = UMC_RMSEL_DDR4_GET_RM1(val) + 2783 UMC_RMSEL_BASE; 2784 cs0->ucs_rm_bits[2] = UMC_RMSEL_DDR4_GET_RM2(val) + 2785 UMC_RMSEL_BASE; 2786 bcopy(cs0->ucs_rm_bits, cs1->ucs_rm_bits, sizeof (cs0->ucs_rm_bits)); 2787 2788 reg = UMC_RMSEL_SEC_DDR4(id, dimmno); 2789 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2790 dev_err(umc->umc_dip, CE_WARN, "failed to read secondary rank " 2791 "address select register %x: %d", SMN_REG_ADDR(reg), ret); 2792 return (B_FALSE); 2793 } 2794 cs0->ucs_inv_msbs_sec = UMC_RMSEL_DDR4_GET_INV_MSBE(val); 2795 cs1->ucs_inv_msbs_sec = UMC_RMSEL_DDR4_GET_INV_MSBO(val); 2796 cs0->ucs_rm_bits_sec[0] = UMC_RMSEL_DDR4_GET_RM0(val) + 2797 UMC_RMSEL_BASE; 2798 cs0->ucs_rm_bits_sec[1] = UMC_RMSEL_DDR4_GET_RM1(val) + 2799 UMC_RMSEL_BASE; 2800 cs0->ucs_rm_bits_sec[2] = UMC_RMSEL_DDR4_GET_RM2(val) + 2801 UMC_RMSEL_BASE; 2802 bcopy(cs0->ucs_rm_bits_sec, cs1->ucs_rm_bits_sec, 2803 sizeof (cs0->ucs_rm_bits_sec)); 2804 2805 return (zen_umc_fill_dimm_common(umc, df, chan, dimmno, B_TRUE)); 2806 } 2807 2808 /* 2809 * The DDR5 based systems are organized such that almost all the information we 2810 * care about is split between two different chip-select structures in the UMC 2811 * hardware SMN space. 2812 */ 2813 static boolean_t 2814 zen_umc_fill_chan_rank_ddr5(zen_umc_t *umc, zen_umc_df_t *df, 2815 zen_umc_chan_t *chan, const uint_t dimmno, const uint_t rankno) 2816 { 2817 int ret; 2818 umc_cs_t *cs; 2819 uint32_t val; 2820 smn_reg_t reg; 2821 const uint32_t id = chan->chan_logid; 2822 const uint32_t regno = dimmno * 2 + rankno; 2823 2824 ASSERT3U(dimmno, <, ZEN_UMC_MAX_DIMMS); 2825 ASSERT3U(rankno, <, ZEN_UMC_MAX_CS_PER_DIMM); 2826 cs = &chan->chan_dimms[dimmno].ud_cs[rankno]; 2827 2828 reg = UMC_BASE(id, regno); 2829 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2830 dev_err(umc->umc_dip, CE_WARN, "failed to read base " 2831 "register %x: %d", SMN_REG_ADDR(reg), ret); 2832 return (B_FALSE); 2833 } 2834 cs->ucs_base.udb_base = (uint64_t)UMC_BASE_GET_ADDR(val) << 2835 UMC_BASE_ADDR_SHIFT; 2836 cs->ucs_base.udb_valid = UMC_BASE_GET_EN(val); 2837 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) { 2838 uint64_t addr; 2839 2840 reg = UMC_BASE_EXT_DDR5(id, regno); 2841 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 2842 0) { 2843 dev_err(umc->umc_dip, CE_WARN, "failed to read " 2844 "extended base register %x: %d", SMN_REG_ADDR(reg), 2845 ret); 2846 return (B_FALSE); 2847 } 2848 2849 addr = (uint64_t)UMC_BASE_EXT_GET_ADDR(val) << 2850 UMC_BASE_EXT_ADDR_SHIFT; 2851 cs->ucs_base.udb_base |= addr; 2852 } 2853 2854 reg = UMC_BASE_SEC(id, regno); 2855 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2856 dev_err(umc->umc_dip, CE_WARN, "failed to read secondary base " 2857 "register %x: %d", SMN_REG_ADDR(reg), ret); 2858 return (B_FALSE); 2859 } 2860 cs->ucs_sec.udb_base = (uint64_t)UMC_BASE_GET_ADDR(val) << 2861 UMC_BASE_ADDR_SHIFT; 2862 cs->ucs_sec.udb_valid = UMC_BASE_GET_EN(val); 2863 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) { 2864 uint64_t addr; 2865 2866 reg = UMC_BASE_EXT_SEC_DDR5(id, regno); 2867 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 2868 0) { 2869 dev_err(umc->umc_dip, CE_WARN, "failed to read " 2870 "extended secondary base register %x: %d", 2871 SMN_REG_ADDR(reg), ret); 2872 return (B_FALSE); 2873 } 2874 2875 addr = (uint64_t)UMC_BASE_EXT_GET_ADDR(val) << 2876 UMC_BASE_EXT_ADDR_SHIFT; 2877 cs->ucs_sec.udb_base |= addr; 2878 } 2879 2880 if (cs->ucs_base.udb_valid || cs->ucs_sec.udb_valid) { 2881 cs->ucs_flags |= UMC_CS_F_DECODE_EN; 2882 } 2883 2884 reg = UMC_MASK_DDR5(id, regno); 2885 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2886 dev_err(umc->umc_dip, CE_WARN, "failed to read mask " 2887 "register %x: %d", SMN_REG_ADDR(reg), ret); 2888 return (B_FALSE); 2889 } 2890 cs->ucs_base_mask = (uint64_t)UMC_MASK_GET_ADDR(val) << 2891 UMC_MASK_ADDR_SHIFT; 2892 cs->ucs_base_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1; 2893 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) { 2894 uint64_t addr; 2895 2896 reg = UMC_MASK_EXT_DDR5(id, regno); 2897 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 2898 0) { 2899 dev_err(umc->umc_dip, CE_WARN, "failed to read " 2900 "extended mask register %x: %d", SMN_REG_ADDR(reg), 2901 ret); 2902 return (B_FALSE); 2903 } 2904 2905 addr = (uint64_t)UMC_MASK_EXT_GET_ADDR(val) << 2906 UMC_MASK_EXT_ADDR_SHIFT; 2907 cs->ucs_base_mask |= addr; 2908 } 2909 2910 2911 reg = UMC_MASK_SEC_DDR5(id, regno); 2912 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2913 dev_err(umc->umc_dip, CE_WARN, "failed to read secondary mask " 2914 "register %x: %d", SMN_REG_ADDR(reg), ret); 2915 return (B_FALSE); 2916 } 2917 cs->ucs_sec_mask = (uint64_t)UMC_MASK_GET_ADDR(val) << 2918 UMC_MASK_ADDR_SHIFT; 2919 cs->ucs_sec_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1; 2920 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) { 2921 uint64_t addr; 2922 2923 reg = UMC_MASK_EXT_SEC_DDR5(id, regno); 2924 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 2925 0) { 2926 dev_err(umc->umc_dip, CE_WARN, "failed to read " 2927 "extended mask register %x: %d", SMN_REG_ADDR(reg), 2928 ret); 2929 return (B_FALSE); 2930 } 2931 2932 addr = (uint64_t)UMC_MASK_EXT_GET_ADDR(val) << 2933 UMC_MASK_EXT_ADDR_SHIFT; 2934 cs->ucs_sec_mask |= addr; 2935 } 2936 2937 reg = UMC_ADDRCFG_DDR5(id, regno); 2938 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2939 dev_err(umc->umc_dip, CE_WARN, "failed to read address config " 2940 "register %x: %d", SMN_REG_ADDR(reg), ret); 2941 return (B_FALSE); 2942 } 2943 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_CS_XOR) != 0) { 2944 cs->ucs_cs_xor = UMC_ADDRCFG_DDR5_GET_CSXOR(val); 2945 } else { 2946 cs->ucs_cs_xor = 0; 2947 } 2948 cs->ucs_nbanks = UMC_ADDRCFG_GET_NBANK_BITS(val) + 2949 UMC_ADDRCFG_NBANK_BITS_BASE; 2950 cs->ucs_ncol = UMC_ADDRCFG_GET_NCOL_BITS(val) + 2951 UMC_ADDRCFG_NCOL_BITS_BASE; 2952 cs->ucs_nrow_lo = UMC_ADDRCFG_GET_NROW_BITS_LO(val) + 2953 UMC_ADDRCFG_NROW_BITS_LO_BASE; 2954 cs->ucs_nrow_hi = 0; 2955 cs->ucs_nrm = UMC_ADDRCFG_DDR5_GET_NRM_BITS(val); 2956 cs->ucs_nbank_groups = UMC_ADDRCFG_GET_NBANKGRP_BITS(val); 2957 2958 reg = UMC_ADDRSEL_DDR5(id, regno); 2959 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2960 dev_err(umc->umc_dip, CE_WARN, "failed to read address select " 2961 "register %x: %d", SMN_REG_ADDR(reg), ret); 2962 return (B_FALSE); 2963 } 2964 cs->ucs_row_hi_bit = 0; 2965 cs->ucs_row_low_bit = UMC_ADDRSEL_GET_ROW_LO(val) + 2966 UMC_ADDRSEL_ROW_LO_BASE; 2967 cs->ucs_bank_bits[4] = UMC_ADDRSEL_GET_BANK4(val) + 2968 UMC_ADDRSEL_BANK_BASE; 2969 cs->ucs_bank_bits[3] = UMC_ADDRSEL_GET_BANK3(val) + 2970 UMC_ADDRSEL_BANK_BASE; 2971 cs->ucs_bank_bits[2] = UMC_ADDRSEL_GET_BANK2(val) + 2972 UMC_ADDRSEL_BANK_BASE; 2973 cs->ucs_bank_bits[1] = UMC_ADDRSEL_GET_BANK1(val) + 2974 UMC_ADDRSEL_BANK_BASE; 2975 cs->ucs_bank_bits[0] = UMC_ADDRSEL_GET_BANK0(val) + 2976 UMC_ADDRSEL_BANK_BASE; 2977 2978 reg = UMC_COLSEL_LO_DDR5(id, regno); 2979 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2980 dev_err(umc->umc_dip, CE_WARN, "failed to read column address " 2981 "select low register %x: %d", SMN_REG_ADDR(reg), ret); 2982 return (B_FALSE); 2983 } 2984 for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) { 2985 cs->ucs_col_bits[i] = UMC_COLSEL_REMAP_GET_COL(val, i) + 2986 UMC_COLSEL_LO_BASE; 2987 } 2988 2989 reg = UMC_COLSEL_HI_DDR5(id, regno); 2990 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2991 dev_err(umc->umc_dip, CE_WARN, "failed to read column address " 2992 "select high register %x: %d", SMN_REG_ADDR(reg), ret); 2993 return (B_FALSE); 2994 } 2995 for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) { 2996 cs->ucs_col_bits[i + ZEN_UMC_MAX_COLSEL_PER_REG] = 2997 UMC_COLSEL_REMAP_GET_COL(val, i) + UMC_COLSEL_HI_BASE; 2998 } 2999 3000 /* 3001 * Time for our friend, the RM Selection register. Like in DDR4 we end 3002 * up reading everything here, even though most others have reserved 3003 * bits here. The intent is that we won't look at the reserved bits 3004 * unless something actually points us there. 3005 */ 3006 reg = UMC_RMSEL_DDR5(id, regno); 3007 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 3008 dev_err(umc->umc_dip, CE_WARN, "failed to read rank multiply " 3009 "select register %x: %d", SMN_REG_ADDR(reg), ret); 3010 return (B_FALSE); 3011 } 3012 3013 /* 3014 * DDR5 based devices have a primary and secondary msbs; however, they 3015 * only have a single set of rm bits. To normalize things with the DDR4 3016 * subsystem, we copy the primary bits to the secondary so we can use 3017 * these the same way in the decoder/encoder. 3018 */ 3019 cs->ucs_inv_msbs = UMC_RMSEL_DDR5_GET_INV_MSBS(val); 3020 cs->ucs_inv_msbs_sec = UMC_RMSEL_DDR5_GET_INV_MSBS_SEC(val); 3021 cs->ucs_subchan = UMC_RMSEL_DDR5_GET_SUBCHAN(val) + 3022 UMC_RMSEL_DDR5_SUBCHAN_BASE; 3023 cs->ucs_rm_bits[3] = UMC_RMSEL_DDR5_GET_RM3(val) + UMC_RMSEL_BASE; 3024 cs->ucs_rm_bits[2] = UMC_RMSEL_DDR5_GET_RM2(val) + UMC_RMSEL_BASE; 3025 cs->ucs_rm_bits[1] = UMC_RMSEL_DDR5_GET_RM1(val) + UMC_RMSEL_BASE; 3026 cs->ucs_rm_bits[0] = UMC_RMSEL_DDR5_GET_RM0(val) + UMC_RMSEL_BASE; 3027 bcopy(cs->ucs_rm_bits, cs->ucs_rm_bits_sec, 3028 sizeof (cs->ucs_rm_bits)); 3029 3030 return (zen_umc_fill_dimm_common(umc, df, chan, dimmno, B_FALSE)); 3031 } 3032 3033 static void 3034 zen_umc_fill_ddr_type(zen_umc_t *umc, zen_umc_chan_t *chan) 3035 { 3036 umc_dimm_type_t dimm = UMC_DIMM_T_UNKNOWN; 3037 uint8_t val; 3038 3039 /* 3040 * The different UMC styles split into two groups. Those that support 3041 * DDR4 and those that support DDR5 (with the hybrid group being in the 3042 * DDR5 style camp). While all the values are consistent between 3043 * different ones (e.g. reserved values correspond to unsupported 3044 * items), we still check types based on the UMC's design type so if we 3045 * see something weird, we don't accidentally use an older value. 3046 */ 3047 val = UMC_UMCCFG_GET_DDR_TYPE(chan->chan_umccfg_raw); 3048 switch (umc->umc_fdata->zufd_umc_style) { 3049 case ZEN_UMC_UMC_S_DDR4: 3050 case ZEN_UMC_UMC_S_DDR4_APU: 3051 switch (val) { 3052 case UMC_UMCCFG_DDR4_T_DDR4: 3053 dimm = UMC_DIMM_T_DDR4; 3054 break; 3055 case UMC_UMCCFG_DDR4_T_LPDDR4: 3056 dimm = UMC_DIMM_T_LPDDR4; 3057 break; 3058 default: 3059 break; 3060 } 3061 break; 3062 case ZEN_UMC_UMC_S_HYBRID_LPDDR5: 3063 switch (val) { 3064 case UMC_UMCCFG_DDR5_T_LPDDR5: 3065 dimm = UMC_DIMM_T_LPDDR5; 3066 break; 3067 case UMC_UMCCFG_DDR5_T_LPDDR4: 3068 dimm = UMC_DIMM_T_LPDDR4; 3069 break; 3070 default: 3071 break; 3072 } 3073 break; 3074 case ZEN_UMC_UMC_S_DDR5: 3075 case ZEN_UMC_UMC_S_DDR5_APU: 3076 switch (val) { 3077 case UMC_UMCCFG_DDR5_T_DDR5: 3078 dimm = UMC_DIMM_T_DDR5; 3079 break; 3080 case UMC_UMCCFG_DDR5_T_LPDDR5: 3081 dimm = UMC_DIMM_T_LPDDR5; 3082 break; 3083 default: 3084 break; 3085 } 3086 break; 3087 } 3088 3089 chan->chan_type = dimm; 3090 } 3091 3092 /* 3093 * Use the DDR4 frequency table to determine the speed of this. Note that our 3094 * hybrid based UMCs use 8 bits for the clock, while the traditional DDR4 ones 3095 * only use 7. The caller is responsible for using the right mask for the UMC. 3096 */ 3097 static void 3098 zen_umc_fill_chan_ddr4(zen_umc_chan_t *chan, uint_t mstate, 3099 const uint32_t clock) 3100 { 3101 for (size_t i = 0; i < ARRAY_SIZE(zen_umc_ddr4_map); i++) { 3102 if (clock == zen_umc_ddr4_map[i].zufm_reg) { 3103 chan->chan_clock[mstate] = zen_umc_ddr4_map[i].zufm_mhz; 3104 chan->chan_speed[mstate] = 3105 zen_umc_ddr4_map[i].zufm_mts2; 3106 break; 3107 } 3108 } 3109 } 3110 3111 static void 3112 zen_umc_fill_chan_hyb_lpddr5(zen_umc_chan_t *chan, uint_t mstate) 3113 { 3114 const uint32_t reg = chan->chan_dramcfg_raw[mstate]; 3115 const uint32_t wck = UMC_DRAMCFG_HYB_GET_WCLKRATIO(reg); 3116 const uint32_t clock = UMC_DRAMCFG_HYB_GET_MEMCLK(reg); 3117 boolean_t twox; 3118 3119 switch (wck) { 3120 case UMC_DRAMCFG_WCLKRATIO_1TO2: 3121 twox = B_TRUE; 3122 break; 3123 case UMC_DRAMCFG_WCLKRATIO_1TO4: 3124 twox = B_FALSE; 3125 break; 3126 default: 3127 return; 3128 } 3129 3130 for (size_t i = 0; i < ARRAY_SIZE(zen_umc_lpddr5_map); i++) { 3131 if (clock == zen_umc_lpddr5_map[i].zufm_reg) { 3132 chan->chan_clock[mstate] = 3133 zen_umc_lpddr5_map[i].zufm_mhz; 3134 3135 if (twox) { 3136 chan->chan_speed[mstate] = 3137 zen_umc_lpddr5_map[i].zufm_mts2; 3138 } else { 3139 chan->chan_speed[mstate] = 3140 zen_umc_lpddr5_map[i].zufm_mts4; 3141 } 3142 break; 3143 } 3144 } 3145 } 3146 3147 /* 3148 * Determine the current operating frequency of the channel. This varies based 3149 * upon the type of UMC that we're operating on as there are multiple ways to 3150 * determine this. There are up to four memory P-states that exist in the UMC. 3151 * This grabs it for a single P-state at a time. 3152 * 3153 * Unlike other things, if we cannot determine the frequency of the clock or 3154 * transfer speed, we do not consider this fatal because that does not stop 3155 * decoding. It only means that we cannot give a bit of useful information to 3156 * topo. 3157 */ 3158 static void 3159 zen_umc_fill_chan_freq(zen_umc_t *umc, zen_umc_chan_t *chan, uint_t mstate) 3160 { 3161 const uint32_t cfg = chan->chan_dramcfg_raw[mstate]; 3162 const umc_dimm_type_t dimm_type = chan->chan_type; 3163 3164 switch (umc->umc_fdata->zufd_umc_style) { 3165 case ZEN_UMC_UMC_S_HYBRID_LPDDR5: 3166 if (dimm_type == UMC_DIMM_T_LPDDR5) { 3167 zen_umc_fill_chan_hyb_lpddr5(chan, mstate); 3168 } else if (dimm_type != UMC_DIMM_T_LPDDR4) { 3169 zen_umc_fill_chan_ddr4(chan, mstate, 3170 UMC_DRAMCFG_HYB_GET_MEMCLK(cfg)); 3171 } 3172 break; 3173 case ZEN_UMC_UMC_S_DDR4: 3174 case ZEN_UMC_UMC_S_DDR4_APU: 3175 zen_umc_fill_chan_ddr4(chan, mstate, 3176 UMC_DRAMCFG_DDR4_GET_MEMCLK(cfg)); 3177 break; 3178 case ZEN_UMC_UMC_S_DDR5: 3179 case ZEN_UMC_UMC_S_DDR5_APU: 3180 chan->chan_clock[mstate] = UMC_DRAMCFG_DDR5_GET_MEMCLK(cfg); 3181 if (dimm_type == UMC_DIMM_T_DDR5) { 3182 chan->chan_speed[mstate] = 2 * chan->chan_clock[mstate]; 3183 } else if (dimm_type == UMC_DIMM_T_LPDDR5) { 3184 switch (UMC_DRAMCFG_LPDDR5_GET_WCKRATIO(cfg)) { 3185 case UMC_DRAMCFG_WCLKRATIO_1TO2: 3186 chan->chan_speed[mstate] = 2 * 3187 chan->chan_clock[mstate]; 3188 break; 3189 case UMC_DRAMCFG_WCLKRATIO_1TO4: 3190 chan->chan_speed[mstate] = 4 * 3191 chan->chan_clock[mstate]; 3192 break; 3193 default: 3194 break; 3195 } 3196 } 3197 break; 3198 } 3199 } 3200 3201 /* 3202 * Fill common channel information. While the locations of many of the registers 3203 * changed between the DDR4-capable and DDR5-capable devices, the actual 3204 * contents are the same so we process them together. 3205 */ 3206 static boolean_t 3207 zen_umc_fill_chan_hash(zen_umc_t *umc, zen_umc_df_t *df, zen_umc_chan_t *chan, 3208 boolean_t ddr4) 3209 { 3210 int ret; 3211 smn_reg_t reg; 3212 uint32_t val; 3213 3214 const umc_chan_hash_flags_t flags = umc->umc_fdata->zufd_chan_hash; 3215 const uint32_t id = chan->chan_logid; 3216 umc_chan_hash_t *chash = &chan->chan_hash; 3217 chash->uch_flags = flags; 3218 3219 if ((flags & UMC_CHAN_HASH_F_BANK) != 0) { 3220 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_BANK_HASH; i++) { 3221 umc_bank_hash_t *bank = &chash->uch_bank_hashes[i]; 3222 3223 if (ddr4) { 3224 reg = UMC_BANK_HASH_DDR4(id, i); 3225 } else { 3226 reg = UMC_BANK_HASH_DDR5(id, i); 3227 } 3228 3229 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, 3230 &val)) != 0) { 3231 dev_err(umc->umc_dip, CE_WARN, "failed to read " 3232 "bank hash register %x: %d", 3233 SMN_REG_ADDR(reg), ret); 3234 return (B_FALSE); 3235 } 3236 3237 bank->ubh_row_xor = UMC_BANK_HASH_GET_ROW(val); 3238 bank->ubh_col_xor = UMC_BANK_HASH_GET_COL(val); 3239 bank->ubh_en = UMC_BANK_HASH_GET_EN(val); 3240 } 3241 } 3242 3243 if ((flags & UMC_CHAN_HASH_F_RM) != 0) { 3244 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_RM_HASH; i++) { 3245 uint64_t addr; 3246 umc_addr_hash_t *rm = &chash->uch_rm_hashes[i]; 3247 3248 if (ddr4) { 3249 reg = UMC_RANK_HASH_DDR4(id, i); 3250 } else { 3251 reg = UMC_RANK_HASH_DDR5(id, i); 3252 } 3253 3254 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, 3255 &val)) != 0) { 3256 dev_err(umc->umc_dip, CE_WARN, "failed to read " 3257 "rm hash register %x: %d", 3258 SMN_REG_ADDR(reg), ret); 3259 return (B_FALSE); 3260 } 3261 3262 addr = UMC_RANK_HASH_GET_ADDR(val); 3263 rm->uah_addr_xor = addr << UMC_RANK_HASH_SHIFT; 3264 rm->uah_en = UMC_RANK_HASH_GET_EN(val); 3265 3266 if (ddr4 || (umc->umc_fdata->zufd_flags & 3267 ZEN_UMC_FAM_F_UMC_EADDR) == 0) { 3268 continue; 3269 } 3270 3271 reg = UMC_RANK_HASH_EXT_DDR5(id, i); 3272 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, 3273 &val)) != 0) { 3274 dev_err(umc->umc_dip, CE_WARN, "failed to read " 3275 "rm hash ext register %x: %d", 3276 SMN_REG_ADDR(reg), ret); 3277 return (B_FALSE); 3278 } 3279 3280 addr = UMC_RANK_HASH_EXT_GET_ADDR(val); 3281 rm->uah_addr_xor |= addr << 3282 UMC_RANK_HASH_EXT_ADDR_SHIFT; 3283 } 3284 } 3285 3286 if ((flags & UMC_CHAN_HASH_F_PC) != 0) { 3287 umc_pc_hash_t *pc = &chash->uch_pc_hash; 3288 3289 if (ddr4) { 3290 reg = UMC_PC_HASH_DDR4(id); 3291 } else { 3292 reg = UMC_PC_HASH_DDR5(id); 3293 } 3294 3295 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 3296 dev_err(umc->umc_dip, CE_WARN, "failed to read pc hash " 3297 "register %x: %d", SMN_REG_ADDR(reg), ret); 3298 return (B_FALSE); 3299 } 3300 3301 pc->uph_row_xor = UMC_PC_HASH_GET_ROW(val); 3302 pc->uph_col_xor = UMC_PC_HASH_GET_COL(val); 3303 pc->uph_en = UMC_PC_HASH_GET_EN(val); 3304 3305 if (ddr4) { 3306 reg = UMC_PC_HASH2_DDR4(id); 3307 } else { 3308 reg = UMC_PC_HASH2_DDR5(id); 3309 } 3310 3311 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 3312 dev_err(umc->umc_dip, CE_WARN, "failed to read pc hash " 3313 "2 register %x: %d", SMN_REG_ADDR(reg), ret); 3314 return (B_FALSE); 3315 } 3316 3317 pc->uph_bank_xor = UMC_PC_HASH2_GET_BANK(val); 3318 } 3319 3320 if ((flags & UMC_CHAN_HASH_F_CS) != 0) { 3321 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_CS_HASH; i++) { 3322 uint64_t addr; 3323 umc_addr_hash_t *rm = &chash->uch_cs_hashes[i]; 3324 3325 if (ddr4) { 3326 reg = UMC_CS_HASH_DDR4(id, i); 3327 } else { 3328 reg = UMC_CS_HASH_DDR5(id, i); 3329 } 3330 3331 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, 3332 &val)) != 0) { 3333 dev_err(umc->umc_dip, CE_WARN, "failed to read " 3334 "cs hash register %x", SMN_REG_ADDR(reg)); 3335 return (B_FALSE); 3336 } 3337 3338 addr = UMC_CS_HASH_GET_ADDR(val); 3339 rm->uah_addr_xor = addr << UMC_CS_HASH_SHIFT; 3340 rm->uah_en = UMC_CS_HASH_GET_EN(val); 3341 3342 if (ddr4 || (umc->umc_fdata->zufd_flags & 3343 ZEN_UMC_FAM_F_UMC_EADDR) == 0) { 3344 continue; 3345 } 3346 3347 reg = UMC_CS_HASH_EXT_DDR5(id, i); 3348 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, 3349 &val)) != 0) { 3350 dev_err(umc->umc_dip, CE_WARN, "failed to read " 3351 "cs hash ext register %x", 3352 SMN_REG_ADDR(reg)); 3353 return (B_FALSE); 3354 } 3355 3356 addr = UMC_CS_HASH_EXT_GET_ADDR(val); 3357 rm->uah_addr_xor |= addr << UMC_CS_HASH_EXT_ADDR_SHIFT; 3358 } 3359 } 3360 3361 return (B_TRUE); 3362 } 3363 3364 /* 3365 * This fills in settings that we care about which are valid for the entire 3366 * channel and are the same between DDR4/5 capable devices. 3367 */ 3368 static boolean_t 3369 zen_umc_fill_chan(zen_umc_t *umc, zen_umc_df_t *df, zen_umc_chan_t *chan) 3370 { 3371 uint32_t val; 3372 smn_reg_t reg; 3373 const uint32_t id = chan->chan_logid; 3374 int ret; 3375 boolean_t ddr4; 3376 3377 if (umc->umc_fdata->zufd_umc_style == ZEN_UMC_UMC_S_DDR4 || 3378 umc->umc_fdata->zufd_umc_style == ZEN_UMC_UMC_S_DDR4_APU) { 3379 ddr4 = B_TRUE; 3380 } else { 3381 ddr4 = B_FALSE; 3382 } 3383 3384 /* 3385 * Begin by gathering all of the information related to hashing. What is 3386 * valid here varies based on the actual chip family and then the 3387 * registers vary based on DDR4 and DDR5. 3388 */ 3389 if (!zen_umc_fill_chan_hash(umc, df, chan, ddr4)) { 3390 return (B_FALSE); 3391 } 3392 3393 reg = UMC_UMCCFG(id); 3394 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 3395 dev_err(umc->umc_dip, CE_WARN, "failed to read UMC " 3396 "configuration register %x: %d", SMN_REG_ADDR(reg), ret); 3397 return (B_FALSE); 3398 } 3399 3400 chan->chan_umccfg_raw = val; 3401 if (UMC_UMCCFG_GET_ECC_EN(val)) { 3402 chan->chan_flags |= UMC_CHAN_F_ECC_EN; 3403 } 3404 3405 /* 3406 * Grab the DRAM configuration register. This can be used to determine 3407 * the frequency and speed of the memory channel. At this time we only 3408 * capture Memory P-state 0. 3409 */ 3410 reg = UMC_DRAMCFG(id, 0); 3411 3412 /* 3413 * This register contains information to determine the type of DIMM. 3414 * All DIMMs in the channel must be the same type so we leave this 3415 * setting on the channel. Once we have that, we proceed to obtain the 3416 * currently configuration information for the DRAM in each memory 3417 * P-state. 3418 */ 3419 zen_umc_fill_ddr_type(umc, chan); 3420 for (uint_t i = 0; i < ZEN_UMC_NMEM_PSTATES; i++) { 3421 chan->chan_clock[i] = ZEN_UMC_UNKNOWN_FREQ; 3422 chan->chan_speed[i] = ZEN_UMC_UNKNOWN_FREQ; 3423 3424 reg = UMC_DRAMCFG(id, i); 3425 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 3426 dev_err(umc->umc_dip, CE_WARN, "failed to read DRAM " 3427 "Configuration register P-state %u %x: %d", i, 3428 SMN_REG_ADDR(reg), ret); 3429 return (B_FALSE); 3430 } 3431 chan->chan_dramcfg_raw[i] = val; 3432 3433 zen_umc_fill_chan_freq(umc, chan, i); 3434 } 3435 3436 /* 3437 * Grab data that we can use to determine if we're scrambling or 3438 * encrypting regions of memory. 3439 */ 3440 reg = UMC_DATACTL(id); 3441 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 3442 dev_err(umc->umc_dip, CE_WARN, "failed to read data control " 3443 "register %x: %d", SMN_REG_ADDR(reg), ret); 3444 return (B_FALSE); 3445 } 3446 chan->chan_datactl_raw = val; 3447 if (UMC_DATACTL_GET_SCRAM_EN(val)) { 3448 chan->chan_flags |= UMC_CHAN_F_SCRAMBLE_EN; 3449 } 3450 3451 if (UMC_DATACTL_GET_ENCR_EN(val)) { 3452 chan->chan_flags |= UMC_CHAN_F_ENCR_EN; 3453 } 3454 3455 /* 3456 * At the moment we snapshot the raw ECC control information. When we do 3457 * further work of making this a part of the MCA/X decoding, we'll want 3458 * to further take this apart for syndrome decoding. Until then, simply 3459 * cache it for future us and observability. 3460 */ 3461 reg = UMC_ECCCTL(id); 3462 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 3463 dev_err(umc->umc_dip, CE_WARN, "failed to read ECC control " 3464 "register %x: %d", SMN_REG_ADDR(reg), ret); 3465 return (B_FALSE); 3466 } 3467 chan->chan_eccctl_raw = val; 3468 3469 /* 3470 * Read and snapshot the UMC capability registers for debugging in the 3471 * future. 3472 */ 3473 reg = UMC_UMCCAP(id); 3474 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 3475 dev_err(umc->umc_dip, CE_WARN, "failed to read UMC cap" 3476 "register %x: %d", SMN_REG_ADDR(reg), ret); 3477 return (B_FALSE); 3478 } 3479 chan->chan_umccap_raw = val; 3480 3481 reg = UMC_UMCCAP_HI(id); 3482 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 3483 dev_err(umc->umc_dip, CE_WARN, "failed to read UMC cap high " 3484 "register %x: %d", SMN_REG_ADDR(reg), ret); 3485 return (B_FALSE); 3486 } 3487 chan->chan_umccap_hi_raw = val; 3488 3489 return (B_TRUE); 3490 } 3491 3492 static int 3493 zen_umc_fill_umc_cb(const uint_t dfno, const uint32_t fabid, 3494 const uint32_t instid, void *arg) 3495 { 3496 zen_umc_t *umc = arg; 3497 zen_umc_df_t *df = &umc->umc_dfs[dfno]; 3498 zen_umc_chan_t *chan = &df->zud_chan[df->zud_nchan]; 3499 3500 df->zud_nchan++; 3501 VERIFY3U(df->zud_nchan, <=, ZEN_UMC_MAX_UMCS); 3502 3503 /* 3504 * The data fabric is generally organized such that all UMC entries 3505 * should be continuous in their fabric ID space; however, we don't 3506 * want to rely on specific ID locations. The UMC SMN addresses are 3507 * organized in a relative order. To determine the SMN ID to use (the 3508 * chan_logid) we assume the iteration order will always be from the 3509 * lowest Instance ID to the highest Instance ID. But using the 3510 * iteration index is not enough as there's still an unstated assumption 3511 * that we'll encounter all the UMCs -- even those with no DIMMs 3512 * populated. While this previously seemed like a reasonable assumption 3513 * (every system in question behaved as such), it is seemingly no longer 3514 * always the case: 3515 * 3516 * On a 12-channel SP5 system (running either Genoa or Turin), the DF 3517 * reports 16 CS entities (of which 12 should be the UMCs). But with 3518 * DIMMs only in channels A and G (each of which are mapped to different 3519 * UMCs and not necessarily in alphabetic order), we only discover 3520 * 2 UMCs and so end up with something like: 3521 * zud_nchan = 2 3522 * zud_chan[0].chan_instid = 3 // (A) 3523 * zud_chan[1].chan_instid = 9 // (G) 3524 * 3525 * Attempting to use the logical zud_chan index (0/1) as the SMN ID 3526 * for the UMC registers returns misleading results, e.g., our DIMM 3527 * presence check claims there are none whereas using the Instance 3528 * IDs (3/9) returns the correct results. 3529 * 3530 * Taking that all into account then we arrive at 3531 * chan_logid = chan_instid - <Base UMC Instance ID> 3532 * 3533 * Unfortunately though, there's no way to determine what that base ID 3534 * should be programmatically and so we hardcore it as part of the 3535 * static per SoC family data. 3536 */ 3537 chan->chan_logid = instid - umc->umc_fdata->zufd_base_instid; 3538 chan->chan_fabid = fabid; 3539 chan->chan_instid = instid; 3540 chan->chan_nrules = umc->umc_fdata->zufd_cs_nrules; 3541 for (uint_t i = 0; i < umc->umc_fdata->zufd_cs_nrules; i++) { 3542 if (zen_umc_read_dram_rule(umc, dfno, instid, i, 3543 &chan->chan_rules[i]) != 0) { 3544 return (-1); 3545 } 3546 } 3547 3548 for (uint_t i = 0; i < umc->umc_fdata->zufd_cs_nrules - 1; i++) { 3549 int ret; 3550 uint32_t offset; 3551 uint64_t t; 3552 df_reg_def_t off_reg; 3553 chan_offset_t *offp = &chan->chan_offsets[i]; 3554 3555 switch (umc->umc_df_rev) { 3556 case DF_REV_2: 3557 case DF_REV_3: 3558 case DF_REV_3P5: 3559 ASSERT3U(i, ==, 0); 3560 off_reg = DF_DRAM_OFFSET_V2; 3561 break; 3562 case DF_REV_4: 3563 case DF_REV_4D2: 3564 off_reg = DF_DRAM_OFFSET_V4(i); 3565 break; 3566 default: 3567 dev_err(umc->umc_dip, CE_WARN, "!encountered " 3568 "unsupported DF revision processing DRAM Offsets: " 3569 "0x%x", umc->umc_df_rev); 3570 return (-1); 3571 } 3572 3573 if ((ret = amdzen_c_df_read32(dfno, instid, off_reg, 3574 &offset)) != 0) { 3575 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM " 3576 "offset %u on 0x%x/0x%x: %d", i, dfno, instid, ret); 3577 return (-1); 3578 } 3579 3580 offp->cho_raw = offset; 3581 offp->cho_valid = DF_DRAM_OFFSET_GET_EN(offset); 3582 3583 switch (umc->umc_df_rev) { 3584 case DF_REV_2: 3585 t = DF_DRAM_OFFSET_V2_GET_OFFSET(offset); 3586 break; 3587 case DF_REV_3: 3588 case DF_REV_3P5: 3589 t = DF_DRAM_OFFSET_V3_GET_OFFSET(offset); 3590 break; 3591 case DF_REV_4: 3592 case DF_REV_4D2: 3593 t = DF_DRAM_OFFSET_V4_GET_OFFSET(offset); 3594 break; 3595 default: 3596 dev_err(umc->umc_dip, CE_WARN, "!encountered " 3597 "unsupported DF revision processing DRAM Offsets: " 3598 "0x%x", umc->umc_df_rev); 3599 return (-1); 3600 } 3601 offp->cho_offset = t << DF_DRAM_OFFSET_SHIFT; 3602 } 3603 3604 /* 3605 * If this platform supports our favorete Zen 3 6-channel hash special 3606 * then we need to grab the NP2 configuration registers. This will only 3607 * be referenced if this channel is actually being used for a 6-channel 3608 * hash, so even if the contents are weird that should still be ok. 3609 */ 3610 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_NP2) != 0) { 3611 uint32_t np2; 3612 int ret; 3613 3614 if ((ret = amdzen_c_df_read32(dfno, instid, DF_NP2_CONFIG_V3, 3615 &np2)) != 0) { 3616 dev_err(umc->umc_dip, CE_WARN, "!failed to read NP2 " 3617 "config: %d", ret); 3618 return (-1); 3619 } 3620 3621 chan->chan_np2_raw = np2; 3622 chan->chan_np2_space0 = DF_NP2_CONFIG_V3_GET_SPACE0(np2); 3623 } 3624 3625 /* 3626 * Now that we have everything we need from the data fabric, read out 3627 * the rest of what we need from the UMC channel data in SMN register 3628 * space. 3629 */ 3630 switch (umc->umc_fdata->zufd_umc_style) { 3631 case ZEN_UMC_UMC_S_DDR4: 3632 case ZEN_UMC_UMC_S_DDR4_APU: 3633 for (uint_t i = 0; i < ZEN_UMC_MAX_DIMMS; i++) { 3634 if (!zen_umc_fill_chan_dimm_ddr4(umc, df, chan, i)) { 3635 return (-1); 3636 } 3637 } 3638 break; 3639 case ZEN_UMC_UMC_S_HYBRID_LPDDR5: 3640 case ZEN_UMC_UMC_S_DDR5: 3641 case ZEN_UMC_UMC_S_DDR5_APU: 3642 for (uint_t i = 0; i < ZEN_UMC_MAX_DIMMS; i++) { 3643 for (uint_t r = 0; r < ZEN_UMC_MAX_CS_PER_DIMM; r++) { 3644 if (!zen_umc_fill_chan_rank_ddr5(umc, df, chan, 3645 i, r)) { 3646 return (-1); 3647 } 3648 } 3649 } 3650 break; 3651 default: 3652 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported " 3653 "Zen family: 0x%x", umc->umc_fdata->zufd_umc_style); 3654 return (-1); 3655 } 3656 3657 if (!zen_umc_fill_chan(umc, df, chan)) { 3658 return (-1); 3659 } 3660 3661 return (0); 3662 } 3663 3664 /* 3665 * Today there are no privileges for the memory controller information, it is 3666 * restricted based on file system permissions. 3667 */ 3668 static int 3669 zen_umc_open(dev_t *devp, int flag, int otyp, cred_t *credp) 3670 { 3671 zen_umc_t *umc = zen_umc; 3672 3673 if ((flag & (FEXCL | FNDELAY | FNONBLOCK | FWRITE)) != 0) { 3674 return (EINVAL); 3675 } 3676 3677 if (otyp != OTYP_CHR) { 3678 return (EINVAL); 3679 } 3680 3681 if (getminor(*devp) >= umc->umc_ndfs) { 3682 return (ENXIO); 3683 } 3684 3685 return (0); 3686 } 3687 3688 static void 3689 zen_umc_ioctl_decode(zen_umc_t *umc, mc_encode_ioc_t *encode) 3690 { 3691 zen_umc_decoder_t dec; 3692 uint32_t sock, die, comp; 3693 3694 bzero(&dec, sizeof (dec)); 3695 if (!zen_umc_decode_pa(umc, encode->mcei_pa, &dec)) { 3696 encode->mcei_err = (uint32_t)dec.dec_fail; 3697 encode->mcei_errdata = dec.dec_fail_data; 3698 return; 3699 } 3700 3701 encode->mcei_errdata = 0; 3702 encode->mcei_err = 0; 3703 encode->mcei_chan_addr = dec.dec_norm_addr; 3704 encode->mcei_rank_addr = UINT64_MAX; 3705 encode->mcei_board = 0; 3706 zen_fabric_id_decompose(&umc->umc_decomp, dec.dec_targ_fabid, &sock, 3707 &die, &comp); 3708 encode->mcei_chip = sock; 3709 encode->mcei_die = die; 3710 encode->mcei_mc = dec.dec_umc_chan->chan_logid; 3711 encode->mcei_chan = 0; 3712 encode->mcei_dimm = dec.dec_dimm_no; 3713 encode->mcei_row = dec.dec_dimm_row; 3714 encode->mcei_column = dec.dec_dimm_col; 3715 /* 3716 * We don't have a logical rank that something matches to, we have the 3717 * actual chip-select and rank multiplication. If we could figure out 3718 * how to transform that into an actual rank, that'd be grand. 3719 */ 3720 encode->mcei_rank = UINT8_MAX; 3721 encode->mcei_cs = dec.dec_dimm_csno; 3722 encode->mcei_rm = dec.dec_dimm_rm; 3723 encode->mcei_bank = dec.dec_dimm_bank; 3724 encode->mcei_bank_group = dec.dec_dimm_bank_group; 3725 encode->mcei_subchan = dec.dec_dimm_subchan; 3726 } 3727 3728 static void 3729 umc_decoder_pack(zen_umc_t *umc) 3730 { 3731 char *buf = NULL; 3732 size_t len = 0; 3733 3734 ASSERT(MUTEX_HELD(&umc->umc_nvl_lock)); 3735 if (umc->umc_decoder_buf != NULL) { 3736 return; 3737 } 3738 3739 if (umc->umc_decoder_nvl == NULL) { 3740 umc->umc_decoder_nvl = zen_umc_dump_decoder(umc); 3741 if (umc->umc_decoder_nvl == NULL) { 3742 return; 3743 } 3744 } 3745 3746 if (nvlist_pack(umc->umc_decoder_nvl, &buf, &len, NV_ENCODE_XDR, 3747 KM_NOSLEEP_LAZY) != 0) { 3748 return; 3749 } 3750 3751 umc->umc_decoder_buf = buf; 3752 umc->umc_decoder_len = len; 3753 } 3754 3755 static int 3756 zen_umc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 3757 int *rvalp) 3758 { 3759 int ret; 3760 zen_umc_t *umc = zen_umc; 3761 mc_encode_ioc_t encode; 3762 mc_snapshot_info_t info; 3763 3764 if (getminor(dev) >= umc->umc_ndfs) { 3765 return (ENXIO); 3766 } 3767 3768 switch (cmd) { 3769 case MC_IOC_DECODE_PA: 3770 if (crgetzoneid(credp) != GLOBAL_ZONEID || 3771 drv_priv(credp) != 0) { 3772 ret = EPERM; 3773 break; 3774 } 3775 3776 if (ddi_copyin((void *)arg, &encode, sizeof (encode), 3777 mode & FKIOCTL) != 0) { 3778 ret = EFAULT; 3779 break; 3780 } 3781 3782 zen_umc_ioctl_decode(umc, &encode); 3783 ret = 0; 3784 3785 if (ddi_copyout(&encode, (void *)arg, sizeof (encode), 3786 mode & FKIOCTL) != 0) { 3787 ret = EFAULT; 3788 break; 3789 } 3790 break; 3791 case MC_IOC_DECODE_SNAPSHOT_INFO: 3792 mutex_enter(&umc->umc_nvl_lock); 3793 umc_decoder_pack(umc); 3794 3795 if (umc->umc_decoder_buf == NULL) { 3796 mutex_exit(&umc->umc_nvl_lock); 3797 ret = EIO; 3798 break; 3799 } 3800 3801 if (umc->umc_decoder_len > UINT32_MAX) { 3802 mutex_exit(&umc->umc_nvl_lock); 3803 ret = EOVERFLOW; 3804 break; 3805 } 3806 3807 info.mcs_size = umc->umc_decoder_len; 3808 info.mcs_gen = 0; 3809 if (ddi_copyout(&info, (void *)arg, sizeof (info), 3810 mode & FKIOCTL) != 0) { 3811 mutex_exit(&umc->umc_nvl_lock); 3812 ret = EFAULT; 3813 break; 3814 } 3815 3816 mutex_exit(&umc->umc_nvl_lock); 3817 ret = 0; 3818 break; 3819 case MC_IOC_DECODE_SNAPSHOT: 3820 mutex_enter(&umc->umc_nvl_lock); 3821 umc_decoder_pack(umc); 3822 3823 if (umc->umc_decoder_buf == NULL) { 3824 mutex_exit(&umc->umc_nvl_lock); 3825 ret = EIO; 3826 break; 3827 } 3828 3829 if (ddi_copyout(umc->umc_decoder_buf, (void *)arg, 3830 umc->umc_decoder_len, mode & FKIOCTL) != 0) { 3831 mutex_exit(&umc->umc_nvl_lock); 3832 ret = EFAULT; 3833 break; 3834 } 3835 3836 mutex_exit(&umc->umc_nvl_lock); 3837 ret = 0; 3838 break; 3839 default: 3840 ret = ENOTTY; 3841 break; 3842 } 3843 3844 return (ret); 3845 } 3846 3847 static int 3848 zen_umc_close(dev_t dev, int flag, int otyp, cred_t *credp) 3849 { 3850 return (0); 3851 } 3852 3853 static void 3854 zen_umc_cleanup(zen_umc_t *umc) 3855 { 3856 nvlist_free(umc->umc_decoder_nvl); 3857 umc->umc_decoder_nvl = NULL; 3858 if (umc->umc_decoder_buf != NULL) { 3859 kmem_free(umc->umc_decoder_buf, umc->umc_decoder_len); 3860 umc->umc_decoder_buf = NULL; 3861 umc->umc_decoder_len = 0; 3862 } 3863 3864 if (umc->umc_dip != NULL) { 3865 ddi_remove_minor_node(umc->umc_dip, NULL); 3866 } 3867 mutex_destroy(&umc->umc_nvl_lock); 3868 kmem_free(umc, sizeof (zen_umc_t)); 3869 } 3870 3871 static int 3872 zen_umc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3873 { 3874 int ret; 3875 zen_umc_t *umc; 3876 3877 if (cmd == DDI_RESUME) { 3878 return (DDI_SUCCESS); 3879 } else if (cmd != DDI_ATTACH) { 3880 return (DDI_FAILURE); 3881 } 3882 if (zen_umc != NULL) { 3883 dev_err(dip, CE_WARN, "!zen_umc is already attached to a " 3884 "dev_info_t: %p", zen_umc->umc_dip); 3885 return (DDI_FAILURE); 3886 } 3887 3888 /* 3889 * To get us going, we need to do several bits of set up. First, we need 3890 * to use the knowledge about the actual hardware that we're using to 3891 * encode a bunch of different data: 3892 * 3893 * o The set of register styles and extra hardware features that exist 3894 * on the hardware platform. 3895 * o The number of actual rules there are for the CCMs and UMCs. 3896 * o How many actual things exist (DFs, etc.) 3897 * o Useful fabric and instance IDs for all of the different UMC 3898 * entries so we can actually talk to them. 3899 * 3900 * Only once we have all the above will we go dig into the actual data. 3901 */ 3902 umc = kmem_zalloc(sizeof (zen_umc_t), KM_SLEEP); 3903 mutex_init(&umc->umc_nvl_lock, NULL, MUTEX_DRIVER, NULL); 3904 umc->umc_family = chiprev_family(cpuid_getchiprev(CPU)); 3905 umc->umc_ndfs = amdzen_c_df_count(); 3906 umc->umc_dip = dip; 3907 3908 if (!zen_umc_identify(umc)) { 3909 dev_err(dip, CE_WARN, "!encountered unsupported CPU"); 3910 goto err; 3911 } 3912 3913 umc->umc_df_rev = amdzen_c_df_rev(); 3914 switch (umc->umc_df_rev) { 3915 case DF_REV_2: 3916 case DF_REV_3: 3917 case DF_REV_3P5: 3918 case DF_REV_4: 3919 case DF_REV_4D2: 3920 break; 3921 default: 3922 dev_err(dip, CE_WARN, "!encountered unknown DF revision: %x", 3923 umc->umc_df_rev); 3924 goto err; 3925 } 3926 3927 if ((ret = amdzen_c_df_fabric_decomp(&umc->umc_decomp)) != 0) { 3928 dev_err(dip, CE_WARN, "!failed to get fabric decomposition: %d", 3929 ret); 3930 } 3931 3932 umc->umc_tom = rdmsr(MSR_AMD_TOM); 3933 umc->umc_tom2 = rdmsr(MSR_AMD_TOM2); 3934 3935 /* 3936 * For each DF, start by reading all of the data that we need from it. 3937 * This involves finding a target CCM, reading all of the rules, 3938 * ancillary settings, and related. Then we'll do a pass over all of the 3939 * actual UMC targets there. 3940 */ 3941 for (uint_t i = 0; i < umc->umc_ndfs; i++) { 3942 if (amdzen_c_df_iter(i, ZEN_DF_TYPE_CCM_CPU, 3943 zen_umc_fill_ccm_cb, umc) < 0 || 3944 amdzen_c_df_iter(i, ZEN_DF_TYPE_CS_UMC, zen_umc_fill_umc_cb, 3945 umc) != 0) { 3946 goto err; 3947 } 3948 } 3949 3950 /* 3951 * Create a minor node for each df that we encounter. 3952 */ 3953 for (uint_t i = 0; i < umc->umc_ndfs; i++) { 3954 int ret; 3955 char minor[64]; 3956 3957 (void) snprintf(minor, sizeof (minor), "mc-umc-%u", i); 3958 if ((ret = ddi_create_minor_node(umc->umc_dip, minor, S_IFCHR, 3959 i, "ddi_mem_ctrl", 0)) != 0) { 3960 dev_err(dip, CE_WARN, "!failed to create minor %s: %d", 3961 minor, ret); 3962 goto err; 3963 } 3964 } 3965 3966 zen_umc = umc; 3967 return (DDI_SUCCESS); 3968 3969 err: 3970 zen_umc_cleanup(umc); 3971 return (DDI_FAILURE); 3972 } 3973 3974 static int 3975 zen_umc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 3976 { 3977 zen_umc_t *umc; 3978 3979 if (zen_umc == NULL || zen_umc->umc_dip == NULL) { 3980 return (DDI_FAILURE); 3981 } 3982 umc = zen_umc; 3983 3984 switch (cmd) { 3985 case DDI_INFO_DEVT2DEVINFO: 3986 *resultp = (void *)umc->umc_dip; 3987 break; 3988 case DDI_INFO_DEVT2INSTANCE: 3989 *resultp = (void *)(uintptr_t)ddi_get_instance( 3990 umc->umc_dip); 3991 break; 3992 default: 3993 return (DDI_FAILURE); 3994 } 3995 return (DDI_SUCCESS); 3996 } 3997 3998 static int 3999 zen_umc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 4000 { 4001 zen_umc_t *umc; 4002 4003 if (cmd == DDI_SUSPEND) { 4004 return (DDI_SUCCESS); 4005 } else if (cmd != DDI_DETACH) { 4006 return (DDI_FAILURE); 4007 } 4008 4009 if (zen_umc == NULL) { 4010 dev_err(dip, CE_WARN, "!asked to detach zen_umc, but it " 4011 "was never successfully attached"); 4012 return (DDI_FAILURE); 4013 } 4014 4015 umc = zen_umc; 4016 zen_umc = NULL; 4017 zen_umc_cleanup(umc); 4018 return (DDI_SUCCESS); 4019 } 4020 4021 static struct cb_ops zen_umc_cb_ops = { 4022 .cb_open = zen_umc_open, 4023 .cb_close = zen_umc_close, 4024 .cb_strategy = nodev, 4025 .cb_print = nodev, 4026 .cb_dump = nodev, 4027 .cb_read = nodev, 4028 .cb_write = nodev, 4029 .cb_ioctl = zen_umc_ioctl, 4030 .cb_devmap = nodev, 4031 .cb_mmap = nodev, 4032 .cb_segmap = nodev, 4033 .cb_chpoll = nochpoll, 4034 .cb_prop_op = ddi_prop_op, 4035 .cb_flag = D_MP, 4036 .cb_rev = CB_REV, 4037 .cb_aread = nodev, 4038 .cb_awrite = nodev 4039 }; 4040 4041 static struct dev_ops zen_umc_dev_ops = { 4042 .devo_rev = DEVO_REV, 4043 .devo_refcnt = 0, 4044 .devo_getinfo = zen_umc_getinfo, 4045 .devo_identify = nulldev, 4046 .devo_probe = nulldev, 4047 .devo_attach = zen_umc_attach, 4048 .devo_detach = zen_umc_detach, 4049 .devo_reset = nodev, 4050 .devo_quiesce = ddi_quiesce_not_needed, 4051 .devo_cb_ops = &zen_umc_cb_ops 4052 }; 4053 4054 static struct modldrv zen_umc_modldrv = { 4055 .drv_modops = &mod_driverops, 4056 .drv_linkinfo = "AMD Zen Unified Memory Controller", 4057 .drv_dev_ops = &zen_umc_dev_ops 4058 }; 4059 4060 static struct modlinkage zen_umc_modlinkage = { 4061 .ml_rev = MODREV_1, 4062 .ml_linkage = { &zen_umc_modldrv, NULL } 4063 }; 4064 4065 int 4066 _init(void) 4067 { 4068 return (mod_install(&zen_umc_modlinkage)); 4069 } 4070 4071 int 4072 _info(struct modinfo *modinfop) 4073 { 4074 return (mod_info(&zen_umc_modlinkage, modinfop)); 4075 } 4076 4077 int 4078 _fini(void) 4079 { 4080 return (mod_remove(&zen_umc_modlinkage)); 4081 } 4082