1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2022 Oxide Computer Company 14 */ 15 16 /* 17 * AMD Zen Unified Memory Controller Driver 18 * 19 * This file forms the core logic around transforming a physical address that 20 * we're used to using into a specific location on a DIMM. This has support for 21 * a wide range of AMD CPUs and APUs ranging from Zen 1 - Zen 4. 22 * 23 * The goal of this driver is to implement the infrastructure and support 24 * necessary to understand how DRAM requests are being routed in the system and 25 * to be able to map those to particular channels and then DIMMs. This is used 26 * as part of RAS (reliability, availability, and serviceability) to enable 27 * aspects around understanding ECC errors, hardware topology, and more. Like 28 * with any software project, there is more to do here. Please see the Future 29 * Work section at the end of this big theory statement for more information. 30 * 31 * ------------------- 32 * Driver Organization 33 * ------------------- 34 * 35 * This driver is organized into two major pieces: 36 * 37 * 1. Logic to interface with hardware, discover the data fabric, memory 38 * controller configuration, and transform that into a normalized fashion 39 * that can be used across all different Zen family CPUs. This is 40 * implemented generally in this file, and is designed to assume it is in 41 * the kernel (as it requires access to the SMN, DF PCI registers, and the 42 * amdzen nexus driver client services). 43 * 44 * 2. Logic that can take the above normalized memory information and perform 45 * decoding (e.g. 
physical address to DIMM information). This generally 46 * lives in common/mc/zen_umc/zen_umc_decode.c. This file is in common/, 47 * meaning it is designed to be shared by userland and the kernel. Even 48 * more so, it is designed to operate on a const version of our primary 49 * data structure (zen_umc_t), not allowing it to be modified. This allows 50 * us to more easily unit test the decoding logic and utilize it in other 51 * circumstances such as with the mcdecode utility. 52 * 53 * There is corresponding traditional dev_ops(9S) and cb_ops(9S) logic in the 54 * driver (currently this file) which take care of interfacing with the broader 55 * operating system environment. 56 * 57 * There is only ever one instance of this driver, e.g. it is a singleton in 58 * design pattern parlance. There is a single struct, the zen_umc_t found in the 59 * global (albeit static) variable zen_umc. This structure itself contains a 60 * hierarchical set of structures that describe the system. To make management 61 * of memory simpler, all of the nested structures that we discover from 62 * hardware are allocated in the same structure. The only exception to this rule 63 * is when we cache serialized nvlists for dumping. 64 * 65 * The organization of the structures inside the zen_umc_t, generally mimics the 66 * hardware organization and is structured as follows: 67 * 68 * +-----------+ 69 * | zen_umc_t | 70 * +-----------+ 71 * | 72 * +-------------------------------+ 73 * v v 74 * +--------------+ +--------------+ One instance of the 75 * | zen_umc_df_t | ... | zen_umc_df_t | zen_umc_df_t per 76 * +--------------+ +--------------+ discovered DF. 77 * ||| 78 * ||| 79 * ||| +----------------+ +----------------+ Global DRAM 80 * ||+--->| df_dram_rule_t | ... | df_dram_rule_t | rules for the 81 * || +----------------+ +----------------+ platform. 82 * || 83 * || +--------------------+ +--------------------+ UMC remap 84 * |+--->| zen_umc_cs_remap_t | ... 
| zen_umc_cs_remap_t | rule arrays. 85 * | +--------------------+ +--------------------+ 86 * | 87 * v 88 * +----------------+ +----------------+ One structure per 89 * | zen_umc_chan_t | ... | zen_umc_chan_t | discovered DDR4/5 90 * +----------------+ +----------------+ memory channel. 91 * |||| 92 * |||| 93 * |||| +----------------+ +----------------+ Channel specific 94 * |||+--->| df_dram_rule_t | ... | df_dram_rule_t | copy of DRAM rules. 95 * ||| +----------------+ +----------------+ Less than global. 96 * ||| 97 * ||| +---------------+ +---------------+ Per-Channel DRAM 98 * ||+---->| chan_offset_t | ... | chan_offset_t | offset that is used 99 * || +---------------+ +---------------+ for normalization. 100 * || 101 * || +-----------------+ Channel-specific 102 * |+----->| umc_chan_hash_t | hashing rules. 103 * | +-----------------+ 104 * | 105 * | +------------+ +------------+ One structure for 106 * +------>| umc_dimm_t | ... | umc_dimm_t | each DIMM in the 107 * +------------+ +------------+ channel. Always two. 108 * | 109 * | +----------+ +----------+ Per chip-select 110 * +---> | umc_cs_t | ... | umc_cs_t | data. Always two. 111 * +----------+ +----------+ 112 * 113 * In the data structures themselves you'll often find several pieces of data 114 * that have the term 'raw' in their name. The point of these is to basically 115 * capture the original value that we read from the register before processing 116 * it. These are generally used either for debugging or to help answer future 117 * curiosity without resorting to the udf and usmn tooling, which hopefully aren't 118 * actually installed on systems. 119 * 120 * With the exception of some of the members in the zen_umc_t that are around 121 * management of state for userland ioctls, everything in the structure is 122 * basically write-once and from that point on should be treated as read-only. 
123 * 124 * --------------- 125 * Memory Decoding 126 * --------------- 127 * 128 * To understand the process of memory decoding, it's worth going through and 129 * understanding a bunch of the terminology that is used in this process. As an 130 * additional reference when understanding this, you may want to turn to either 131 * an older generation AMD BIOS and Kernel Developer's Guide or the more current 132 * Processor Programming Reference. In addition, the imc driver, which is the 133 * Intel equivalent, also provides an additional bit of reference. 134 * 135 * SYSTEM ADDRESS 136 * 137 * This is a physical address and is the way that the operating system 138 * normally thinks of memory. System addresses can refer to many different 139 * things. For example, you have traditional DRAM, memory-mapped PCIe 140 * devices, peripherals that the processor exposes such as the xAPIC, data 141 * from the FCH (Fusion Controller Hub), etc. 142 * 143 * TOM, TOM2, and the DRAM HOLE 144 * 145 * Physical memory has a complicated layout on x86 in part because of 146 * support for traditional 16-bit and 32-bit systems. As a result, contrary 147 * to popular belief, DRAM is not at a consistent address range in the 148 * processor. AMD processors have a few different ranges. There is a 32-bit 149 * region that starts at effectively physical address zero and goes to the 150 * TOM MSR (top of memory -- Core::X86::Msr::TOP_MEM). This indicates a 151 * limit below 4 GiB, generally around 2 GiB. 152 * 153 * From there, the next region of DRAM starts at 4 GiB and goes to TOM2 154 * (top of memory 2 -- Core::X86::Msr::TOM2). The region between TOM and 155 * 4 GiB is called the DRAM hole. Physical addresses in this region are 156 * used for memory mapped I/O. This breaks up contiguous physical 157 * addresses being used for DRAM, creating a "hole". 
158 * 159 * DATA FABRIC 160 * 161 * The data fabric (DF) is the primary interface that different parts of 162 * the system use to communicate with one another. This includes the I/O 163 * engines (where PCIe traffic goes), CPU caches and their cores, memory 164 * channels, cross-socket communication, and a whole lot more. The first 165 * part of decoding addresses and figuring out which DRAM channel an 166 * address should be directed to all come from the data fabric. 167 * 168 * The data fabric is comprised of instances. So there is one instance for 169 * each group of cores, each memory channel, etc. Each instance has its own 170 * independent set of register information. As the data fabric is a series 171 * of devices exposed over PCI, if you do a normal PCI configuration space 172 * read or write that'll end up broadcasting the I/O. Instead, to access a 173 * particular instance's register information there is an indirect access 174 * mechanism. The primary way that this driver accesses data fabric 175 * registers is via these indirect reads. 176 * 177 * There is one instance of the Data Fabric per socket starting with Zen 2. 178 * In Zen 1, there was one instance of the data fabric per CCD -- core 179 * complex die (see cpuid.c's big theory statement for more information). 180 * 181 * DF INSTANCE ID 182 * 183 * A DF instance ID is an identifier for a single entity or component in a 184 * data fabric. The set of instance IDs is unique only within a single data 185 * fabric. So for example, each memory channel, I/O endpoint (e.g. PCIe 186 * logic), group of cores, has its own instance ID. Anything within the 187 * same data fabric (e.g. the same die) can be reached via its instance ID. 188 * The instance ID is used to indicate which instance to contact when 189 * performing indirect accesses. 190 * 191 * Not everything that has an instance ID will be globally routable (e.g. 192 * between multiple sockets). 
For things that are, such as the memory 193 * channels and coherent core initiators, there is a second ID called a 194 * fabric ID. 195 * 196 * DF FABRIC ID 197 * 198 * A DF fabric ID is an identifier that combines information to indicate 199 * both which instance of the data fabric a component is on and a component 200 * itself. So with this number you can distinguish between a memory channel 201 * on one of two sockets. A Fabric ID is made up of two parts. The upper 202 * part indicates which DF we are talking to and is referred to as a Node 203 * ID. The Node ID is itself broken into two parts: one that identifies a 204 * socket, and one that identifies a die. The lower part of a fabric ID is 205 * called a component ID and indicates which component in a particular data 206 * fabric that we are talking to. While only a subset of the total 207 * components in the data fabric are routable, for everything that is, its 208 * component ID matches its instance ID. 209 * 210 * Put differently, the component portion of a fabric ID and a component's 211 * instance ID are always the same for routable entities. For things which 212 * cannot be routed, they only have an instance ID and no fabric ID. 213 * Because this code is always interacting with data fabric components that 214 * are routable, sometimes instance ID and the component ID portion of the 215 * data fabric ID may be used interchangeably. 216 * 217 * Finally, it's worth calling out that the number of bits that are used to 218 * indicate the socket, die, and component in a fabric ID changes from 219 * hardware generation to hardware generation. 220 * 221 * Inside the code here, the socket and die decomposition information is 222 * always relative to the node ID. AMD phrases the decomposition 223 * information in terms of a series of masks and shifts. 
This is 224 * information that can be retrieved from the data fabric itself, allowing 225 * us to avoid hardcoding too much information other than which registers 226 * actually have which fields. With both masks and shifts, it's important 227 * to establish which comes first. We follow AMD's convention and always 228 * apply masks before shifts. With that, let's look at an example of a 229 * made up bit set: 230 * 231 * Assumptions (to make this example simple): 232 * o The fabric ID is 16 bits 233 * o The component ID is 8 bits 234 * o The node ID is 8 bits 235 * o The socket and die ID are both 4 bits 236 * 237 * Here, let's say that we have the ID 0x2106. This decomposes into a 238 * socket 0x2, die 0x1, and component 0x6. Here is how that works in more 239 * detail: 240 * 241 * 0x21 0x06 242 * |------| |------| 243 * Node ID Component ID 244 * Mask: 0xff00 0x00ff 245 * Shift: 8 0 246 * 247 * Next we would decompose the Node ID as: 248 * 0x2 0x1 249 * |------| |------| 250 * Sock ID Die ID 251 * Mask: 0xf0 0x0f 252 * Shift: 4 0 253 * 254 * Composing a fabric ID from its parts would work in a similar way by 255 * applying masks and shifts. 256 * 257 * NORMAL ADDRESS 258 * 259 * A normal address is one of the primary address types that AMD uses in 260 * memory decoding. It takes into account the DRAM hole, interleave 261 * settings, and is basically the address that is dispatched to the broader 262 * data fabric towards a particular DRAM channel. 263 * 264 * Often, phrases like 'normalizing the address' or normalization refer to 265 * the process of transforming a system address into the channel address. 266 * 267 * INTERLEAVING 268 * 269 * The idea of interleaving is to take a contiguous range and weave it 270 * between multiple different actual entities. Generally certain bits in 271 * the range are used to select one of several smaller regions. For 272 * example, if you have 8 regions each that are 4 GiB in size, that creates 273 * a single 32 GiB region. 
You can use three bits in that 32 GiB space to 274 * select one of the 8 regions. For a more visual example, see the 275 * definition of this in uts/intel/io/imc/imc.c. 276 * 277 * CHANNEL 278 * 279 * A channel is used to refer to a single memory channel. This is sometimes 280 * called a DRAM channel as well. A channel operates in a specific mode 281 * based on the JEDEC DRAM standards (e.g. DDR4, LPDDR5, etc.). A 282 * (LP)DDR4/5 channel may support up to two DIMMs inside the channel. The 283 * number of slots is platform dependent and from there the number of DIMMs 284 * installed can vary. Generally speaking, a DRAM channel defines a set 285 * number of signals, most of which go to all DIMMs in the channel, what 286 * varies is which "chip-select" is activated which causes a given DIMM to 287 * pay attention or not. 288 * 289 * DIMM 290 * 291 * A DIMM refers to a physical hardware component that is installed into a 292 * computer to provide access to dynamic memory. Originally this stood for 293 * dual-inline memory module, though the DIMM itself has evolved beyond 294 * that. A DIMM is organized into various pages, which are addressed by 295 * a combination of rows, columns, banks, bank groups, and ranks. How this 296 * fits together changes from generation to generation and is standardized 297 * in something like DDR4, LPDDR4, DDR5, LPDDR5, etc. These standards 298 * define the general individual modules that are assembled into a DIMM. 299 * There are slightly different standards for combined memory modules 300 * (which is what we use the term DIMM for). Examples of those include 301 * things like registered DIMMs (RDIMMs). 302 * 303 * A DDR4 DIMM contains a single channel that is 64-bits wide with 8 check 304 * bits. A DDR5 DIMM has a notable change in this scheme from earlier DDR 305 * standards. It breaks a single DDR5 DIMM into two sub-channels. Each 306 * sub-channel is independently addressed and contains 32-bits of data and 307 * 8-bits of check data. 
308 * 309 * ROW AND COLUMN 310 * 311 * The most basic building block of a DIMM is a die. A DIMM consists of 312 * multiple dies that are organized together (we'll discuss the 313 * organization next). A given die is organized into a series of rows and 314 * columns. First, one selects a row. At which point one is able to select 315 * a specific column. It is more expensive to change rows than columns, 316 * leading a given row to contain approximately 1 KiB of data spread across 317 * its columns. The exact size depends on the device. Each row/column is a 318 * series of capacitors and transistors. The transistor is used to select 319 * data from the capacitor and the capacitor actually contains the logical 320 * 0/1 value. 321 * 322 * BANKS AND BANK GROUPS 323 * 324 * An individual DRAM die is organized in something called a bank. A DIMM 325 * has a number of banks that sit in series. These are then grouped into 326 * larger bank groups. Generally speaking, each bank group has the same 327 * number of banks. Let's take a look at an example of a system with 4 328 * bank groups, each with 4 banks. 
329 * 330 * +-----------------------+ +-----------------------+ 331 * | Bank Group 0 | | Bank Group 1 | 332 * | +--------+ +--------+ | | +--------+ +--------+ | 333 * | | Bank 0 | | Bank 1 | | | | Bank 0 | | Bank 1 | | 334 * | +--------+ +--------+ | | +--------+ +--------+ | 335 * | +--------+ +--------+ | | +--------+ +--------+ | 336 * | | Bank 2 | | Bank 3 | | | | Bank 2 | | Bank 3 | | 337 * | +--------+ +--------+ | | +--------+ +--------+ | 338 * +-----------------------+ +-----------------------+ 339 * 340 * +-----------------------+ +-----------------------+ 341 * | Bank Group 2 | | Bank Group 3 | 342 * | +--------+ +--------+ | | +--------+ +--------+ | 343 * | | Bank 0 | | Bank 1 | | | | Bank 0 | | Bank 1 | | 344 * | +--------+ +--------+ | | +--------+ +--------+ | 345 * | +--------+ +--------+ | | +--------+ +--------+ | 346 * | | Bank 2 | | Bank 3 | | | | Bank 2 | | Bank 3 | | 347 * | +--------+ +--------+ | | +--------+ +--------+ | 348 * +-----------------------+ +-----------------------+ 349 * 350 * On a DIMM, only a single bank and bank group can be active at a time for 351 * reading or writing an 8 byte chunk of data. However, these are still 352 * pretty important and useful because of the time involved to switch 353 * between them. It is much cheaper to switch between bank groups than 354 * between banks and that time can be cheaper than activating a new row. 355 * This allows memory controllers to pipeline this substantially. 356 * 357 * RANK AND CHIP-SELECT 358 * 359 * The next level of organization is a rank. A rank is effectively an 360 * independent copy of all the bank and bank groups on a DIMM. That is, 361 * there are additional copies of the DIMM's organization, but not the data 362 * itself. Originally a 363 * single or dual rank DIMM was built such that one copy of everything was 364 * on each physical side of the DIMM. As the number of ranks has increased 365 * this has changed as well. 
Generally speaking, the contents of the rank 366 * are equivalent. That is, you have the same number of bank groups, banks, 367 * and each bank has the same number of rows and columns. 368 * 369 * Ranks are selected by what's called a chip-select, often abbreviated as 370 * CS_L in the various DRAM standards. AMD also often abbreviates this as a 371 * CS (which is not to be confused with the DF class of device called a 372 * CS). These signals are used to select a rank to activate on a DIMM. 373 * There are some number of these for each DIMM which is how the memory 374 * controller chooses which of the DIMMs it's actually going to activate in 375 * the system. 376 * 377 * One interesting gotcha here is how AMD organizes things. Each DIMM 378 * logically is broken into two chip-selects in hardware. Between DIMMs 379 * with more than 2 ranks and 3D stacked RDIMMs, there are ways to 380 * potentially activate more bits. Ultimately these are mapped to a series 381 * of rank multiplication logic internally. These ultimately then control 382 * some of these extra pins, though the exact method isn't 100% clear at 383 * this time. 384 * 385 * ----------------------- 386 * Rough Hardware Process 387 * ----------------------- 388 * 389 * To better understand how everything is implemented and structured, it's worth 390 * briefly describing what happens when hardware wants to read a given physical 391 * address. This is roughly summarized in the following chart. In the left hand 392 * side is the type of address, which is transformed and generally shrinks along 393 * the way. Next to it is the actor that is taking action and the type of 394 * address that it starts with. 395 * 396 * +---------+ +------+ 397 * | Virtual | | CPU | 398 * | Address | | Core | 399 * +---------+ +------+ 400 * | | The CPU core receives a memory request and then 401 * | * . . . . 
determines whether this request is DRAM or MMIO 402 * | | (memory-mapped I/O) and then sends it to the data 403 * v v fabric. 404 * +----------+ +--------+ 405 * | Physical | | Data | 406 * | Address | | Fabric | 407 * +----------+ +--------+ 408 * | | The data fabric instance in the CCX/D uses the 409 * | * . . . . programmed DRAM rules to determine what DRAM 410 * | | channel to direct a request to and what the 411 * | | channel-relative address is. It then sends the 412 * | | request through the fabric. Note, the number of 413 * | | DRAM rules varies based on the processor SoC. 414 * | | Server parts like Milan have many more rules than 415 * | | an APU like Cezanne. The DRAM rules tell us both 416 * v v how to find and normalize the physical address. 417 * +---------+ +---------+ 418 * | Channel | | DRAM | 419 * | Address | | Channel | 420 * +---------+ +---------+ 421 * | | The UMC (unified memory controller) receives the 422 * | * . . . . DRAM request and determines which DIMM to send 423 * | | the request to along with the rank, banks, row, 424 * | | column, etc. It initiates a DRAM transaction and 425 * | | then sends the results back through the data 426 * v v fabric to the CPU core. 427 * +---------+ +--------+ 428 * | DIMM | | Target | 429 * | Address | | DIMM | 430 * +---------+ +--------+ 431 * 432 * The above is all generally done in hardware. There are multiple steps 433 * internal to this that we end up mimicking in software. This includes things 434 * like, applying hashing logic, address transformations, and related. 435 * Thankfully the hardware is fairly generic and programmed with enough 436 * information that we can pull out to figure this out. The rest of this theory 437 * statement covers the major parts of this: interleaving, the act of 438 * determining which memory channel to actually go to, and normalization, the 439 * act of removing some portion of the physical address bits to determine the 440 * address relative to a channel. 
441 * 442 * ------------------------ 443 * Data Fabric Interleaving 444 * ------------------------ 445 * 446 * One of the major parts of address decoding is to understand how the 447 * interleaving features work in the data fabric. This is used to allow an 448 * address range to be spread out between multiple memory channels and then, 449 * later on, when normalizing the address. As mentioned above, a system address 450 * matches a rule which has information on interleaving. Interleaving comes in 451 * many different flavors. It can be used to just switch between channels, 452 * sockets, and dies. It can also end up involving some straightforward and some 453 * fairly complex hashing operations. 454 * 455 * Each DRAM rule has instructions on how to perform this interleaving. The way 456 * this works is that the rule first says to start at a given address bit, 457 * generally ranging from bit 8-12. These influence the granularity of the 458 * interleaving going on. From there, the rules determine how many bits to use 459 * from the address to determine the die, socket, and channel. In the simplest 460 * form, these perform a log2 of the actual number of things you're interleaving 461 * across (we'll come back to non-powers of two). So let's work a few common 462 * examples: 463 * 464 * o 8-channel interleave, 1-die interleave, 2-socket interleave 465 * Start at bit 9 466 * 467 * In this case we have 3 bits that determine the channel to use, 0 bits 468 * for the die, 1 bit for the socket. Here we would then use the following 469 * bits to determine what the channel, die, and socket IDs are: 470 * 471 * [12] - Socket ID 472 * [11:9] - Channel ID 473 * 474 * You'll note that there was no die-interleave, which means the die ID is 475 * always zero. This is the general thing you expect to see in Zen 2 and 3 476 * based systems as they only have one die or a Zen 1 APU. 
477 * 478 * o 2-channel interleave, 4-die interleave, 2-socket interleave 479 * Start at bit 10 480 * 481 * In this case we have 1 bit for the channel and socket interleave. We 482 * have 2 bits for the die. This is something you might see on a Zen 1 483 * system. This results in the following bits: 484 * 485 * [13] - Socket ID 486 * [12:11] - Die ID 487 * [10] - Channel ID 488 * 489 * 490 * COD and NPS HASHING 491 * 492 * However, this isn't the only primary extraction rule of the above values. The 493 * other primary method is using a hash. While the exact hash methods vary 494 * between Zen 2/3 and Zen 4 based systems, they follow a general scheme. In the 495 * system there are three interleaving configurations that are either global or 496 * enabled on a per-rule basis. These indicate whether one should perform the 497 * XOR computation using addresses at: 498 * 499 * o 64 KiB (starting at bit 16) 500 * o 2 MiB (starting at bit 21) 501 * o 1 GiB (starting at bit 30) 502 * 503 * In this world, you take the starting address bit defined by the rule and XOR 504 * it with each enabled interleave address. If you have more than one bit to 505 * select (e.g. because you are hashing across more than 2 channels), then you 506 * continue taking subsequent bits from each enabled region. So the second bit 507 * would use 17, 22, and 31 if all three ranges were enabled while the third bit 508 * would use 18, 23, and 32. While these are straightforward, there is a catch. 509 * 510 * While the DRAM rule contains the starting address bit, you don't 511 * actually use subsequent bits in the same way. Instead subsequent bits are 512 * deterministic and use bits 12 and 13 from the address. This is not the same 513 * consecutive thing that one might expect. Let's look at a Rome/Milan based 514 * example: 515 * 516 * o 8-channel "COD" hashing, starting at address 9. All three ranges enabled. 517 * 1-die and 1-socket interleaving. 
518 * 519 * In this model we are using 3 bits for the channel, 0 bits for the socket 520 * and die. 521 * 522 * Channel ID[0] = addr[9] ^ addr[16] ^ addr[21] ^ addr[30] 523 * Channel ID[1] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31] 524 * Channel ID[2] = addr[13] ^ addr[18] ^ addr[23] ^ addr[32] 525 * 526 * So through this scheme we'd have a socket/die of 0, and then the channel 527 * ID is computed based on that. The number of bits that we use here 528 * depends on how many channels the hash is going across. 529 * 530 * The Genoa and related variants, termed "NPS", have a few wrinkles. First, 531 * rather than 3 bits being used for the channel, up to 4 bits are. Second, 532 * while the Rome/Milan "COD" hash above does not support socket or die 533 * interleaving, the "NPS" hash actually supports socket interleaving. However, 534 * unlike the straightforward non-hashing scheme, the first bit is used to 535 * determine the socket when enabled as opposed to the last one. In addition, if 536 * we're not performing socket interleaving, then we end up throwing address bit 537 * 14 into the mix here. Let's look at examples: 538 * 539 * o 4-channel "NPS" hashing, starting at address 8. All three ranges enabled. 540 * 1-die and 1-socket interleaving. 541 * 542 * In this model we are using 2 bits for the channel, 0 bits for the socket 543 * and die. Because socket interleaving is not being used, bit 14 ends up 544 * being added into the first bit of the channel selection. Presumably this 545 * is to improve the address distribution in some form. 546 * 547 * Channel ID[0] = addr[8] ^ addr[16] ^ addr[21] ^ addr[30] ^ addr[14] 548 * Channel ID[1] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31] 549 * 550 * o 8-channel "NPS" hashing, starting at address 9. All three ranges enabled. 551 * 1-die and 2-socket interleaving. 552 * 553 * In this model we are using 3 bits for the channel and 1 for the socket. 554 * The die is always set to 0. 
Unlike the above, address bit 14 is not used 555 * because it ends up being required for the 4th address bit. 556 * 557 * Socket ID[0] = addr[9] ^ addr[16] ^ addr[21] ^ addr[30] 558 * Channel ID[0] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31] 559 * Channel ID[1] = addr[13] ^ addr[18] ^ addr[23] ^ addr[32] 560 * Channel ID[2] = addr[14] ^ addr[19] ^ addr[24] ^ addr[33] 561 * 562 * 563 * ZEN 3 6-CHANNEL 564 * 565 * These were the simple cases. Things get more complex when we move to 566 * non-power of 2 based hashes between channels. There are two different sets of 567 * these schemes. The first of these is 6-channel hashing that was added in Zen 568 * 3. The second of these is a more complex and general form that was added in 569 * Zen 4. Let's start with the Zen 3 case. The Zen 3 6-channel hash requires 570 * starting at address bits 11 or 12 and varies its logic somewhat from there. 571 * In the 6-channel world, the socket and die interleaving must be disabled. 572 * Let's walk through an example: 573 * 574 * o 6-channel Zen 3, starting at address 11. 2M and 1G range enabled. 575 * 1-die and 1-socket interleaving. 576 * 577 * Regardless of the starting address, we will always use three bits to 578 * determine a channel address. However, it's worth calling out that the 579 * 64K range is not considered for this at all. Another oddity is that when 580 * calculating the hash bits the order of the extracted 2M and 1G addresses 581 * are different. 582 * 583 * This flow starts by calculating the three hash bits. This is defined 584 * below. In the following, all bits marked with an '@' are ones that will 585 * change when starting at address bit 12. In those cases the value will 586 * increase by 1. 
Here's how we calculate the hash bits: 587 * 588 * hash[0] = addr[11@] ^ addr[14@] ^ addr[23] ^ addr[32] 589 * hash[1] = addr[12@] ^ addr[21] ^ addr[30] 590 * hash[2] = addr[13@] ^ addr[22] ^ addr[31] 591 * 592 * With this calculated, we always assign the first bit of the channel 593 * based on the hash. The other bits are more complicated as we have to 594 * deal with that gnarly power of two problem. We determine whether or not 595 * to use the hash bits directly in the channel based on their value. If 596 * they are not equal to 3, then we use it, otherwise if they are, then we 597 * need to go back to the physical address and we take its modulus. 598 * Basically: 599 * 600 * Channel Id[0] = hash[0] 601 * if (hash[2:1] == 3) 602 * Channel ID[2:1] = (addr >> [11@+3]) % 3 603 * else 604 * Channel ID[2:1] = hash[2:1] 605 * 606 * 607 * ZEN 4 NON-POWER OF 2 608 * 609 * I hope you like modulus calculations, because things get even more complex 610 * here now in Zen 4 which has many more modulus variations. These function in a 611 * similar way to the older 6-channel hash in Milan. They require one to start 612 * at address bit 8, they require that there is no die interleaving, and they 613 * support socket interleaving. The different channel arrangements end up in one 614 * of two sets of modulus values: a mod % 3 and a mod % 5 based on the number 615 * of channels used. Unlike the Milan form, all three address ranges (64 KiB, 2 616 * MiB, 1 GiB) are allowed to be used. 617 * 618 * o 6-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled. 619 * 1-die and 2-socket interleaving. 620 * 621 * We start by calculating the following set of hash bits regardless of 622 * the number of channels that exist. The set of hash bits that is actually 623 * used in various computations ends up varying based upon the number of 624 * channels used. In 3-5 configs, only hash[0] is used. 6-10, both hash[0] 625 * and hash[2] (yes, not hash[1]). 
The 12 channel config uses all three. 626 * 627 * hash[0] = addr[8] ^ addr[16] ^ addr[21] ^ addr[30] ^ addr[14] 628 * hash[1] = addr[12] ^ addr[17] ^ addr[22] ^ addr[31] 629 * hash[2] = addr[13] ^ addr[18] ^ addr[23] ^ addr[32] 630 * 631 * Unlike other schemes where bits directly map here, they instead are used 632 * to seed the overall value. Depending on whether hash[0] is a 0 or 1, the 633 * system goes through two different calculations entirely. Though all of 634 * them end up involving the remainder of the system address going through 635 * the modulus. In the following, a '3@' indicates the modulus value would 636 * be swapped to 5 in a different scenario. 637 * 638 * Channel ID = addr[63:14] % 3@ 639 * if (hash[0] == 1) 640 * Channel ID = (Channel ID + 1) % 3@ 641 * 642 * Once this base for the channel ID has been calculated, additional 643 * portions are added in. As this is the 6-channel form, we say: 644 * 645 * Channel ID = Channel ID + (hash[2] * 3@) 646 * 647 * Finally the socket is deterministic and always comes from hash[0]. 648 * Basically: 649 * 650 * Socket ID = hash[0] 651 * 652 * o 12-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled. 653 * 1-die and 1-socket interleaving. 654 * 655 * This is a variant of the above. The hash is calculated the same way. 656 * The base Channel ID is the same and if socket interleaving were enabled 657 * it would also be hash[0]. What instead differs is how we use hash[1] 658 * and hash[2]. The following logic is used instead of the final 659 * calculation above. 660 * 661 * Channel ID = Channel ID + (hash[2:1] * 3@) 662 * 663 * 664 * POST BIT EXTRACTION 665 * 666 * Now, all of this was done to concoct up a series of indexes used. However, 667 * you'll note that a given DRAM rule actually already has a fabric target. So 668 * what do we do here? We add them together. 
669 * 670 * The data fabric has registers that describe which bits in a fabric ID 671 * correspond to a socket, die, and channel. Taking the channel, die, and socket 672 * IDs above, one can construct a fabric ID. From there, we add the two data 673 * fabric IDs together and can then get to the fabric ID of the actual logical 674 * target. This is why all of the socket and die interleaving examples with no 675 * interleaving are OK to result in a zero. The idea here is that the base 676 * fabric ID in the DRAM rule will take care of indicating those other things as 677 * required. 678 * 679 * You'll note the use of the term "logical target" up above. That's because 680 * some platforms have the ability to remap logical targets to physical targets 681 * (identified by the use of the ZEN_UMC_FAM_F_TARG_REMAP flag in the family 682 * data). The way that remapping works changes based on the hardware generation. 683 * This was first added in Milan (Zen 3) CPUs. In that model, you would use the 684 * socket and component information from the target ID to identify which 685 * remapping rules to use. On Genoa (Zen 4) CPUs, you would instead use 686 * information in the rule itself to determine which of the remap rule sets to 687 * use and then uses the component ID to select which rewrite rule to use. 688 * 689 * Finally, there's one small wrinkle with this whole scheme that we haven't 690 * discussed: what actually is the address that we plug into this calculation. 691 * While you might think it actually is just the system address itself, that 692 * isn't actually always the case. Sometimes rather than using the address 693 * itself, it gets normalized based on the DRAM rule, which involves subtracting 694 * out the base address and potentially subtracting out the size of the DRAM 695 * hole (if the address is above the hole and hoisting is active for that 696 * range). When this is performed appears to tie to the DF generation. 
After Zen 697 * 3, it is always the default (e.g. Zen 4 and things from DF gen 3.5). At and 698 * before Zen 3, it only occurs if we are doing a non-power of 2 based hashing. 699 * 700 * -------------------------------------------- 701 * Data Fabric Interleave Address Normalization 702 * -------------------------------------------- 703 * 704 * While you may have thought that we were actually done with the normalization 705 * fun in the last section, there's still a bit more here that we need to 706 * consider. In particular, there's a secondary transformation beyond 707 * interleaving that occurs as part of constructing the channel normalized 708 * address. Effectively, we need to account for all the bits that were used in 709 * the interleaving and generally speaking remove them from our normalized 710 * address. 711 * 712 * While this may sound weird on paper, the way to think about it is that 713 * interleaving at some granularity means that each device is grabbing the same 714 * set of addresses, the interleave just is used to direct it to its own 715 * location. When working with a channel normalized address, we're effectively 716 * creating a new region of addresses that have meaning within the DIMMs 717 * themselves. The channel doesn't care about what got it there, mainly just 718 * what it is now. So with that in mind, we need to discuss how we remove all 719 * the interleaving information in our different modes. 720 * 721 * Just to make sure it's clear, we are _removing_ all bits that were used for 722 * interleaving. This causes all bits above the removed ones to be shifted 723 * right. 724 * 725 * First, we have the case of standard power of 2 interleaving that applies to 726 * the 1, 2, 4, 8, 16, and 32 channel configurations. Here, we need to account 727 * for the total number of bits that are used for the channel, die, and socket 728 * interleaving and we simply remove all those bits starting from the starting 729 * address. 
730 * 731 * o 8-channel interleave, 1-die interleave, 2-socket interleave 732 * Start at bit 9 733 * 734 * If we look at this example, we are using 3 bits for the channel, 1 for 735 * the socket, for a total of 4 bits. Because this is starting at bit 9, 736 * this means that interleaving covers the bit range [12:9]. In this case 737 * our new address would be (orig[63:13] >> 4) | orig[8:0]. 738 * 739 * 740 * COD and NPS HASHING 741 * 742 * That was the simple case, next we have the COD/NPS hashing case that we need 743 * to consider. If we look at these, the way that they work is that they split 744 * which bits they use for determining the channel address and then hash others 745 * in. Here, we need to extract the starting address bit, then continue at bit 746 * 12 based on the number of bits in use and whether or not socket interleaving 747 * is at play for the NPS variant. Let's look at an example here: 748 * 749 * o 8-channel "COD" hashing, starting at address 9. All three ranges enabled. 750 * 1-die and 1-socket interleaving. 751 * 752 * Here we have three total bits being used. Because we start at bit 9, this 753 * means we need to drop bits [13:12], [9]. So our new address would be: 754 * 755 * orig[63:14] >> 3 | orig[11:10] >> 1 | orig[8:0] 756 * | | +-> stays the same 757 * | +-> relocated to bit 9 -- shifted by 1 because we 758 * | removed bit 9. 759 * +--> Relocated to bit 11 -- shifted by 3 because we removed bits, 9, 12, 760 * and 13. 761 * 762 * o 8-channel "NPS" hashing, starting at address 8. All three ranges enabled. 763 * 1-die and 2-socket interleaving. 764 * 765 * Here we need to remove bits [14:12], [8]. We're removing an extra bit 766 * because we have 2-socket interleaving. This results in a new address of: 767 * 768 * orig[63:15] >> 4 | orig[11:9] >> 1 | orig[7:0] 769 * | | +-> stays the same 770 * | +-> relocated to bit 8 -- shifted by 1 because we 771 * | removed bit 8. 
772 * +--> Relocated to bit 11 -- shifted by 4 because we removed bits, 8, 12, 773 * 13, and 14. 774 * 775 * 776 * ZEN 3 6-CHANNEL 777 * 778 * Now, to the real fun stuff, our non-powers of two. First, let's start with 779 * our friend, the Zen 3 6-channel hash. So, the first thing that we need to do 780 * here is start by recomputing our hash again based on the current normalized 781 * address. Regardless of the hash value, this first removes all three bits from 782 * the starting address, so that's removing either [14:12] or [13:11]. 783 * 784 * The rest of the normalization process here is quite complex and somewhat mind 785 * bending. Let's start working through an example here and build this up. 786 * First, let's assume that each channel has a single 16 GiB RDIMM. This would 787 * mean that the channel itself has 96 GiB RDIMM. However, by removing 3 bits 788 * worth, that technically corresponds to an 8-channel configuration that 789 * normally suggests a 128 GiB configuration. The processor requires us to record 790 * this fact in the DF::Np2ChannelConfig register. The value that it wants is a 791 * bit weird. We believe it's calculated by the following: 792 * 793 * 1. Round the channel size up to the next power of 2. 794 * 2. Divide this total size by 64 KiB. 795 * 3. Determine the log base 2 that satisfies this value. 796 * 797 * In our particular example above, we have a 96 GiB channel, so for (1) we end 798 * up with 128 GiB (2^37). We now divide that by 64 KiB (2^16), so this becomes 799 * 2^(37 - 16) or 2^21. Because we want the log base 2 of 2^21 from (2), this 800 * simply becomes 21. The DF::Np2ChannelConfig has two members, a 'space 0' and 801 * 'space 1'. Near as we can tell, in this mode only 'space 0' is used. 802 * 803 * Before we get into the actual normalization scheme, we have to ask ourselves 804 * how do we actually interleave data 6 ways. The scheme here is involved.
805 * First, it's important to remember like with other normalization schemes, we 806 * do adjust for the address for the base address in the DRAM rule and then also 807 * take into account the DRAM hole if present. 808 * 809 * If we delete 3 bits, let's take a sample address and see where it would end 810 * up in the above scheme. We're going to take our 3 address bits and say that 811 * they start at bit 12, so this means that the bits removed are [14:12]. So the 812 * following are the 8 addresses that we have here and where they end up 813 * starting with 1ff: 814 * 815 * o 0x01ff -> 0x1ff, Channel 0 (hash 0b000) 816 * o 0x11ff -> 0x1ff, Channel 1 (hash 0b001) 817 * o 0x21ff -> 0x1ff, Channel 2 (hash 0b010) 818 * o 0x31ff -> 0x1ff, Channel 3 (hash 0b011) 819 * o 0x41ff -> 0x1ff, Channel 4 (hash 0b100) 820 * o 0x51ff -> 0x1ff, Channel 5 (hash 0b101) 821 * o 0x61ff -> 0x3000001ff, Channel 0 (hash 0b110) 822 * o 0x71ff -> 0x3000001ff, Channel 1 (hash 0b111) 823 * 824 * Yes, we did just jump to near the top of what is a 16 GiB DIMM's range for 825 * those last two. The way we determine when to do this jump is based on our 826 * hash. Effectively we ask what is hash[2:1]. If it is 0b11, then we need to 827 * do something different and enter this special case, basically jumping to the 828 * top of the range. If we think about a 6-channel configuration for a moment, 829 * the thing that doesn't exist are the traditional 8-channel hash DIMMs 0b110 830 * and 0b111. 831 * 832 * If you go back to the interleave this kind of meshes, that tried to handle 833 * the case of the hash being 0, 1, and 2, normally, and then did special things 834 * with the case of the hash being in this upper quadrant. The hash then 835 * determined where it went by shifting over the upper address and doing a mod 836 * 3 and using that to determine the upper two bits. 
With that weird address at 837 * the top of the range, let's go through and see what else actually goes to 838 * those weird addresses: 839 * 840 * o 0x08000061ff -> 0x3000001ff, Channel 2 (hash 0b110) 841 * o 0x08000071ff -> 0x3000001ff, Channel 3 (hash 0b111) 842 * o 0x10000061ff -> 0x3000001ff, Channel 4 (hash 0b110) 843 * o 0x10000071ff -> 0x3000001ff, Channel 5 (hash 0b111) 844 * 845 * Based on the above you can see that we've split the 16 GiB DIMM into a 12 GiB 846 * region (e.g. [ 0x0, 0x300000000 ), and a 4 GiB region [ 0x300000000, 847 * 0x400000000 ). What seems to happen is that the CPU algorithmically is going 848 * to put things in this upper range. To perform that action it goes back to the 849 * register information that we stored in DF::Np2ChannelConfig. The way this 850 * seems to be thought of is it wants to set the upper two bits of a 64 KiB 851 * chunk (e.g. bits [15:14]) to 0b11 and then shift that over based on the DIMM 852 * size. 853 * 854 * Our 16 GiB DIMM has 34 bits, so effectively we want to set bits [33:32] in 855 * this case. The channel is 37 bits wide, which the CPU again knows as 2^21 * 856 * 2^16. So it constructs the 64 KiB value of [15:14] = 0b11 and fills the rest 857 * with zeros. It then multiplies it by 2^(21 - 3), or 2^18. The - 3 comes from 858 * the fact that we removed 3 address bits. This when added to the above gets 859 * us bits [33,32] = 0b11. 860 * 861 * While this appears to be the logic, I don't have a proof that this scheme 862 * actually evenly covers the entire range, but a few examples appear to work 863 * out. 864 * 865 * With this, the standard example flow that we give, results in something like: 866 * 867 * o 6-channel Zen 3, starting at address 11. 2M and 1G range enabled. Here, 868 * we assume that the value of the NP2 space0 is 21 bits. This example 869 * assumes we have 96 GiB total memory, which means rounding up to 128 GiB. 
870 * 871 * Step 1 here is to adjust our address to remove the three bits indicated. 872 * So we simply always set our new address to: 873 * 874 * orig[63:14] >> 3 | orig[10:0] 875 * | +-> stays the same 876 * +--> Relocated to bit 11 because a 6-channel config always uses 3 bits to 877 * perform interleaving. 878 * 879 * At this step, one would need to consult the hash of the normalized 880 * address before removing bits (but after adjusting for the base / DRAM 881 * hole). If hash[2:1] == 3, then we would say that the address is actually: 882 * 883 * 0b11 << 32 | orig[63:14] >> 3 | orig[10:0] 884 * 885 * 886 * ZEN 4 NON-POWER OF 2 887 * 888 * Next, we have the DFv4 versions of the 3, 5, 6, 10, and 12 channel hashing. 889 * An important part of this is whether or not there is any socket hashing going 890 * on. Recall there, that if socket hashing was going on, then it is part of the 891 * interleave logic; however, if it is not, then its hash actually becomes 892 * part of the normalized address, but not in the same spot! 893 * 894 * In this mode, we always remove the bits that are actually used by the hash. 895 * Recall that some modes use hash[0], others hash[0] and hash[2], and then only 896 * the 12-channel config uses hash[2:0]. This means we need to be careful in how 897 * we actually remove address bits. All other bits in this lower range we end up 898 * keeping and using. The top bits, e.g. addr[63:14] are kept and divided by the 899 * actual channel-modulus. If we're not performing socket interleaving and 900 * therefore need to keep the value of hash[0], then it is appended as the least 901 * significant bit of that calculation. 902 * 903 * Let's look at an example of this to try to make sense of it all. 904 * 905 * o 6-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled. 906 * 1-die and 2-socket interleaving. 907 * 908 * Here we'd start by calculating hash[2:0] as described in the earlier 909 * interleaving situation. 
Because we're using a socket interleave, we will 910 * not opt to include hash[0] in the higher-level address calculation. 911 * Because this is a 6-channel calculation, our modulus is 3. Here, we will 912 * strip out bits 8 and 13 (recall in the interleaving 6-channel example we 913 * ignored hash[1], thus no bit 12 here). Our new address will be: 914 * 915 * (orig[63:14] / 3) >> 2 | orig[12:9] >> 1 | orig[7:0] 916 * | | +-> stays the same 917 * | +-> relocated to bit 8 -- shifted by 1 because 918 * | we removed bit 8. 919 * +--> Relocated to bit 12 -- shifted by 2 because we removed bits 8 and 920 * 13. 921 * 922 * o 12-channel Zen 4, starting at address 8. 64K, 2M, and 1G range enabled. 923 * 1-die and 1-socket interleaving. 924 * 925 * This is a slightly different case from the above in two ways. First, we 926 * will end up removing bits 8, 12, and 13, but then we'll also reuse 927 * hash[0]. Our new address will be: 928 * 929 * ((orig[63:14] / 3) << 1 | hash[0]) >> 3 | orig[11:9] >> 1 | orig[7:0] 930 * | | +-> stays the 931 * | | same 932 * | +-> relocated to bit 8 -- shifted by 933 * | 1 because we removed bit 8. 934 * +--> Relocated to bit 11 -- shifted by 3 because we removed bits 8, 12, 935 * and 13. 936 * 937 * That's most of the normalization process for the time being. We will have to 938 * revisit this when we have to transform a normal address into a system address 939 * and undo all this. 940 * 941 * ------------------------------------- 942 * Selecting a DIMM and UMC Organization 943 * ------------------------------------- 944 * 945 * One of the more nuanced things in decoding and encoding is the question of 946 * where do we send a channel normalized address. That is, now that we've gotten 947 * to a given channel, we need to transform the address into something 948 * meaningful for a DIMM, and select a DIMM as well. The UMC SMN space contains 949 * a number of Base Address and Mask registers which they describe as activating 950 * a chip-select. 
A given UMC has up to four primary chip-selects (we'll come 951 * back to DDR5 sub-channels later). The first two always go to the first DIMM 952 * in the channel and the latter two always go to the second DIMM in the 953 * channel. Put another way, you can always determine which DIMM you are 954 * referring to by taking the chip-select and shifting it right by 1. 955 * 956 * The UMC Channel registers are organized a bit differently in different 957 * hardware generations. In a DDR5 based UMC, almost all of our settings are on 958 * a per-chip-select basis whereas in a DDR4 based system only the bases and 959 * masks are. While gathering data we normalize this such that each logical 960 * chip-select (umc_cs_t) that we have in the system has the same data so that 961 * way DDR4 and DDR5 based systems are the same to the decoding logic. There is 962 * also channel-wide data such as hash configurations and related. 963 * 964 * Each channel has a set of base and mask registers (and secondary ones as 965 * well). To determine if we activate a given one, we first check if the 966 * enabled bit is set. The enabled bit is set on a per-base basis, so both the 967 * primary and secondary registers have separate enables. As there are four of 968 * each base, mask, secondary base, and secondary mask, we say that if a 969 * normalized address matches either a given index's primary or secondary pair, 970 * then it activates that given UMC index. The basic formula for an enabled 971 * selection is: 972 * 973 * NormAddr & ~Mask[i] == Base[i] & ~Mask[i] 974 * 975 * Once this is selected, this index in the UMC is what is always used to derive 976 * the rest of the information that is specific to a given chip-select or DIMM. 977 * An important thing to remember is that from this point onwards, while there 978 * is a bunch of hashing and interleaving logic it doesn't change which UMC 979 * channel we read the data from.
Though the particular DIMM, rank, and address 980 * we access will change as we go through hashing and interleaving. 981 * 982 * ------------------------ 983 * Row and Column Selection 984 * ------------------------ 985 * 986 * The number of bits that are used for the row and column address of a DIMM 987 * varies based on the type of module itself. These depend on the density of a 988 * DIMM module, e.g. how large an individual DRAM block is, a value such as 16 989 * Gbit, and the number of these wide it is, which is generally phrased as X4, 990 * X8, and X16. The memory controller encodes the number of bits (derived from 991 * the DIMM's SPD data) and then determines which bits are used for addresses. 992 * 993 * Based on this information we can initially construct a row and a column 994 * address by leveraging the information about the number of bits and then 995 * extracting the correct bits out of the normalized channel address. 996 * 997 * If you've made it this far, you know nothing is quite this simple, despite it 998 * seeming so. Importantly, not all DIMMs actually have storage that is a power 999 * of 2. As such, there's another bit that we have to consult to transform the 1000 * actual value that we have for a row. Remarkably, the column somehow has no 1001 * transformations applied to it. 1002 * 1003 * The hardware gives us information on inverting the two 'most significant 1004 * bits' of the row address which we store in 'ucs_inv_msbs'. First, we have the 1005 * question of what are our most significant bits here. This is basically 1006 * determined by the number of low and high row bits. In this case higher 1007 * actually is what we want. Note, the high row bits only exist in DDR4. Next, 1008 * we need to know whether we used the primary or secondary base/mask pair for 1009 * this as there are primary and secondary inversion bits. The higher bit of 1010 * the inversion register (e.g. ucs_inv_msbs[1]) corresponds to the highest row 1011 * bit.
A zero in the bit position indicates that we should not perform an 1012 * inversion where as a one says that we should invert this. 1013 * 1014 * To actually make this happen we can take advantage of the fact that the 1015 * meaning of a 0/1 above means that this can be implemented with a binary 1016 * exclusive-OR (XOR). Logically speaking if we have a don't invert setting 1017 * present, a 0, then x ^ 0 is always x. However, if we have a 1 present, then 1018 * we know that (for a single bit) x ^ 1 = ~x. We take advantage of this fact in 1019 * the row logic. 1020 * 1021 * --------------------- 1022 * Banks and Bank Groups 1023 * --------------------- 1024 * 1025 * While addressing within a given module is done by the use of a row and column 1026 * address, to increase storage density a module generally has a number of 1027 * banks, which may be organized into one or more bank groups. While a given 1028 * DDR4/5 access happens in some prefetched chunk of say 64 bytes (what do you 1029 * know, that's a cacheline), that all occurs within a single bank. The addition 1030 * of bank groups makes it easier to access data in parallel -- it is often 1031 * faster to read from another bank group than to read another region inside a 1032 * bank group. 1033 * 1034 * Based on the DIMMs internal configuration, there will be a specified number 1035 * of bits used for the overall bank address (including bank group bits) 1036 * followed by a number of bits actually used for bank groups. There are 1037 * separately an array of bits used to concoct the actual address. It appears, 1038 * mostly through experimental evidence, that the bank group bits occur first 1039 * and then are followed by the bank selection itself. This makes some sense if 1040 * you assume that switching bank groups is faster than switching banks. 
1041 * 1042 * So if we see the UMC noting 4 bank bits and 2 bank group bits, that means 1043 * that the umc_cs_t's ucs_bank_bits[1:0] correspond to bank_group[1:0] and 1044 * ucs_bank_bits[3:2] correspond to bank_address[1:0]. However, if there were no 1045 * bank group bits indicated, then all of the address bits would correspond to the 1046 * bank address. 1047 * 1048 * Now, this would all be straightforward if not for hashing, our favorite. 1049 * There are five bank hashing registers per channel (UMC_BANK_HASH_DDR4, 1050 * UMC_BANK_HASH_DDR5), one that corresponds to each of the five possible bank bits. To 1051 * do this we need to use the calculated row and column that we previously 1052 * determined. This calculation happens in a few steps: 1053 * 1054 * 1) First check if the enable bit is set in the rule. If not, just use the 1055 * normal bank address bit and we're done. 1056 * 2) Take a bitwise-AND of the calculated row and hash register's row value. 1057 * Next do the same thing for the column. 1058 * 3) For each bit in the row, progressively XOR it, e.g. row[0] ^ row[1] ^ 1059 * row[2] ^ ... to calculate a net bit value for the row. This then 1060 * repeats itself for the column. What basically has happened is that we're 1061 * using the hash register to select which bits to impact our decision. 1062 * Think of this as a traditional bitwise functional reduce. 1063 * 4) XOR the combined row bit with the column bit and the actual bank 1064 * address bit from the normalized address. So if this were bank bit 0, 1065 * which indicated we should use bit 15 for bank[0], then we would 1066 * ultimately say our new bit is norm_addr[15] ^ row_xor ^ col_xor 1067 * 1068 * An important caveat is that we would only consult all this if we actually 1069 * were told that the bank bit was being used. For example if we had 3 bank 1070 * bits, then we'd only check the first 3 hash registers. The latter two would 1071 * be ignored.
1072 * 1073 * Once this process is done, then we can go back and split the activated bank 1074 * into the actual bank used and the bank group used based on the first bits 1075 * going to the bank group. 1076 * 1077 * --------------- 1078 * DDR5 Sub-channel 1079 * --------------- 1080 * 1081 * As described in the definitions section, DDR5 has the notion of a 1082 * sub-channel. Here, a single bit is used to determine which of the 1083 * sub-channels to actually operate and utilize. Importantly the same 1084 * chip-select seems to apply to both halves of a given sub-channel. 1085 * 1086 * There is also a hash that is used here. The hash here utilizes the calculated 1087 * bank, column, and row and follows the same pattern used in the bank 1088 * calculation where we do a bunch of running exclusive-ORs and then do that 1089 * with the original value we found to get the new value. Because there's only 1090 * one bit for the sub-channel, we only have a single hash to consider. 1091 * 1092 * ------------------------------------------- 1093 * Ranks, Chip-Select, and Rank Multiplication 1094 * ------------------------------------------- 1095 * 1096 * The notion of ranks and the chip-select are interwoven. From a strict DDR4 1097 * RDIMM perspective, there are two lines that are dedicated for chip-selects 1098 * and then another two that are shared with three 'chip-id' bits that are used 1099 * in 3DS RDIMMs. In all cases the controller starts with two logical chip 1100 * selects and then uses something called rank multiplication to figure out how 1101 * to multiplex that and map to the broader set of things. Basically, in 1102 * reality, DDR4 RDIMMs allow for 4 bits to determine a rank and then 3DS RDIMMs 1103 * use 2 bits for a rank and 3 bits to select a stacked chip. In DDR5 this is 1104 * different and you just have 2 bits for a rank. 
1105 * 1106 * It's not entirely clear from what we know from AMD, but it seems that we use 1107 * the RM bits as a way to basically go beyond the basic 2 bits of chip-select 1108 * which is determined based on which channel we logically activate. Initially 1109 * we treat these as two distinct things here, as that's what we get from the 1110 * hardware. There are two hashes here: a chip-select and a rank-multiplication 1111 * hash. Unlike the others, which rely on the bank, row, and column addresses, 1112 * this hash relies on the normalized address. So we calculate that mask and do 1113 * our same xor dance. 1114 * 1115 * There is one hash for each rank multiplication bit and chip-select bit. The 1116 * number of rank multiplication bits is given to us. The number of chip-select 1117 * bits is fixed, it's simply two because there are four base/mask registers and 1118 * logical chip-selects in a given UMC channel. The chip-select on some DDR5 1119 * platforms has a secondary exclusive-OR hash that can be applied. As this only 1120 * exists in some families, for any where it does not exist, we seed it to be zero 1121 * so that it becomes a no-op. 1122 * 1123 * ----------- 1124 * Future Work 1125 * ----------- 1126 * 1127 * As the road goes ever on and on, down from the door where it began, there are 1128 * still some stops on the journey for this driver. In particular, here are the 1129 * major open areas that could be implemented to extend what this can do: 1130 * 1131 * o The ability to transform a normalized channel address back to a system 1132 * address. This is required for MCA/MCA-X error handling as those generally 1133 * work in terms of channel addresses. 1134 * o Integrating with the MCA/MCA-X error handling paths so that way we can 1135 * take correct action in the face of ECC errors and allow recovery from 1136 * uncorrectable errors.
1137 * o Providing memory controller information to FMA so that way it can opt to 1138 * do predictive failure or give us more information about what is at fault 1139 * with ECC errors. 1140 * o Figuring out if we will get MCEs for privileged address decoding and if so 1141 * mapping those back to system addresses and related. 1142 * o 3DS RDIMMs likely will need a little bit of work to ensure we're handling 1143 * the resulting combination of the RM bits and CS and reporting it 1144 * intelligently. 1145 */ 1146 1147 #include <sys/types.h> 1148 #include <sys/file.h> 1149 #include <sys/errno.h> 1150 #include <sys/open.h> 1151 #include <sys/cred.h> 1152 #include <sys/ddi.h> 1153 #include <sys/sunddi.h> 1154 #include <sys/stat.h> 1155 #include <sys/conf.h> 1156 #include <sys/devops.h> 1157 #include <sys/cmn_err.h> 1158 #include <sys/x86_archext.h> 1159 #include <sys/sysmacros.h> 1160 #include <sys/mc.h> 1161 1162 #include <zen_umc.h> 1163 #include <sys/amdzen/df.h> 1164 #include <sys/amdzen/umc.h> 1165 1166 static zen_umc_t *zen_umc; 1167 1168 /* 1169 * Per-CPU family information that describes the set of capabilities that they 1170 * implement. When adding support for new CPU generations, you must go through 1171 * what documentation you have and validate these. The best bet is to find a 1172 * similar processor and see what has changed. Unfortunately, there really isn't 1173 * a substitute for just basically checking every register. The family name 1174 * comes from amdzen_c_family(). One additional note for new CPUs, if our 1175 * parent amdzen nexus driver does not attach (because the DF has changed PCI 1176 * IDs or more), then just adding something here will not be sufficient to make 1177 * it work.
1178 */ 1179 static const zen_umc_fam_data_t zen_umc_fam_data[] = { 1180 { 1181 .zufd_family = X86_PF_AMD_NAPLES, 1182 .zufd_dram_nrules = 16, 1183 .zufd_cs_nrules = 2, 1184 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4, 1185 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS 1186 }, { 1187 .zufd_family = X86_PF_HYGON_DHYANA, 1188 .zufd_dram_nrules = 16, 1189 .zufd_cs_nrules = 2, 1190 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4, 1191 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS 1192 }, { 1193 .zufd_family = X86_PF_AMD_DALI, 1194 .zufd_dram_nrules = 2, 1195 .zufd_cs_nrules = 2, 1196 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4_APU, 1197 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS 1198 }, { 1199 .zufd_family = X86_PF_AMD_ROME, 1200 .zufd_flags = ZEN_UMC_FAM_F_NP2 | ZEN_UMC_FAM_F_NORM_HASH | 1201 ZEN_UMC_FAM_F_UMC_HASH, 1202 .zufd_dram_nrules = 16, 1203 .zufd_cs_nrules = 2, 1204 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4, 1205 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1206 UMC_CHAN_HASH_F_CS 1207 }, { 1208 .zufd_family = X86_PF_AMD_RENOIR, 1209 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH, 1210 .zufd_dram_nrules = 2, 1211 .zufd_cs_nrules = 2, 1212 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4_APU, 1213 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_PC | 1214 UMC_CHAN_HASH_F_CS 1215 }, { 1216 .zufd_family = X86_PF_AMD_MATISSE, 1217 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH | ZEN_UMC_FAM_F_UMC_HASH, 1218 .zufd_dram_nrules = 16, 1219 .zufd_cs_nrules = 2, 1220 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4, 1221 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1222 UMC_CHAN_HASH_F_CS 1223 }, { 1224 .zufd_family = X86_PF_AMD_VAN_GOGH, 1225 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH, 1226 .zufd_dram_nrules = 2, 1227 .zufd_cs_nrules = 2, 1228 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU, 1229 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS 1230 }, { 1231 .zufd_family = X86_PF_AMD_MENDOCINO, 1232 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH, 1233 
.zufd_dram_nrules = 2, 1234 .zufd_cs_nrules = 2, 1235 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU, 1236 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS 1237 }, { 1238 .zufd_family = X86_PF_AMD_MILAN, 1239 .zufd_flags = ZEN_UMC_FAM_F_TARG_REMAP | ZEN_UMC_FAM_F_NP2 | 1240 ZEN_UMC_FAM_F_NORM_HASH | ZEN_UMC_FAM_F_UMC_HASH, 1241 .zufd_dram_nrules = 16, 1242 .zufd_cs_nrules = 2, 1243 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4, 1244 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1245 UMC_CHAN_HASH_F_CS 1246 }, { 1247 .zufd_family = X86_PF_AMD_GENOA, 1248 .zufd_flags = ZEN_UMC_FAM_F_TARG_REMAP | 1249 ZEN_UMC_FAM_F_UMC_HASH | ZEN_UMC_FAM_F_UMC_EADDR | 1250 ZEN_UMC_FAM_F_CS_XOR, 1251 .zufd_dram_nrules = 20, 1252 .zufd_cs_nrules = 4, 1253 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5, 1254 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1255 UMC_CHAN_HASH_F_PC | UMC_CHAN_HASH_F_CS 1256 }, { 1257 .zufd_family = X86_PF_AMD_VERMEER, 1258 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH | ZEN_UMC_FAM_F_UMC_HASH, 1259 .zufd_dram_nrules = 16, 1260 .zufd_cs_nrules = 2, 1261 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4, 1262 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_RM | 1263 UMC_CHAN_HASH_F_CS, 1264 }, { 1265 .zufd_family = X86_PF_AMD_REMBRANDT, 1266 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH, 1267 .zufd_dram_nrules = 2, 1268 .zufd_cs_nrules = 2, 1269 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5_APU, 1270 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_CS 1271 }, { 1272 .zufd_family = X86_PF_AMD_CEZANNE, 1273 .zufd_flags = ZEN_UMC_FAM_F_NORM_HASH, 1274 .zufd_dram_nrules = 2, 1275 .zufd_cs_nrules = 2, 1276 .zufd_umc_style = ZEN_UMC_UMC_S_DDR4_APU, 1277 .zufd_chan_hash = UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_PC | 1278 UMC_CHAN_HASH_F_CS 1279 }, { 1280 .zufd_family = X86_PF_AMD_RAPHAEL, 1281 .zufd_flags = ZEN_UMC_FAM_F_TARG_REMAP | ZEN_UMC_FAM_F_CS_XOR, 1282 .zufd_dram_nrules = 2, 1283 .zufd_cs_nrules = 2, 1284 .zufd_umc_style = ZEN_UMC_UMC_S_DDR5, 1285 .zufd_chan_hash 
= UMC_CHAN_HASH_F_BANK | UMC_CHAN_HASH_F_PC | 1286 UMC_CHAN_HASH_F_CS 1287 } 1288 }; 1289 1290 static boolean_t 1291 zen_umc_identify(zen_umc_t *umc) 1292 { 1293 for (uint_t i = 0; i < ARRAY_SIZE(zen_umc_fam_data); i++) { 1294 if (zen_umc_fam_data[i].zufd_family == umc->umc_family) { 1295 umc->umc_fdata = &zen_umc_fam_data[i]; 1296 return (B_TRUE); 1297 } 1298 } 1299 1300 return (B_FALSE); 1301 } 1302 1303 /* 1304 * This operates on DFv2, DFv3, and DFv3.5 DRAM rules, which generally speaking 1305 * are in similar register locations and meanings, but the size of bits in 1306 * memory is not consistent. 1307 */ 1308 static int 1309 zen_umc_read_dram_rule_df_23(zen_umc_t *umc, const uint_t dfno, 1310 const uint_t inst, const uint_t ruleno, df_dram_rule_t *rule) 1311 { 1312 int ret; 1313 uint32_t base, limit; 1314 uint64_t dbase, dlimit; 1315 uint16_t addr_ileave, chan_ileave, sock_ileave, die_ileave, dest; 1316 boolean_t hash = B_FALSE; 1317 zen_umc_df_t *df = &umc->umc_dfs[dfno]; 1318 1319 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_BASE_V2(ruleno), 1320 &base)) != 0) { 1321 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM base " 1322 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret); 1323 return (ret); 1324 } 1325 1326 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_LIMIT_V2(ruleno), 1327 &limit)) != 0) { 1328 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM limit " 1329 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret); 1330 return (ret); 1331 } 1332 1333 1334 rule->ddr_raw_base = base; 1335 rule->ddr_raw_limit = limit; 1336 rule->ddr_raw_ileave = rule->ddr_raw_ctrl = 0; 1337 1338 if (!DF_DRAM_BASE_V2_GET_VALID(base)) { 1339 return (0); 1340 } 1341 1342 /* 1343 * Extract all values from the registers and then normalize. While there 1344 * are often different bit patterns for the values, the interpretation 1345 * is the same across all the Zen 1-3 parts. 
That is while which bits 1346 * may be used for say channel interleave vary, the values of them are 1347 * consistent. 1348 */ 1349 rule->ddr_flags |= DF_DRAM_F_VALID; 1350 if (DF_DRAM_BASE_V2_GET_HOLE_EN(base)) { 1351 rule->ddr_flags |= DF_DRAM_F_HOLE; 1352 } 1353 1354 dbase = DF_DRAM_BASE_V2_GET_BASE(base); 1355 dlimit = DF_DRAM_LIMIT_V2_GET_LIMIT(limit); 1356 switch (umc->umc_df_rev) { 1357 case DF_REV_2: 1358 addr_ileave = DF_DRAM_BASE_V2_GET_ILV_ADDR(base); 1359 chan_ileave = DF_DRAM_BASE_V2_GET_ILV_CHAN(base); 1360 die_ileave = DF_DRAM_LIMIT_V2_GET_ILV_DIE(limit); 1361 sock_ileave = DF_DRAM_LIMIT_V2_GET_ILV_SOCK(limit); 1362 dest = DF_DRAM_LIMIT_V2_GET_DEST_ID(limit); 1363 break; 1364 case DF_REV_3: 1365 addr_ileave = DF_DRAM_BASE_V3_GET_ILV_ADDR(base); 1366 sock_ileave = DF_DRAM_BASE_V3_GET_ILV_SOCK(base); 1367 die_ileave = DF_DRAM_BASE_V3_GET_ILV_DIE(base); 1368 chan_ileave = DF_DRAM_BASE_V3_GET_ILV_CHAN(base); 1369 dest = DF_DRAM_LIMIT_V3_GET_DEST_ID(limit); 1370 break; 1371 case DF_REV_3P5: 1372 addr_ileave = DF_DRAM_BASE_V3P5_GET_ILV_ADDR(base); 1373 sock_ileave = DF_DRAM_BASE_V3P5_GET_ILV_SOCK(base); 1374 die_ileave = DF_DRAM_BASE_V3P5_GET_ILV_DIE(base); 1375 chan_ileave = DF_DRAM_BASE_V3P5_GET_ILV_CHAN(base); 1376 dest = DF_DRAM_LIMIT_V3P5_GET_DEST_ID(limit); 1377 break; 1378 default: 1379 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported " 1380 "DF revision processing DRAM rules: 0x%x", umc->umc_df_rev); 1381 return (-1); 1382 } 1383 1384 rule->ddr_base = dbase << DF_DRAM_BASE_V2_BASE_SHIFT; 1385 rule->ddr_sock_ileave_bits = sock_ileave; 1386 rule->ddr_die_ileave_bits = die_ileave; 1387 switch (addr_ileave) { 1388 case DF_DRAM_ILV_ADDR_8: 1389 case DF_DRAM_ILV_ADDR_9: 1390 case DF_DRAM_ILV_ADDR_10: 1391 case DF_DRAM_ILV_ADDR_11: 1392 case DF_DRAM_ILV_ADDR_12: 1393 break; 1394 default: 1395 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid address " 1396 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno, 1397 dfno, inst, addr_ileave); 
1398 return (EINVAL); 1399 } 1400 rule->ddr_addr_start = DF_DRAM_ILV_ADDR_BASE + addr_ileave; 1401 1402 switch (chan_ileave) { 1403 case DF_DRAM_BASE_V2_ILV_CHAN_1: 1404 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_1CH; 1405 break; 1406 case DF_DRAM_BASE_V2_ILV_CHAN_2: 1407 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_2CH; 1408 break; 1409 case DF_DRAM_BASE_V2_ILV_CHAN_4: 1410 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_4CH; 1411 break; 1412 case DF_DRAM_BASE_V2_ILV_CHAN_8: 1413 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_8CH; 1414 break; 1415 case DF_DRAM_BASE_V2_ILV_CHAN_6: 1416 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_6CH; 1417 break; 1418 case DF_DRAM_BASE_V2_ILV_CHAN_COD4_2: 1419 hash = B_TRUE; 1420 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_COD4_2CH; 1421 break; 1422 case DF_DRAM_BASE_V2_ILV_CHAN_COD2_4: 1423 hash = B_TRUE; 1424 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_COD2_4CH; 1425 break; 1426 case DF_DRAM_BASE_V2_ILV_CHAN_COD1_8: 1427 hash = B_TRUE; 1428 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_COD1_8CH; 1429 break; 1430 default: 1431 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid channel " 1432 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno, 1433 dfno, inst, chan_ileave); 1434 return (EINVAL); 1435 } 1436 1437 /* 1438 * If hashing is enabled, note which hashing rules apply to this 1439 * address. This is done to smooth over the differences between DFv3 and 1440 * DFv4, where the flags are in the rules themselves in the latter, but 1441 * global today. 1442 */ 1443 if (hash) { 1444 if ((df->zud_flags & ZEN_UMC_DF_F_HASH_16_18) != 0) { 1445 rule->ddr_flags |= DF_DRAM_F_HASH_16_18; 1446 } 1447 1448 if ((df->zud_flags & ZEN_UMC_DF_F_HASH_21_23) != 0) { 1449 rule->ddr_flags |= DF_DRAM_F_HASH_21_23; 1450 } 1451 1452 if ((df->zud_flags & ZEN_UMC_DF_F_HASH_30_32) != 0) { 1453 rule->ddr_flags |= DF_DRAM_F_HASH_30_32; 1454 } 1455 } 1456 1457 /* 1458 * While DFv4 makes remapping explicit, it is basically always enabled 1459 * and used on supported platforms prior to that point. 
So flag such 1460 * supported platforms as ones that need to do this. On those systems 1461 * there is only one set of remap rules for an entire DF that are 1462 * determined based on the target socket. To indicate that we use the 1463 * DF_DRAM_F_REMAP_SOCK flag below and skip setting a remap target. 1464 */ 1465 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_TARG_REMAP) != 0) { 1466 rule->ddr_flags |= DF_DRAM_F_REMAP_EN | DF_DRAM_F_REMAP_SOCK; 1467 } 1468 1469 rule->ddr_limit = (dlimit << DF_DRAM_LIMIT_V2_LIMIT_SHIFT) + 1470 DF_DRAM_LIMIT_V2_LIMIT_EXCL; 1471 rule->ddr_dest_fabid = dest; 1472 1473 return (0); 1474 } 1475 1476 static int 1477 zen_umc_read_dram_rule_df_4(zen_umc_t *umc, const uint_t dfno, 1478 const uint_t inst, const uint_t ruleno, df_dram_rule_t *rule) 1479 { 1480 int ret; 1481 uint16_t addr_ileave; 1482 uint32_t base, limit, ilv, ctl; 1483 1484 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_BASE_V4(ruleno), 1485 &base)) != 0) { 1486 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM base " 1487 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret); 1488 return (ret); 1489 } 1490 1491 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_LIMIT_V4(ruleno), 1492 &limit)) != 0) { 1493 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM limit " 1494 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret); 1495 return (ret); 1496 } 1497 1498 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_ILV_V4(ruleno), 1499 &ilv)) != 0) { 1500 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM " 1501 "interleave register %u on 0x%x/0x%x: %d", ruleno, dfno, 1502 inst, ret); 1503 return (ret); 1504 } 1505 1506 if ((ret = amdzen_c_df_read32(dfno, inst, DF_DRAM_CTL_V4(ruleno), 1507 &ctl)) != 0) { 1508 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM control " 1509 "register %u on 0x%x/0x%x: %d", ruleno, dfno, inst, ret); 1510 return (ret); 1511 } 1512 1513 rule->ddr_raw_base = base; 1514 rule->ddr_raw_limit = limit; 1515 rule->ddr_raw_ileave = ilv; 1516 
rule->ddr_raw_ctrl = ctl; 1517 1518 if (!DF_DRAM_CTL_V4_GET_VALID(ctl)) { 1519 return (0); 1520 } 1521 1522 rule->ddr_flags |= DF_DRAM_F_VALID; 1523 rule->ddr_base = DF_DRAM_BASE_V4_GET_ADDR(base); 1524 rule->ddr_base = rule->ddr_base << DF_DRAM_BASE_V4_BASE_SHIFT; 1525 rule->ddr_limit = DF_DRAM_LIMIT_V4_GET_ADDR(limit); 1526 rule->ddr_limit = (rule->ddr_limit << DF_DRAM_LIMIT_V4_LIMIT_SHIFT) + 1527 DF_DRAM_LIMIT_V4_LIMIT_EXCL; 1528 rule->ddr_dest_fabid = DF_DRAM_CTL_V4_GET_DEST_ID(ctl); 1529 1530 if (DF_DRAM_CTL_V4_GET_HASH_1G(ctl) != 0) { 1531 rule->ddr_flags |= DF_DRAM_F_HASH_30_32; 1532 } 1533 1534 if (DF_DRAM_CTL_V4_GET_HASH_2M(ctl) != 0) { 1535 rule->ddr_flags |= DF_DRAM_F_HASH_21_23; 1536 } 1537 1538 if (DF_DRAM_CTL_V4_GET_HASH_64K(ctl) != 0) { 1539 rule->ddr_flags |= DF_DRAM_F_HASH_16_18; 1540 } 1541 1542 if (DF_DRAM_CTL_V4_GET_REMAP_EN(ctl) != 0) { 1543 rule->ddr_flags |= DF_DRAM_F_REMAP_EN; 1544 rule->ddr_remap_ent = DF_DRAM_CTL_V4_GET_REMAP_SEL(ctl); 1545 } 1546 1547 if (DF_DRAM_CTL_V4_GET_HOLE_EN(ctl) != 0) { 1548 rule->ddr_flags |= DF_DRAM_F_HOLE; 1549 } 1550 1551 rule->ddr_sock_ileave_bits = DF_DRAM_ILV_V4_GET_SOCK(ilv); 1552 rule->ddr_die_ileave_bits = DF_DRAM_ILV_V4_GET_DIE(ilv); 1553 switch (DF_DRAM_ILV_V4_GET_CHAN(ilv)) { 1554 case DF_DRAM_ILV_V4_CHAN_1: 1555 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_1CH; 1556 break; 1557 case DF_DRAM_ILV_V4_CHAN_2: 1558 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_2CH; 1559 break; 1560 case DF_DRAM_ILV_V4_CHAN_4: 1561 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_4CH; 1562 break; 1563 case DF_DRAM_ILV_V4_CHAN_8: 1564 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_8CH; 1565 break; 1566 case DF_DRAM_ILV_V4_CHAN_16: 1567 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_16CH; 1568 break; 1569 case DF_DRAM_ILV_V4_CHAN_32: 1570 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_32CH; 1571 break; 1572 case DF_DRAM_ILV_V4_CHAN_NPS4_2CH: 1573 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_2CH; 1574 break; 1575 case DF_DRAM_ILV_V4_CHAN_NPS2_4CH: 1576 
rule->ddr_chan_ileave = DF_CHAN_ILEAVE_COD2_4CH; 1577 break; 1578 case DF_DRAM_ILV_V4_CHAN_NPS1_8CH: 1579 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_8CH; 1580 break; 1581 case DF_DRAM_ILV_V4_CHAN_NPS4_3CH: 1582 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS4_3CH; 1583 break; 1584 case DF_DRAM_ILV_V4_CHAN_NPS2_6CH: 1585 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_6CH; 1586 break; 1587 case DF_DRAM_ILV_V4_CHAN_NPS1_12CH: 1588 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_12CH; 1589 break; 1590 case DF_DRAM_ILV_V4_CHAN_NPS2_5CH: 1591 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS2_5CH; 1592 break; 1593 case DF_DRAM_ILV_V4_CHAN_NPS1_10CH: 1594 rule->ddr_chan_ileave = DF_CHAN_ILEAVE_NPS1_10CH; 1595 break; 1596 default: 1597 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid channel " 1598 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno, 1599 dfno, inst, DF_DRAM_ILV_V4_GET_CHAN(ilv)); 1600 1601 break; 1602 } 1603 1604 addr_ileave = DF_DRAM_ILV_V4_GET_ADDR(ilv); 1605 switch (addr_ileave) { 1606 case DF_DRAM_ILV_ADDR_8: 1607 case DF_DRAM_ILV_ADDR_9: 1608 case DF_DRAM_ILV_ADDR_10: 1609 case DF_DRAM_ILV_ADDR_11: 1610 case DF_DRAM_ILV_ADDR_12: 1611 break; 1612 default: 1613 dev_err(umc->umc_dip, CE_WARN, "!encountered invalid address " 1614 "interleave on rule %u, df/inst 0x%x/0x%x: 0x%x", ruleno, 1615 dfno, inst, addr_ileave); 1616 return (EINVAL); 1617 } 1618 rule->ddr_addr_start = DF_DRAM_ILV_ADDR_BASE + addr_ileave; 1619 1620 return (0); 1621 } 1622 1623 static int 1624 zen_umc_read_dram_rule(zen_umc_t *umc, const uint_t dfno, const uint_t instid, 1625 const uint_t ruleno, df_dram_rule_t *rule) 1626 { 1627 int ret; 1628 1629 switch (umc->umc_df_rev) { 1630 case DF_REV_2: 1631 case DF_REV_3: 1632 case DF_REV_3P5: 1633 ret = zen_umc_read_dram_rule_df_23(umc, dfno, instid, ruleno, 1634 rule); 1635 break; 1636 case DF_REV_4: 1637 ret = zen_umc_read_dram_rule_df_4(umc, dfno, instid, ruleno, 1638 rule); 1639 break; 1640 default: 1641 dev_err(umc->umc_dip, CE_WARN, 
"!encountered unsupported " 1642 "DF revision processing DRAM rules: 0x%x", umc->umc_df_rev); 1643 return (-1); 1644 } 1645 1646 if (ret != 0) { 1647 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM " 1648 "rule %u on df/inst 0x%x/0x%x: %d", ruleno, 1649 dfno, instid, ret); 1650 return (-1); 1651 } 1652 1653 return (0); 1654 } 1655 1656 static int 1657 zen_umc_read_remap(zen_umc_t *umc, zen_umc_df_t *df, const uint_t instid) 1658 { 1659 uint_t nremaps, nents; 1660 uint_t dfno = df->zud_dfno; 1661 const df_reg_def_t milan_remap0[ZEN_UMC_MILAN_CS_NREMAPS] = { 1662 DF_SKT0_CS_REMAP0_V3, DF_SKT1_CS_REMAP0_V3 }; 1663 const df_reg_def_t milan_remap1[ZEN_UMC_MILAN_CS_NREMAPS] = { 1664 DF_SKT0_CS_REMAP1_V3, DF_SKT1_CS_REMAP1_V3 }; 1665 const df_reg_def_t dfv4_remapA[ZEN_UMC_MAX_CS_REMAPS] = { 1666 DF_CS_REMAP0A_V4, DF_CS_REMAP1A_V4, DF_CS_REMAP2A_V4, 1667 DF_CS_REMAP3A_V4 }; 1668 const df_reg_def_t dfv4_remapB[ZEN_UMC_MAX_CS_REMAPS] = { 1669 DF_CS_REMAP0B_V4, DF_CS_REMAP1B_V4, DF_CS_REMAP2B_V4, 1670 DF_CS_REMAP3B_V4 }; 1671 const df_reg_def_t *remapA, *remapB; 1672 1673 1674 switch (umc->umc_df_rev) { 1675 case DF_REV_3: 1676 nremaps = ZEN_UMC_MILAN_CS_NREMAPS; 1677 nents = ZEN_UMC_MILAN_REMAP_ENTS; 1678 remapA = milan_remap0; 1679 remapB = milan_remap1; 1680 break; 1681 case DF_REV_4: 1682 nremaps = ZEN_UMC_MAX_CS_REMAPS; 1683 nents = ZEN_UMC_MAX_REMAP_ENTS; 1684 remapA = dfv4_remapA; 1685 remapB = dfv4_remapB; 1686 break; 1687 default: 1688 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported DF " 1689 "revision processing remap rules: 0x%x", umc->umc_df_rev); 1690 return (-1); 1691 } 1692 1693 df->zud_cs_nremap = nremaps; 1694 for (uint_t i = 0; i < nremaps; i++) { 1695 int ret; 1696 uint32_t rmA, rmB; 1697 zen_umc_cs_remap_t *remap = &df->zud_remap[i]; 1698 1699 if ((ret = amdzen_c_df_read32(dfno, instid, remapA[i], 1700 &rmA)) != 0) { 1701 dev_err(umc->umc_dip, CE_WARN, "!failed to read " 1702 "df/inst 0x%x/0x%x remap socket %u-0/A: %d", dfno, 1703 instid, i, 
ret); 1704 return (-1); 1705 } 1706 1707 if ((ret = amdzen_c_df_read32(dfno, instid, remapB[i], 1708 &rmB)) != 0) { 1709 dev_err(umc->umc_dip, CE_WARN, "!failed to read " 1710 "df/inst 0x%x/0x%x remap socket %u-1/B: %d", dfno, 1711 instid, i, ret); 1712 return (-1); 1713 } 1714 1715 remap->csr_nremaps = nents; 1716 for (uint_t ent = 0; ent < ZEN_UMC_REMAP_PER_REG; ent++) { 1717 uint_t alt = ent + ZEN_UMC_REMAP_PER_REG; 1718 boolean_t do_alt = alt < nents; 1719 remap->csr_remaps[ent] = DF_CS_REMAP_GET_CSX(rmA, 1720 ent); 1721 if (do_alt) { 1722 remap->csr_remaps[alt] = 1723 DF_CS_REMAP_GET_CSX(rmB, ent); 1724 } 1725 } 1726 } 1727 1728 return (0); 1729 } 1730 1731 /* 1732 * Now that we have a CCM, we have several different tasks ahead of us: 1733 * 1734 * o Determine whether or not the DRAM hole is valid. 1735 * o Snapshot all of the system address rules and translate them into our 1736 * generic format. 1737 * o Determine if there are any rules to retarget things (currently 1738 * Milan/Genoa). 1739 * o Determine if there are any other hashing rules enabled. 1740 * 1741 * We only require this from a single CCM as these are currently required to be 1742 * the same across all of them. 1743 */ 1744 static int 1745 zen_umc_fill_ccm_cb(const uint_t dfno, const uint32_t fabid, 1746 const uint32_t instid, void *arg) 1747 { 1748 zen_umc_t *umc = arg; 1749 zen_umc_df_t *df = &umc->umc_dfs[dfno]; 1750 df_reg_def_t hole; 1751 int ret; 1752 uint32_t val; 1753 1754 df->zud_dfno = dfno; 1755 df->zud_ccm_inst = instid; 1756 1757 /* 1758 * First get the DRAM hole. This has the same layout, albeit different 1759 * registers across our different platforms. 
1760 */ 1761 switch (umc->umc_df_rev) { 1762 case DF_REV_2: 1763 case DF_REV_3: 1764 case DF_REV_3P5: 1765 hole = DF_DRAM_HOLE_V2; 1766 break; 1767 case DF_REV_4: 1768 hole = DF_DRAM_HOLE_V4; 1769 break; 1770 default: 1771 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported " 1772 "DF version: 0x%x", umc->umc_df_rev); 1773 return (-1); 1774 } 1775 1776 if ((ret = amdzen_c_df_read32(dfno, instid, hole, &val)) != 0) { 1777 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM Hole: %d", 1778 ret); 1779 return (-1); 1780 } 1781 1782 df->zud_hole_raw = val; 1783 if (DF_DRAM_HOLE_GET_VALID(val)) { 1784 uint64_t t; 1785 1786 df->zud_flags |= ZEN_UMC_DF_F_HOLE_VALID; 1787 t = DF_DRAM_HOLE_GET_BASE(val); 1788 df->zud_hole_base = t << DF_DRAM_HOLE_BASE_SHIFT; 1789 } 1790 1791 /* 1792 * Prior to Zen 4, the hash information was global and applied to all 1793 * COD rules globally. Check if we're on such a system and snapshot this 1794 * so we can use it during the rule application. Note, this was added in 1795 * DFv3. 
1796 */ 1797 if (umc->umc_df_rev == DF_REV_3 || umc->umc_df_rev == DF_REV_3P5) { 1798 uint32_t globctl; 1799 1800 if ((ret = amdzen_c_df_read32(dfno, instid, DF_GLOB_CTL_V3, 1801 &globctl)) != 0) { 1802 dev_err(umc->umc_dip, CE_WARN, "!failed to read global " 1803 "control: %d", ret); 1804 return (-1); 1805 } 1806 1807 df->zud_glob_ctl_raw = globctl; 1808 if (DF_GLOB_CTL_V3_GET_HASH_1G(globctl) != 0) { 1809 df->zud_flags |= ZEN_UMC_DF_F_HASH_30_32; 1810 } 1811 1812 if (DF_GLOB_CTL_V3_GET_HASH_2M(globctl) != 0) { 1813 df->zud_flags |= ZEN_UMC_DF_F_HASH_21_23; 1814 } 1815 1816 if (DF_GLOB_CTL_V3_GET_HASH_64K(globctl) != 0) { 1817 df->zud_flags |= ZEN_UMC_DF_F_HASH_16_18; 1818 } 1819 } 1820 1821 df->zud_dram_nrules = umc->umc_fdata->zufd_dram_nrules; 1822 for (uint_t i = 0; i < umc->umc_fdata->zufd_dram_nrules; i++) { 1823 if (zen_umc_read_dram_rule(umc, dfno, instid, i, 1824 &df->zud_rules[i]) != 0) { 1825 return (-1); 1826 } 1827 } 1828 1829 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_TARG_REMAP) != 0) { 1830 if (zen_umc_read_remap(umc, df, instid) != 0) { 1831 return (-1); 1832 } 1833 } 1834 1835 /* 1836 * We only want a single entry, so always return 1 to terminate us 1837 * early. 1838 */ 1839 return (1); 1840 } 1841 1842 /* 1843 * This is used to fill in the common properties about a DIMM. This should occur 1844 * after the rank information has been filled out. The information used is the 1845 * same between DDR4 and DDR5 DIMMs. The only major difference is the register 1846 * offset. 
1847 */ 1848 static boolean_t 1849 zen_umc_fill_dimm_common(zen_umc_t *umc, zen_umc_df_t *df, zen_umc_chan_t *chan, 1850 const uint_t dimmno, boolean_t ddr4) 1851 { 1852 umc_dimm_t *dimm; 1853 int ret; 1854 smn_reg_t reg; 1855 uint32_t val; 1856 const uint32_t id = chan->chan_logid; 1857 1858 dimm = &chan->chan_dimms[dimmno]; 1859 dimm->ud_dimmno = dimmno; 1860 1861 if (ddr4) { 1862 reg = UMC_DIMMCFG_DDR4(id, dimmno); 1863 } else { 1864 reg = UMC_DIMMCFG_DDR5(id, dimmno); 1865 } 1866 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 1867 dev_err(umc->umc_dip, CE_WARN, "failed to read DIMM " 1868 "configuration register %x: %d", SMN_REG_ADDR(reg), ret); 1869 return (B_FALSE); 1870 } 1871 dimm->ud_dimmcfg_raw = val; 1872 1873 if (UMC_DIMMCFG_GET_X16(val) != 0) { 1874 dimm->ud_width = UMC_DIMM_W_X16; 1875 } else if (UMC_DIMMCFG_GET_X4(val) != 0) { 1876 dimm->ud_width = UMC_DIMM_W_X4; 1877 } else { 1878 dimm->ud_width = UMC_DIMM_W_X8; 1879 } 1880 1881 if (UMC_DIMMCFG_GET_3DS(val) != 0) { 1882 dimm->ud_kind = UMC_DIMM_K_3DS_RDIMM; 1883 } else if (UMC_DIMMCFG_GET_LRDIMM(val) != 0) { 1884 dimm->ud_kind = UMC_DIMM_K_LRDIMM; 1885 } else if (UMC_DIMMCFG_GET_RDIMM(val) != 0) { 1886 dimm->ud_kind = UMC_DIMM_K_RDIMM; 1887 } else { 1888 dimm->ud_kind = UMC_DIMM_K_UDIMM; 1889 } 1890 1891 /* 1892 * DIMM information in a UMC can be somewhat confusing. There are quite 1893 * a number of non-zero reset values that are here. Flag whether or not 1894 * we think this entry should be usable based on enabled chip-selects. 1895 */ 1896 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_BASE; i++) { 1897 if (dimm->ud_cs[i].ucs_base.udb_valid || 1898 dimm->ud_cs[i].ucs_sec.udb_valid) { 1899 dimm->ud_flags |= UMC_DIMM_F_VALID; 1900 break; 1901 } 1902 } 1903 1904 return (B_TRUE); 1905 } 1906 1907 /* 1908 * Fill all the information about a DDR4 DIMM. In the DDR4 UMC, some of this 1909 * information is on a per-chip select basis while at other times it is on a 1910 * per-DIMM basis. 
In general, chip-selects 0/1 correspond to DIMM 0, and 1911 * chip-selects 2/3 correspond to DIMM 1. To normalize things with the DDR5 UMC 1912 * which generally has things stored on a per-rank/chips-select basis, we 1913 * duplicate information that is DIMM-wide into the chip-select data structure 1914 * (umc_cs_t). 1915 */ 1916 static boolean_t 1917 zen_umc_fill_chan_dimm_ddr4(zen_umc_t *umc, zen_umc_df_t *df, 1918 zen_umc_chan_t *chan, const uint_t dimmno) 1919 { 1920 umc_dimm_t *dimm; 1921 umc_cs_t *cs0, *cs1; 1922 const uint32_t id = chan->chan_logid; 1923 int ret; 1924 uint32_t val; 1925 smn_reg_t reg; 1926 1927 ASSERT3U(dimmno, <, ZEN_UMC_MAX_DIMMS); 1928 dimm = &chan->chan_dimms[dimmno]; 1929 cs0 = &dimm->ud_cs[0]; 1930 cs1 = &dimm->ud_cs[1]; 1931 1932 /* 1933 * DDR4 organization has initial data that exists on a per-chip select 1934 * basis. The rest of it is on a per-DIMM basis. First we grab the 1935 * per-chip-select data. After this for loop, we will always duplicate 1936 * all data that we gather into both chip-selects. 
1937 */ 1938 for (uint_t i = 0; i < ZEN_UMC_MAX_CS_PER_DIMM; i++) { 1939 uint64_t addr; 1940 const uint16_t reginst = i + dimmno * 2; 1941 reg = UMC_BASE(id, reginst); 1942 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 1943 dev_err(umc->umc_dip, CE_WARN, "failed to read base " 1944 "register %x: %d", SMN_REG_ADDR(reg), ret); 1945 return (B_FALSE); 1946 } 1947 1948 addr = (uint64_t)UMC_BASE_GET_ADDR(val) << UMC_BASE_ADDR_SHIFT; 1949 dimm->ud_cs[i].ucs_base.udb_base = addr; 1950 dimm->ud_cs[i].ucs_base.udb_valid = UMC_BASE_GET_EN(val); 1951 1952 reg = UMC_BASE_SEC(id, reginst); 1953 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 1954 dev_err(umc->umc_dip, CE_WARN, "failed to read " 1955 "secondary base register %x: %d", SMN_REG_ADDR(reg), 1956 ret); 1957 return (B_FALSE); 1958 } 1959 1960 addr = (uint64_t)UMC_BASE_GET_ADDR(val) << UMC_BASE_ADDR_SHIFT; 1961 dimm->ud_cs[i].ucs_sec.udb_base = addr; 1962 dimm->ud_cs[i].ucs_sec.udb_valid = UMC_BASE_GET_EN(val); 1963 } 1964 1965 reg = UMC_MASK_DDR4(id, dimmno); 1966 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 1967 dev_err(umc->umc_dip, CE_WARN, "failed to read mask register " 1968 "%x: %d", SMN_REG_ADDR(reg), ret); 1969 return (B_FALSE); 1970 } 1971 1972 /* 1973 * When we extract the masks, hardware only checks a limited range of 1974 * bits. Therefore we need to always OR in those lower order bits. 
1975 */ 1976 cs0->ucs_base_mask = (uint64_t)UMC_MASK_GET_ADDR(val) << 1977 UMC_MASK_ADDR_SHIFT; 1978 cs0->ucs_base_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1; 1979 cs1->ucs_base_mask = cs0->ucs_base_mask; 1980 1981 reg = UMC_MASK_SEC_DDR4(id, dimmno); 1982 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 1983 dev_err(umc->umc_dip, CE_WARN, "failed to read secondary mask " 1984 "register %x: %d", SMN_REG_ADDR(reg), ret); 1985 return (B_FALSE); 1986 } 1987 cs0->ucs_sec_mask = (uint64_t)UMC_MASK_GET_ADDR(val) << 1988 UMC_MASK_ADDR_SHIFT; 1989 cs0->ucs_sec_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1; 1990 cs1->ucs_sec_mask = cs0->ucs_sec_mask; 1991 1992 reg = UMC_ADDRCFG_DDR4(id, dimmno); 1993 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 1994 dev_err(umc->umc_dip, CE_WARN, "failed to read address config " 1995 "register %x: %d", SMN_REG_ADDR(reg), ret); 1996 return (B_FALSE); 1997 } 1998 1999 cs0->ucs_nbanks = UMC_ADDRCFG_GET_NBANK_BITS(val) + 2000 UMC_ADDRCFG_NBANK_BITS_BASE; 2001 cs1->ucs_nbanks = cs0->ucs_nbanks; 2002 cs0->ucs_ncol = UMC_ADDRCFG_GET_NCOL_BITS(val) + 2003 UMC_ADDRCFG_NCOL_BITS_BASE; 2004 cs1->ucs_ncol = cs0->ucs_ncol; 2005 cs0->ucs_nrow_hi = UMC_ADDRCFG_DDR4_GET_NROW_BITS_HI(val); 2006 cs1->ucs_nrow_hi = cs0->ucs_nrow_hi; 2007 cs0->ucs_nrow_lo = UMC_ADDRCFG_GET_NROW_BITS_LO(val) + 2008 UMC_ADDRCFG_NROW_BITS_LO_BASE; 2009 cs1->ucs_nrow_lo = cs0->ucs_nrow_lo; 2010 cs0->ucs_nbank_groups = UMC_ADDRCFG_GET_NBANKGRP_BITS(val); 2011 cs1->ucs_nbank_groups = cs0->ucs_nbank_groups; 2012 /* 2013 * As the chip-select XORs don't always show up, use a dummy value 2014 * that'll result in no change occurring here. 2015 */ 2016 cs0->ucs_cs_xor = cs1->ucs_cs_xor = 0; 2017 2018 /* 2019 * APUs don't seem to support various rank select bits. 
2020 */ 2021 if (umc->umc_fdata->zufd_umc_style == ZEN_UMC_UMC_S_DDR4) { 2022 cs0->ucs_nrm = UMC_ADDRCFG_DDR4_GET_NRM_BITS(val); 2023 cs1->ucs_nrm = cs0->ucs_nrm; 2024 } else { 2025 cs0->ucs_nrm = cs1->ucs_nrm = 0; 2026 } 2027 2028 reg = UMC_ADDRSEL_DDR4(id, dimmno); 2029 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2030 dev_err(umc->umc_dip, CE_WARN, "failed to read bank address " 2031 "select register %x: %d", SMN_REG_ADDR(reg), ret); 2032 return (B_FALSE); 2033 } 2034 cs0->ucs_row_hi_bit = UMC_ADDRSEL_DDR4_GET_ROW_HI(val) + 2035 UMC_ADDRSEL_DDR4_ROW_HI_BASE; 2036 cs1->ucs_row_hi_bit = cs0->ucs_row_hi_bit; 2037 cs0->ucs_row_low_bit = UMC_ADDRSEL_GET_ROW_LO(val) + 2038 UMC_ADDRSEL_ROW_LO_BASE; 2039 cs1->ucs_row_low_bit = cs0->ucs_row_low_bit; 2040 cs0->ucs_bank_bits[0] = UMC_ADDRSEL_GET_BANK0(val) + 2041 UMC_ADDRSEL_BANK_BASE; 2042 cs0->ucs_bank_bits[1] = UMC_ADDRSEL_GET_BANK1(val) + 2043 UMC_ADDRSEL_BANK_BASE; 2044 cs0->ucs_bank_bits[2] = UMC_ADDRSEL_GET_BANK2(val) + 2045 UMC_ADDRSEL_BANK_BASE; 2046 cs0->ucs_bank_bits[3] = UMC_ADDRSEL_GET_BANK3(val) + 2047 UMC_ADDRSEL_BANK_BASE; 2048 cs0->ucs_bank_bits[4] = UMC_ADDRSEL_GET_BANK4(val) + 2049 UMC_ADDRSEL_BANK_BASE; 2050 bcopy(cs0->ucs_bank_bits, cs1->ucs_bank_bits, 2051 sizeof (cs0->ucs_bank_bits)); 2052 2053 reg = UMC_COLSEL_LO_DDR4(id, dimmno); 2054 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2055 dev_err(umc->umc_dip, CE_WARN, "failed to read column address " 2056 "select low register %x: %d", SMN_REG_ADDR(reg), ret); 2057 return (B_FALSE); 2058 } 2059 for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) { 2060 cs0->ucs_col_bits[i] = UMC_COLSEL_REMAP_GET_COL(val, i) + 2061 UMC_COLSEL_LO_BASE; 2062 } 2063 2064 reg = UMC_COLSEL_HI_DDR4(id, dimmno); 2065 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2066 dev_err(umc->umc_dip, CE_WARN, "failed to read column address " 2067 "select high register %x: %d", SMN_REG_ADDR(reg), ret); 2068 return (B_FALSE); 2069 } 
2070 for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) { 2071 cs0->ucs_col_bits[i + ZEN_UMC_MAX_COLSEL_PER_REG] = 2072 UMC_COLSEL_REMAP_GET_COL(val, i) + UMC_COLSEL_HI_BASE; 2073 } 2074 bcopy(cs0->ucs_col_bits, cs1->ucs_col_bits, sizeof (cs0->ucs_col_bits)); 2075 2076 /* 2077 * The next two registers give us information about a given rank select. 2078 * In the APUs, the inversion bits are there; however, the actual bit 2079 * selects are not. In this case we read the reserved bits regardless. 2080 * They should be ignored due to the fact that the number of banks is 2081 * zero. 2082 */ 2083 reg = UMC_RMSEL_DDR4(id, dimmno); 2084 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2085 dev_err(umc->umc_dip, CE_WARN, "failed to read rank address " 2086 "select register %x: %d", SMN_REG_ADDR(reg), ret); 2087 return (B_FALSE); 2088 } 2089 cs0->ucs_inv_msbs = UMC_RMSEL_DDR4_GET_INV_MSBE(val); 2090 cs1->ucs_inv_msbs = UMC_RMSEL_DDR4_GET_INV_MSBO(val); 2091 cs0->ucs_rm_bits[0] = UMC_RMSEL_DDR4_GET_RM0(val) + 2092 UMC_RMSEL_BASE; 2093 cs0->ucs_rm_bits[1] = UMC_RMSEL_DDR4_GET_RM1(val) + 2094 UMC_RMSEL_BASE; 2095 cs0->ucs_rm_bits[2] = UMC_RMSEL_DDR4_GET_RM2(val) + 2096 UMC_RMSEL_BASE; 2097 bcopy(cs0->ucs_rm_bits, cs1->ucs_rm_bits, sizeof (cs0->ucs_rm_bits)); 2098 2099 reg = UMC_RMSEL_SEC_DDR4(id, dimmno); 2100 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2101 dev_err(umc->umc_dip, CE_WARN, "failed to read secondary rank " 2102 "address select register %x: %d", SMN_REG_ADDR(reg), ret); 2103 return (B_FALSE); 2104 } 2105 cs0->ucs_inv_msbs_sec = UMC_RMSEL_DDR4_GET_INV_MSBE(val); 2106 cs1->ucs_inv_msbs_sec = UMC_RMSEL_DDR4_GET_INV_MSBO(val); 2107 cs0->ucs_rm_bits_sec[0] = UMC_RMSEL_DDR4_GET_RM0(val) + 2108 UMC_RMSEL_BASE; 2109 cs0->ucs_rm_bits_sec[1] = UMC_RMSEL_DDR4_GET_RM1(val) + 2110 UMC_RMSEL_BASE; 2111 cs0->ucs_rm_bits_sec[2] = UMC_RMSEL_DDR4_GET_RM2(val) + 2112 UMC_RMSEL_BASE; 2113 bcopy(cs0->ucs_rm_bits_sec, cs1->ucs_rm_bits_sec, 2114 
sizeof (cs0->ucs_rm_bits_sec)); 2115 2116 return (zen_umc_fill_dimm_common(umc, df, chan, dimmno, B_TRUE)); 2117 } 2118 2119 /* 2120 * The DDR5 based systems are organized such that almost all the information we 2121 * care about is split between two different chip-select structures in the UMC 2122 * hardware SMN space. 2123 */ 2124 static boolean_t 2125 zen_umc_fill_chan_rank_ddr5(zen_umc_t *umc, zen_umc_df_t *df, 2126 zen_umc_chan_t *chan, const uint_t dimmno, const uint_t rankno) 2127 { 2128 int ret; 2129 umc_cs_t *cs; 2130 uint32_t val; 2131 smn_reg_t reg; 2132 const uint32_t id = chan->chan_logid; 2133 const uint32_t regno = dimmno * 2 + rankno; 2134 2135 ASSERT3U(dimmno, <, ZEN_UMC_MAX_DIMMS); 2136 ASSERT3U(rankno, <, ZEN_UMC_MAX_CS_PER_DIMM); 2137 cs = &chan->chan_dimms[dimmno].ud_cs[rankno]; 2138 2139 reg = UMC_BASE(id, regno); 2140 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2141 dev_err(umc->umc_dip, CE_WARN, "failed to read base " 2142 "register %x: %d", SMN_REG_ADDR(reg), ret); 2143 return (B_FALSE); 2144 } 2145 cs->ucs_base.udb_base = (uint64_t)UMC_BASE_GET_ADDR(val) << 2146 UMC_BASE_ADDR_SHIFT; 2147 cs->ucs_base.udb_valid = UMC_BASE_GET_EN(val); 2148 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) { 2149 uint64_t addr; 2150 2151 reg = UMC_BASE_EXT_DDR5(id, regno); 2152 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 2153 0) { 2154 dev_err(umc->umc_dip, CE_WARN, "failed to read " 2155 "extended base register %x: %d", SMN_REG_ADDR(reg), 2156 ret); 2157 return (B_FALSE); 2158 } 2159 2160 addr = (uint64_t)UMC_BASE_EXT_GET_ADDR(val) << 2161 UMC_BASE_EXT_ADDR_SHIFT; 2162 cs->ucs_base.udb_base |= addr; 2163 } 2164 2165 reg = UMC_BASE_SEC(id, regno); 2166 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2167 dev_err(umc->umc_dip, CE_WARN, "failed to read secondary base " 2168 "register %x: %d", SMN_REG_ADDR(reg), ret); 2169 return (B_FALSE); 2170 } 2171 cs->ucs_sec.udb_base = 
(uint64_t)UMC_BASE_GET_ADDR(val) << 2172 UMC_BASE_ADDR_SHIFT; 2173 cs->ucs_sec.udb_valid = UMC_BASE_GET_EN(val); 2174 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) { 2175 uint64_t addr; 2176 2177 reg = UMC_BASE_EXT_SEC_DDR5(id, regno); 2178 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 2179 0) { 2180 dev_err(umc->umc_dip, CE_WARN, "failed to read " 2181 "extended secondary base register %x: %d", 2182 SMN_REG_ADDR(reg), ret); 2183 return (B_FALSE); 2184 } 2185 2186 addr = (uint64_t)UMC_BASE_EXT_GET_ADDR(val) << 2187 UMC_BASE_EXT_ADDR_SHIFT; 2188 cs->ucs_sec.udb_base |= addr; 2189 } 2190 2191 reg = UMC_MASK_DDR5(id, regno); 2192 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2193 dev_err(umc->umc_dip, CE_WARN, "failed to read mask " 2194 "register %x: %d", SMN_REG_ADDR(reg), ret); 2195 return (B_FALSE); 2196 } 2197 cs->ucs_base_mask = (uint64_t)UMC_MASK_GET_ADDR(val) << 2198 UMC_MASK_ADDR_SHIFT; 2199 cs->ucs_base_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1; 2200 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) { 2201 uint64_t addr; 2202 2203 reg = UMC_MASK_EXT_DDR5(id, regno); 2204 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 2205 0) { 2206 dev_err(umc->umc_dip, CE_WARN, "failed to read " 2207 "extended mask register %x: %d", SMN_REG_ADDR(reg), 2208 ret); 2209 return (B_FALSE); 2210 } 2211 2212 addr = (uint64_t)UMC_MASK_EXT_GET_ADDR(val) << 2213 UMC_MASK_EXT_ADDR_SHIFT; 2214 cs->ucs_base_mask |= addr; 2215 } 2216 2217 2218 reg = UMC_MASK_SEC_DDR5(id, regno); 2219 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2220 dev_err(umc->umc_dip, CE_WARN, "failed to read secondary mask " 2221 "register %x: %d", SMN_REG_ADDR(reg), ret); 2222 return (B_FALSE); 2223 } 2224 cs->ucs_sec_mask = (uint64_t)UMC_MASK_GET_ADDR(val) << 2225 UMC_MASK_ADDR_SHIFT; 2226 cs->ucs_sec_mask |= (1 << UMC_MASK_ADDR_SHIFT) - 1; 2227 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_UMC_EADDR) != 0) { 2228 uint64_t 
addr; 2229 2230 reg = UMC_MASK_EXT_SEC_DDR5(id, regno); 2231 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 2232 0) { 2233 dev_err(umc->umc_dip, CE_WARN, "failed to read " 2234 "extended mask register %x: %d", SMN_REG_ADDR(reg), 2235 ret); 2236 return (B_FALSE); 2237 } 2238 2239 addr = (uint64_t)UMC_MASK_EXT_GET_ADDR(val) << 2240 UMC_MASK_EXT_ADDR_SHIFT; 2241 cs->ucs_sec_mask |= addr; 2242 } 2243 2244 reg = UMC_ADDRCFG_DDR5(id, regno); 2245 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2246 dev_err(umc->umc_dip, CE_WARN, "failed to read address config " 2247 "register %x: %d", SMN_REG_ADDR(reg), ret); 2248 return (B_FALSE); 2249 } 2250 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_CS_XOR) != 0) { 2251 cs->ucs_cs_xor = UMC_ADDRCFG_DDR5_GET_CSXOR(val); 2252 } else { 2253 cs->ucs_cs_xor = 0; 2254 } 2255 cs->ucs_nbanks = UMC_ADDRCFG_GET_NBANK_BITS(val) + 2256 UMC_ADDRCFG_NBANK_BITS_BASE; 2257 cs->ucs_ncol = UMC_ADDRCFG_GET_NCOL_BITS(val) + 2258 UMC_ADDRCFG_NCOL_BITS_BASE; 2259 cs->ucs_nrow_lo = UMC_ADDRCFG_GET_NROW_BITS_LO(val) + 2260 UMC_ADDRCFG_NROW_BITS_LO_BASE; 2261 cs->ucs_nrow_hi = 0; 2262 cs->ucs_nrm = UMC_ADDRCFG_DDR5_GET_NRM_BITS(val); 2263 cs->ucs_nbank_groups = UMC_ADDRCFG_GET_NBANKGRP_BITS(val); 2264 2265 reg = UMC_ADDRSEL_DDR5(id, regno); 2266 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2267 dev_err(umc->umc_dip, CE_WARN, "failed to read address select " 2268 "register %x: %d", SMN_REG_ADDR(reg), ret); 2269 return (B_FALSE); 2270 } 2271 cs->ucs_row_hi_bit = 0; 2272 cs->ucs_row_low_bit = UMC_ADDRSEL_GET_ROW_LO(val) + 2273 UMC_ADDRSEL_ROW_LO_BASE; 2274 cs->ucs_bank_bits[4] = UMC_ADDRSEL_GET_BANK4(val) + 2275 UMC_ADDRSEL_BANK_BASE; 2276 cs->ucs_bank_bits[3] = UMC_ADDRSEL_GET_BANK3(val) + 2277 UMC_ADDRSEL_BANK_BASE; 2278 cs->ucs_bank_bits[2] = UMC_ADDRSEL_GET_BANK2(val) + 2279 UMC_ADDRSEL_BANK_BASE; 2280 cs->ucs_bank_bits[1] = UMC_ADDRSEL_GET_BANK1(val) + 2281 UMC_ADDRSEL_BANK_BASE; 2282 cs->ucs_bank_bits[0] = 
UMC_ADDRSEL_GET_BANK0(val) + 2283 UMC_ADDRSEL_BANK_BASE; 2284 2285 reg = UMC_COLSEL_LO_DDR5(id, regno); 2286 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2287 dev_err(umc->umc_dip, CE_WARN, "failed to read column address " 2288 "select low register %x: %d", SMN_REG_ADDR(reg), ret); 2289 return (B_FALSE); 2290 } 2291 for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) { 2292 cs->ucs_col_bits[i] = UMC_COLSEL_REMAP_GET_COL(val, i) + 2293 UMC_COLSEL_LO_BASE; 2294 } 2295 2296 reg = UMC_COLSEL_HI_DDR5(id, regno); 2297 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2298 dev_err(umc->umc_dip, CE_WARN, "failed to read column address " 2299 "select high register %x: %d", SMN_REG_ADDR(reg), ret); 2300 return (B_FALSE); 2301 } 2302 for (uint_t i = 0; i < ZEN_UMC_MAX_COLSEL_PER_REG; i++) { 2303 cs->ucs_col_bits[i + ZEN_UMC_MAX_COLSEL_PER_REG] = 2304 UMC_COLSEL_REMAP_GET_COL(val, i) + UMC_COLSEL_HI_BASE; 2305 } 2306 2307 /* 2308 * Time for our friend, the RM Selection register. Like in DDR4 we end 2309 * up reading everything here, even though most others have reserved 2310 * bits here. The intent is that we won't look at the reserved bits 2311 * unless something actually points us there. 2312 */ 2313 reg = UMC_RMSEL_DDR5(id, regno); 2314 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2315 dev_err(umc->umc_dip, CE_WARN, "failed to read rank multiply " 2316 "select register %x: %d", SMN_REG_ADDR(reg), ret); 2317 return (B_FALSE); 2318 } 2319 2320 /* 2321 * DDR5 based devices have a primary and secondary msbs; however, they 2322 * only have a single set of rm bits. To normalize things with the DDR4 2323 * subsystem, we copy the primary bits to the secondary so we can use 2324 * these the same way in the decoder/encoder. 
2325 */ 2326 cs->ucs_inv_msbs = UMC_RMSEL_DDR5_GET_INV_MSBS(val); 2327 cs->ucs_inv_msbs_sec = UMC_RMSEL_DDR5_GET_INV_MSBS_SEC(val); 2328 cs->ucs_subchan = UMC_RMSEL_DDR5_GET_SUBCHAN(val) + 2329 UMC_RMSEL_DDR5_SUBCHAN_BASE; 2330 cs->ucs_rm_bits[3] = UMC_RMSEL_DDR5_GET_RM3(val) + UMC_RMSEL_BASE; 2331 cs->ucs_rm_bits[2] = UMC_RMSEL_DDR5_GET_RM2(val) + UMC_RMSEL_BASE; 2332 cs->ucs_rm_bits[1] = UMC_RMSEL_DDR5_GET_RM1(val) + UMC_RMSEL_BASE; 2333 cs->ucs_rm_bits[0] = UMC_RMSEL_DDR5_GET_RM0(val) + UMC_RMSEL_BASE; 2334 bcopy(cs->ucs_rm_bits, cs->ucs_rm_bits_sec, 2335 sizeof (cs->ucs_rm_bits)); 2336 2337 return (zen_umc_fill_dimm_common(umc, df, chan, dimmno, B_FALSE)); 2338 } 2339 2340 2341 static void 2342 zen_umc_fill_ddr_type(zen_umc_chan_t *chan, boolean_t ddr4) 2343 { 2344 umc_dimm_type_t dimm = UMC_DIMM_T_UNKNOWN; 2345 uint8_t val; 2346 2347 /* 2348 * The DDR4 and DDR5 values while overlapping in some parts of this 2349 * space (e.g. DDR4 values), are otherwise actually different in all the 2350 * space in-between. As such we need to treat them differently in case 2351 * we encounter something we don't expect. 2352 */ 2353 val = UMC_UMCCFG_GET_DDR_TYPE(chan->chan_umccfg_raw); 2354 if (ddr4) { 2355 switch (val) { 2356 case UMC_UMCCFG_DDR4_T_DDR4: 2357 dimm = UMC_DIMM_T_DDR4; 2358 break; 2359 case UMC_UMCCFG_DDR4_T_LPDDR4: 2360 dimm = UMC_DIMM_T_LPDDR4; 2361 break; 2362 default: 2363 break; 2364 } 2365 } else { 2366 switch (val) { 2367 case UMC_UMCCFG_DDR5_T_DDR5: 2368 dimm = UMC_DIMM_T_DDR5; 2369 break; 2370 case UMC_UMCCFG_DDR5_T_LPDDR5: 2371 dimm = UMC_DIMM_T_LPDDR5; 2372 break; 2373 default: 2374 break; 2375 } 2376 } 2377 2378 for (uint_t i = 0; i < ZEN_UMC_MAX_DIMMS; i++) { 2379 chan->chan_dimms[i].ud_type = dimm; 2380 } 2381 } 2382 2383 /* 2384 * Fill common channel information. While the locations of many of the registers 2385 * changed between the DDR4-capable and DDR5-capable devices, the actual 2386 * contents are the same so we process them together. 
2387 */ 2388 static boolean_t 2389 zen_umc_fill_chan_hash(zen_umc_t *umc, zen_umc_df_t *df, zen_umc_chan_t *chan, 2390 boolean_t ddr4) 2391 { 2392 int ret; 2393 smn_reg_t reg; 2394 uint32_t val; 2395 2396 const umc_chan_hash_flags_t flags = umc->umc_fdata->zufd_chan_hash; 2397 const uint32_t id = chan->chan_logid; 2398 umc_chan_hash_t *chash = &chan->chan_hash; 2399 chash->uch_flags = flags; 2400 2401 if ((flags & UMC_CHAN_HASH_F_BANK) != 0) { 2402 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_BANK_HASH; i++) { 2403 umc_bank_hash_t *bank = &chash->uch_bank_hashes[i]; 2404 2405 if (ddr4) { 2406 reg = UMC_BANK_HASH_DDR4(id, i); 2407 } else { 2408 reg = UMC_BANK_HASH_DDR5(id, i); 2409 } 2410 2411 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, 2412 &val)) != 0) { 2413 dev_err(umc->umc_dip, CE_WARN, "failed to read " 2414 "bank hash register %x: %d", 2415 SMN_REG_ADDR(reg), ret); 2416 return (B_FALSE); 2417 } 2418 2419 bank->ubh_row_xor = UMC_BANK_HASH_GET_ROW(val); 2420 bank->ubh_col_xor = UMC_BANK_HASH_GET_COL(val); 2421 bank->ubh_en = UMC_BANK_HASH_GET_EN(val); 2422 } 2423 } 2424 2425 if ((flags & UMC_CHAN_HASH_F_RM) != 0) { 2426 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_RM_HASH; i++) { 2427 uint64_t addr; 2428 umc_addr_hash_t *rm = &chash->uch_rm_hashes[i]; 2429 2430 if (ddr4) { 2431 reg = UMC_RANK_HASH_DDR4(id, i); 2432 } else { 2433 reg = UMC_RANK_HASH_DDR5(id, i); 2434 } 2435 2436 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, 2437 &val)) != 0) { 2438 dev_err(umc->umc_dip, CE_WARN, "failed to read " 2439 "rm hash register %x: %d", 2440 SMN_REG_ADDR(reg), ret); 2441 return (B_FALSE); 2442 } 2443 2444 addr = UMC_RANK_HASH_GET_ADDR(val); 2445 rm->uah_addr_xor = addr << UMC_RANK_HASH_SHIFT; 2446 rm->uah_en = UMC_RANK_HASH_GET_EN(val); 2447 2448 if (ddr4 || (umc->umc_fdata->zufd_flags & 2449 ZEN_UMC_FAM_F_UMC_EADDR) == 0) { 2450 continue; 2451 } 2452 2453 reg = UMC_RANK_HASH_EXT_DDR5(id, i); 2454 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, 2455 &val)) != 0) { 2456 
dev_err(umc->umc_dip, CE_WARN, "failed to read " 2457 "rm hash ext register %x: %d", 2458 SMN_REG_ADDR(reg), ret); 2459 return (B_FALSE); 2460 } 2461 2462 addr = UMC_RANK_HASH_EXT_GET_ADDR(val); 2463 rm->uah_addr_xor |= addr << 2464 UMC_RANK_HASH_EXT_ADDR_SHIFT; 2465 } 2466 } 2467 2468 if ((flags & UMC_CHAN_HASH_F_PC) != 0) { 2469 umc_pc_hash_t *pc = &chash->uch_pc_hash; 2470 2471 if (ddr4) { 2472 reg = UMC_PC_HASH_DDR4(id); 2473 } else { 2474 reg = UMC_PC_HASH_DDR5(id); 2475 } 2476 2477 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2478 dev_err(umc->umc_dip, CE_WARN, "failed to read pc hash " 2479 "register %x: %d", SMN_REG_ADDR(reg), ret); 2480 return (B_FALSE); 2481 } 2482 2483 pc->uph_row_xor = UMC_PC_HASH_GET_ROW(val); 2484 pc->uph_col_xor = UMC_PC_HASH_GET_COL(val); 2485 pc->uph_en = UMC_PC_HASH_GET_EN(val); 2486 2487 if (ddr4) { 2488 reg = UMC_PC_HASH2_DDR4(id); 2489 } else { 2490 reg = UMC_PC_HASH2_DDR5(id); 2491 } 2492 2493 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2494 dev_err(umc->umc_dip, CE_WARN, "failed to read pc hash " 2495 "2 register %x: %d", SMN_REG_ADDR(reg), ret); 2496 return (B_FALSE); 2497 } 2498 2499 pc->uph_bank_xor = UMC_PC_HASH2_GET_BANK(val); 2500 } 2501 2502 if ((flags & UMC_CHAN_HASH_F_CS) != 0) { 2503 for (uint_t i = 0; i < ZEN_UMC_MAX_CHAN_CS_HASH; i++) { 2504 uint64_t addr; 2505 umc_addr_hash_t *rm = &chash->uch_cs_hashes[i]; 2506 2507 if (ddr4) { 2508 reg = UMC_CS_HASH_DDR4(id, i); 2509 } else { 2510 reg = UMC_CS_HASH_DDR5(id, i); 2511 } 2512 2513 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, 2514 &val)) != 0) { 2515 dev_err(umc->umc_dip, CE_WARN, "failed to read " 2516 "cs hash register %x", SMN_REG_ADDR(reg)); 2517 return (B_FALSE); 2518 } 2519 2520 addr = UMC_CS_HASH_GET_ADDR(val); 2521 rm->uah_addr_xor = addr << UMC_CS_HASH_SHIFT; 2522 rm->uah_en = UMC_CS_HASH_GET_EN(val); 2523 2524 if (ddr4 || (umc->umc_fdata->zufd_flags & 2525 ZEN_UMC_FAM_F_UMC_EADDR) == 0) { 2526 continue; 2527 } 2528 
2529 reg = UMC_CS_HASH_EXT_DDR5(id, i); 2530 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, 2531 &val)) != 0) { 2532 dev_err(umc->umc_dip, CE_WARN, "failed to read " 2533 "cs hash ext register %x", 2534 SMN_REG_ADDR(reg)); 2535 return (B_FALSE); 2536 } 2537 2538 addr = UMC_CS_HASH_EXT_GET_ADDR(val); 2539 rm->uah_addr_xor |= addr << UMC_CS_HASH_EXT_ADDR_SHIFT; 2540 } 2541 } 2542 2543 return (B_TRUE); 2544 } 2545 2546 /* 2547 * This fills in settings that we care about which are valid for the entire 2548 * channel and are the same between DDR4/5 capable devices. 2549 */ 2550 static boolean_t 2551 zen_umc_fill_chan(zen_umc_t *umc, zen_umc_df_t *df, zen_umc_chan_t *chan) 2552 { 2553 uint32_t val; 2554 smn_reg_t reg; 2555 const uint32_t id = chan->chan_logid; 2556 int ret; 2557 boolean_t ddr4; 2558 2559 if (umc->umc_fdata->zufd_umc_style == ZEN_UMC_UMC_S_DDR4 || 2560 umc->umc_fdata->zufd_umc_style == ZEN_UMC_UMC_S_DDR4_APU) { 2561 ddr4 = B_TRUE; 2562 } else { 2563 ddr4 = B_FALSE; 2564 } 2565 2566 /* 2567 * Begin by gathering all of the information related to hashing. What is 2568 * valid here varies based on the actual chip family and then the 2569 * registers vary based on DDR4 and DDR5. 2570 */ 2571 if (!zen_umc_fill_chan_hash(umc, df, chan, ddr4)) { 2572 return (B_FALSE); 2573 } 2574 2575 reg = UMC_UMCCFG(id); 2576 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2577 dev_err(umc->umc_dip, CE_WARN, "failed to read UMC " 2578 "configuration register %x: %d", SMN_REG_ADDR(reg), ret); 2579 return (B_FALSE); 2580 } 2581 2582 chan->chan_umccfg_raw = val; 2583 if (UMC_UMCCFG_GET_ECC_EN(val)) { 2584 chan->chan_flags |= UMC_CHAN_F_ECC_EN; 2585 } 2586 2587 /* 2588 * This register contains information to determine the type of DIMM. 2589 * All DIMMs in the channel must be the same type. As such, set this on 2590 * all DIMMs we've discovered. 
2591 */ 2592 zen_umc_fill_ddr_type(chan, ddr4); 2593 2594 /* 2595 * Grab data that we can use to determine if we're scrambling or 2596 * encrypting regions of memory. 2597 */ 2598 reg = UMC_DATACTL(id); 2599 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2600 dev_err(umc->umc_dip, CE_WARN, "failed to read data control " 2601 "register %x: %d", SMN_REG_ADDR(reg), ret); 2602 return (B_FALSE); 2603 } 2604 chan->chan_datactl_raw = val; 2605 if (UMC_DATACTL_GET_SCRAM_EN(val)) { 2606 chan->chan_flags |= UMC_CHAN_F_SCRAMBLE_EN; 2607 } 2608 2609 if (UMC_DATACTL_GET_ENCR_EN(val)) { 2610 chan->chan_flags |= UMC_CHAN_F_ENCR_EN; 2611 } 2612 2613 /* 2614 * At the moment we snapshot the raw ECC control information. When we do 2615 * further work of making this a part of the MCA/X decoding, we'll want 2616 * to further take this apart for syndrome decoding. Until then, simply 2617 * cache it for future us and observability. 2618 */ 2619 reg = UMC_ECCCTL(id); 2620 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2621 dev_err(umc->umc_dip, CE_WARN, "failed to read ECC control " 2622 "register %x: %d", SMN_REG_ADDR(reg), ret); 2623 return (B_FALSE); 2624 } 2625 chan->chan_eccctl_raw = val; 2626 2627 /* 2628 * Read and snapshot the UMC capability registers for debugging in the 2629 * future. 
2630 */ 2631 reg = UMC_UMCCAP(id); 2632 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2633 dev_err(umc->umc_dip, CE_WARN, "failed to read UMC cap" 2634 "register %x: %d", SMN_REG_ADDR(reg), ret); 2635 return (B_FALSE); 2636 } 2637 chan->chan_umccap_raw = val; 2638 2639 reg = UMC_UMCCAP_HI(id); 2640 if ((ret = amdzen_c_smn_read(df->zud_dfno, reg, &val)) != 0) { 2641 dev_err(umc->umc_dip, CE_WARN, "failed to read UMC cap high " 2642 "register %x: %d", SMN_REG_ADDR(reg), ret); 2643 return (B_FALSE); 2644 } 2645 chan->chan_umccap_hi_raw = val; 2646 2647 return (B_TRUE); 2648 } 2649 2650 static int 2651 zen_umc_fill_umc_cb(const uint_t dfno, const uint32_t fabid, 2652 const uint32_t instid, void *arg) 2653 { 2654 zen_umc_t *umc = arg; 2655 zen_umc_df_t *df = &umc->umc_dfs[dfno]; 2656 zen_umc_chan_t *chan = &df->zud_chan[df->zud_nchan]; 2657 2658 df->zud_nchan++; 2659 VERIFY3U(df->zud_nchan, <=, ZEN_UMC_MAX_UMCS); 2660 2661 /* 2662 * The data fabric is generally organized such that all UMC entries 2663 * should be continuous in their fabric ID space; however, we don't 2664 * want to rely on specific ID locations. The UMC SMN addresses are 2665 * organized in a relative order. To determine the SMN ID to use (the 2666 * chan_logid) we end up making the following assumptions: 2667 * 2668 * o The iteration order will always be from the lowest component ID 2669 * to the highest component ID. 2670 * o The relative order that we encounter will be the same as the SMN 2671 * order. That is, the first thing we find (regardless of component 2672 * ID) will be SMN UMC entry 0, the next 1, etc. 
2673 */ 2674 chan->chan_logid = df->zud_nchan - 1; 2675 chan->chan_fabid = fabid; 2676 chan->chan_instid = instid; 2677 chan->chan_nrules = umc->umc_fdata->zufd_cs_nrules; 2678 for (uint_t i = 0; i < umc->umc_fdata->zufd_cs_nrules; i++) { 2679 if (zen_umc_read_dram_rule(umc, dfno, instid, i, 2680 &chan->chan_rules[i]) != 0) { 2681 return (-1); 2682 } 2683 } 2684 2685 for (uint_t i = 0; i < umc->umc_fdata->zufd_cs_nrules - 1; i++) { 2686 int ret; 2687 uint32_t offset; 2688 uint64_t t; 2689 df_reg_def_t off_reg; 2690 chan_offset_t *offp = &chan->chan_offsets[i]; 2691 2692 switch (umc->umc_df_rev) { 2693 case DF_REV_2: 2694 case DF_REV_3: 2695 case DF_REV_3P5: 2696 ASSERT3U(i, ==, 0); 2697 off_reg = DF_DRAM_OFFSET_V2; 2698 break; 2699 case DF_REV_4: 2700 off_reg = DF_DRAM_OFFSET_V4(i); 2701 break; 2702 default: 2703 dev_err(umc->umc_dip, CE_WARN, "!encountered " 2704 "unsupported DF revision processing DRAM Offsets: " 2705 "0x%x", umc->umc_df_rev); 2706 return (-1); 2707 } 2708 2709 if ((ret = amdzen_c_df_read32(dfno, instid, off_reg, 2710 &offset)) != 0) { 2711 dev_err(umc->umc_dip, CE_WARN, "!failed to read DRAM " 2712 "offset %u on 0x%x/0x%x: %d", i, dfno, instid, ret); 2713 return (-1); 2714 } 2715 2716 offp->cho_raw = offset; 2717 offp->cho_valid = DF_DRAM_OFFSET_GET_EN(offset); 2718 2719 switch (umc->umc_df_rev) { 2720 case DF_REV_2: 2721 t = DF_DRAM_OFFSET_V2_GET_OFFSET(offset); 2722 break; 2723 case DF_REV_3: 2724 case DF_REV_3P5: 2725 t = DF_DRAM_OFFSET_V3_GET_OFFSET(offset); 2726 break; 2727 case DF_REV_4: 2728 t = DF_DRAM_OFFSET_V4_GET_OFFSET(offset); 2729 break; 2730 default: 2731 dev_err(umc->umc_dip, CE_WARN, "!encountered " 2732 "unsupported DF revision processing DRAM Offsets: " 2733 "0x%x", umc->umc_df_rev); 2734 return (-1); 2735 } 2736 offp->cho_offset = t << DF_DRAM_OFFSET_SHIFT; 2737 } 2738 2739 /* 2740 * If this platform supports our favorete Zen 3 6-channel hash special 2741 * then we need to grab the NP2 configuration registers. 
This will only 2742 * be referenced if this channel is actually being used for a 6-channel 2743 * hash, so even if the contents are weird that should still be ok. 2744 */ 2745 if ((umc->umc_fdata->zufd_flags & ZEN_UMC_FAM_F_NP2) != 0) { 2746 uint32_t np2; 2747 int ret; 2748 2749 if ((ret = amdzen_c_df_read32(dfno, instid, DF_NP2_CONFIG_V3, 2750 &np2)) != 0) { 2751 dev_err(umc->umc_dip, CE_WARN, "!failed to read NP2 " 2752 "config: %d", ret); 2753 return (-1); 2754 } 2755 2756 chan->chan_np2_raw = np2; 2757 chan->chan_np2_space0 = DF_NP2_CONFIG_V3_GET_SPACE0(np2); 2758 } 2759 2760 /* 2761 * Now that we have everything we need from the data fabric, read out 2762 * the rest of what we need from the UMC channel data in SMN register 2763 * space. 2764 */ 2765 switch (umc->umc_fdata->zufd_umc_style) { 2766 case ZEN_UMC_UMC_S_DDR4: 2767 case ZEN_UMC_UMC_S_DDR4_APU: 2768 for (uint_t i = 0; i < ZEN_UMC_MAX_DIMMS; i++) { 2769 if (!zen_umc_fill_chan_dimm_ddr4(umc, df, chan, i)) { 2770 return (-1); 2771 } 2772 } 2773 break; 2774 case ZEN_UMC_UMC_S_DDR5: 2775 case ZEN_UMC_UMC_S_DDR5_APU: 2776 for (uint_t i = 0; i < ZEN_UMC_MAX_DIMMS; i++) { 2777 for (uint_t r = 0; r < ZEN_UMC_MAX_CS_PER_DIMM; r++) { 2778 if (!zen_umc_fill_chan_rank_ddr5(umc, df, chan, 2779 i, r)) { 2780 return (-1); 2781 } 2782 } 2783 } 2784 break; 2785 default: 2786 dev_err(umc->umc_dip, CE_WARN, "!encountered unsupported " 2787 "Zen family: 0x%x", umc->umc_fdata->zufd_umc_style); 2788 return (-1); 2789 } 2790 2791 if (!zen_umc_fill_chan(umc, df, chan)) { 2792 return (-1); 2793 } 2794 2795 return (0); 2796 } 2797 2798 /* 2799 * Today there are no privileges for the memory controller information, it is 2800 * restricted based on file system permissions. 
2801 */ 2802 static int 2803 zen_umc_open(dev_t *devp, int flag, int otyp, cred_t *credp) 2804 { 2805 zen_umc_t *umc = zen_umc; 2806 2807 if ((flag & (FEXCL | FNDELAY | FNONBLOCK | FWRITE)) != 0) { 2808 return (EINVAL); 2809 } 2810 2811 if (otyp != OTYP_CHR) { 2812 return (EINVAL); 2813 } 2814 2815 if (getminor(*devp) >= umc->umc_ndfs) { 2816 return (ENXIO); 2817 } 2818 2819 return (0); 2820 } 2821 2822 static void 2823 zen_umc_ioctl_decode(zen_umc_t *umc, mc_encode_ioc_t *encode) 2824 { 2825 zen_umc_decoder_t dec; 2826 uint32_t sock, die, comp; 2827 2828 bzero(&dec, sizeof (dec)); 2829 if (!zen_umc_decode_pa(umc, encode->mcei_pa, &dec)) { 2830 encode->mcei_err = (uint32_t)dec.dec_fail; 2831 encode->mcei_errdata = dec.dec_fail_data; 2832 return; 2833 } 2834 2835 encode->mcei_errdata = 0; 2836 encode->mcei_err = 0; 2837 encode->mcei_chan_addr = dec.dec_norm_addr; 2838 encode->mcei_rank_addr = UINT64_MAX; 2839 encode->mcei_board = 0; 2840 zen_fabric_id_decompose(&umc->umc_decomp, dec.dec_targ_fabid, &sock, 2841 &die, &comp); 2842 encode->mcei_chip = sock; 2843 encode->mcei_die = die; 2844 encode->mcei_mc = dec.dec_umc_chan->chan_logid; 2845 encode->mcei_chan = 0; 2846 encode->mcei_dimm = dec.dec_dimm_no; 2847 encode->mcei_row = dec.dec_dimm_row; 2848 encode->mcei_column = dec.dec_dimm_col; 2849 /* 2850 * We don't have a logical rank that something matches to, we have the 2851 * actual chip-select and rank multiplication. If we could figure out 2852 * how to transform that into an actual rank, that'd be grand. 
2853 */ 2854 encode->mcei_rank = UINT8_MAX; 2855 encode->mcei_cs = dec.dec_dimm_csno; 2856 encode->mcei_rm = dec.dec_dimm_rm; 2857 encode->mcei_bank = dec.dec_dimm_bank; 2858 encode->mcei_bank_group = dec.dec_dimm_bank_group; 2859 encode->mcei_subchan = dec.dec_dimm_subchan; 2860 } 2861 2862 static void 2863 umc_decoder_pack(zen_umc_t *umc) 2864 { 2865 char *buf = NULL; 2866 size_t len = 0; 2867 2868 ASSERT(MUTEX_HELD(&umc->umc_nvl_lock)); 2869 if (umc->umc_decoder_buf != NULL) { 2870 return; 2871 } 2872 2873 if (umc->umc_decoder_nvl == NULL) { 2874 umc->umc_decoder_nvl = zen_umc_dump_decoder(umc); 2875 if (umc->umc_decoder_nvl == NULL) { 2876 return; 2877 } 2878 } 2879 2880 if (nvlist_pack(umc->umc_decoder_nvl, &buf, &len, NV_ENCODE_XDR, 2881 KM_NOSLEEP_LAZY) != 0) { 2882 return; 2883 } 2884 2885 umc->umc_decoder_buf = buf; 2886 umc->umc_decoder_len = len; 2887 } 2888 2889 static int 2890 zen_umc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 2891 int *rvalp) 2892 { 2893 int ret; 2894 zen_umc_t *umc = zen_umc; 2895 mc_encode_ioc_t encode; 2896 mc_snapshot_info_t info; 2897 2898 if (getminor(dev) >= umc->umc_ndfs) { 2899 return (ENXIO); 2900 } 2901 2902 switch (cmd) { 2903 case MC_IOC_DECODE_PA: 2904 if (crgetzoneid(credp) != GLOBAL_ZONEID || 2905 drv_priv(credp) != 0) { 2906 ret = EPERM; 2907 break; 2908 } 2909 2910 if (ddi_copyin((void *)arg, &encode, sizeof (encode), 2911 mode & FKIOCTL) != 0) { 2912 ret = EFAULT; 2913 break; 2914 } 2915 2916 zen_umc_ioctl_decode(umc, &encode); 2917 ret = 0; 2918 2919 if (ddi_copyout(&encode, (void *)arg, sizeof (encode), 2920 mode & FKIOCTL) != 0) { 2921 ret = EFAULT; 2922 break; 2923 } 2924 break; 2925 case MC_IOC_DECODE_SNAPSHOT_INFO: 2926 mutex_enter(&umc->umc_nvl_lock); 2927 umc_decoder_pack(umc); 2928 2929 if (umc->umc_decoder_buf == NULL) { 2930 mutex_exit(&umc->umc_nvl_lock); 2931 ret = EIO; 2932 break; 2933 } 2934 2935 if (umc->umc_decoder_len > UINT32_MAX) { 2936 mutex_exit(&umc->umc_nvl_lock); 2937 
ret = EOVERFLOW; 2938 break; 2939 } 2940 2941 info.mcs_size = umc->umc_decoder_len; 2942 info.mcs_gen = 0; 2943 if (ddi_copyout(&info, (void *)arg, sizeof (info), 2944 mode & FKIOCTL) != 0) { 2945 mutex_exit(&umc->umc_nvl_lock); 2946 ret = EFAULT; 2947 break; 2948 } 2949 2950 mutex_exit(&umc->umc_nvl_lock); 2951 ret = 0; 2952 break; 2953 case MC_IOC_DECODE_SNAPSHOT: 2954 mutex_enter(&umc->umc_nvl_lock); 2955 umc_decoder_pack(umc); 2956 2957 if (umc->umc_decoder_buf == NULL) { 2958 mutex_exit(&umc->umc_nvl_lock); 2959 ret = EIO; 2960 break; 2961 } 2962 2963 if (ddi_copyout(umc->umc_decoder_buf, (void *)arg, 2964 umc->umc_decoder_len, mode & FKIOCTL) != 0) { 2965 mutex_exit(&umc->umc_nvl_lock); 2966 ret = EFAULT; 2967 break; 2968 } 2969 2970 mutex_exit(&umc->umc_nvl_lock); 2971 ret = 0; 2972 break; 2973 default: 2974 ret = ENOTTY; 2975 break; 2976 } 2977 2978 return (ret); 2979 } 2980 2981 static int 2982 zen_umc_close(dev_t dev, int flag, int otyp, cred_t *credp) 2983 { 2984 return (0); 2985 } 2986 2987 static void 2988 zen_umc_cleanup(zen_umc_t *umc) 2989 { 2990 nvlist_free(umc->umc_decoder_nvl); 2991 umc->umc_decoder_nvl = NULL; 2992 if (umc->umc_decoder_buf != NULL) { 2993 kmem_free(umc->umc_decoder_buf, umc->umc_decoder_len); 2994 umc->umc_decoder_buf = NULL; 2995 umc->umc_decoder_len = 0; 2996 } 2997 2998 if (umc->umc_dip != NULL) { 2999 ddi_remove_minor_node(umc->umc_dip, NULL); 3000 } 3001 mutex_destroy(&umc->umc_nvl_lock); 3002 kmem_free(umc, sizeof (zen_umc_t)); 3003 } 3004 3005 static int 3006 zen_umc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3007 { 3008 int ret; 3009 zen_umc_t *umc; 3010 3011 if (cmd == DDI_RESUME) { 3012 return (DDI_SUCCESS); 3013 } else if (cmd != DDI_ATTACH) { 3014 return (DDI_FAILURE); 3015 } 3016 if (zen_umc != NULL) { 3017 dev_err(dip, CE_WARN, "!zen_umc is already attached to a " 3018 "dev_info_t: %p", zen_umc->umc_dip); 3019 return (DDI_FAILURE); 3020 } 3021 3022 /* 3023 * To get us going, we need to do several bits of set up. 
First, we need 3024 * to use the knowledge about the actual hardware that we're using to 3025 * encode a bunch of different data: 3026 * 3027 * o The set of register styles and extra hardware features that exist 3028 * on the hardware platform. 3029 * o The number of actual rules there are for the CCMs and UMCs. 3030 * o How many actual things exist (DFs, etc.) 3031 * o Useful fabric and instance IDs for all of the different UMC 3032 * entries so we can actually talk to them. 3033 * 3034 * Only once we have all the above will we go dig into the actual data. 3035 */ 3036 umc = kmem_zalloc(sizeof (zen_umc_t), KM_SLEEP); 3037 mutex_init(&umc->umc_nvl_lock, NULL, MUTEX_DRIVER, NULL); 3038 umc->umc_family = chiprev_family(cpuid_getchiprev(CPU)); 3039 umc->umc_ndfs = amdzen_c_df_count(); 3040 umc->umc_dip = dip; 3041 3042 if (!zen_umc_identify(umc)) { 3043 dev_err(dip, CE_WARN, "!encountered unsupported CPU"); 3044 goto err; 3045 } 3046 3047 umc->umc_df_rev = amdzen_c_df_rev(); 3048 switch (umc->umc_df_rev) { 3049 case DF_REV_2: 3050 case DF_REV_3: 3051 case DF_REV_3P5: 3052 case DF_REV_4: 3053 break; 3054 default: 3055 dev_err(dip, CE_WARN, "!encountered unknown DF revision: %x", 3056 umc->umc_df_rev); 3057 goto err; 3058 } 3059 3060 if ((ret = amdzen_c_df_fabric_decomp(&umc->umc_decomp)) != 0) { 3061 dev_err(dip, CE_WARN, "!failed to get fabric decomposition: %d", 3062 ret); 3063 } 3064 3065 umc->umc_tom = rdmsr(MSR_AMD_TOM); 3066 umc->umc_tom2 = rdmsr(MSR_AMD_TOM2); 3067 3068 /* 3069 * For each DF, start by reading all of the data that we need from it. 3070 * This involves finding a target CCM, reading all of the rules, 3071 * ancillary settings, and related. Then we'll do a pass over all of the 3072 * actual UMC targets there. 
3073 */ 3074 for (uint_t i = 0; i < umc->umc_ndfs; i++) { 3075 if (amdzen_c_df_iter(i, ZEN_DF_TYPE_CCM_CPU, 3076 zen_umc_fill_ccm_cb, umc) < 0 || 3077 amdzen_c_df_iter(i, ZEN_DF_TYPE_CS_UMC, zen_umc_fill_umc_cb, 3078 umc) != 0) { 3079 goto err; 3080 } 3081 } 3082 3083 /* 3084 * Create a minor node for each df that we encounter. 3085 */ 3086 for (uint_t i = 0; i < umc->umc_ndfs; i++) { 3087 int ret; 3088 char minor[64]; 3089 3090 (void) snprintf(minor, sizeof (minor), "mc-umc-%u", i); 3091 if ((ret = ddi_create_minor_node(umc->umc_dip, minor, S_IFCHR, 3092 i, "ddi_mem_ctrl", 0)) != 0) { 3093 dev_err(dip, CE_WARN, "!failed to create minor %s: %d", 3094 minor, ret); 3095 goto err; 3096 } 3097 } 3098 3099 zen_umc = umc; 3100 return (DDI_SUCCESS); 3101 3102 err: 3103 zen_umc_cleanup(umc); 3104 return (DDI_FAILURE); 3105 } 3106 3107 static int 3108 zen_umc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 3109 { 3110 zen_umc_t *umc; 3111 3112 if (zen_umc == NULL || zen_umc->umc_dip == NULL) { 3113 return (DDI_FAILURE); 3114 } 3115 umc = zen_umc; 3116 3117 switch (cmd) { 3118 case DDI_INFO_DEVT2DEVINFO: 3119 *resultp = (void *)umc->umc_dip; 3120 break; 3121 case DDI_INFO_DEVT2INSTANCE: 3122 *resultp = (void *)(uintptr_t)ddi_get_instance( 3123 umc->umc_dip); 3124 break; 3125 default: 3126 return (DDI_FAILURE); 3127 } 3128 return (DDI_SUCCESS); 3129 } 3130 3131 static int 3132 zen_umc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3133 { 3134 zen_umc_t *umc; 3135 3136 if (cmd == DDI_SUSPEND) { 3137 return (DDI_SUCCESS); 3138 } else if (cmd != DDI_DETACH) { 3139 return (DDI_FAILURE); 3140 } 3141 3142 if (zen_umc == NULL) { 3143 dev_err(dip, CE_WARN, "!asked to detach zen_umc, but it " 3144 "was never successfully attached"); 3145 return (DDI_FAILURE); 3146 } 3147 3148 umc = zen_umc; 3149 zen_umc = NULL; 3150 zen_umc_cleanup(umc); 3151 return (DDI_SUCCESS); 3152 } 3153 3154 static struct cb_ops zen_umc_cb_ops = { 3155 .cb_open = zen_umc_open, 3156 
.cb_close = zen_umc_close, 3157 .cb_strategy = nodev, 3158 .cb_print = nodev, 3159 .cb_dump = nodev, 3160 .cb_read = nodev, 3161 .cb_write = nodev, 3162 .cb_ioctl = zen_umc_ioctl, 3163 .cb_devmap = nodev, 3164 .cb_mmap = nodev, 3165 .cb_segmap = nodev, 3166 .cb_chpoll = nochpoll, 3167 .cb_prop_op = ddi_prop_op, 3168 .cb_flag = D_MP, 3169 .cb_rev = CB_REV, 3170 .cb_aread = nodev, 3171 .cb_awrite = nodev 3172 }; 3173 3174 static struct dev_ops zen_umc_dev_ops = { 3175 .devo_rev = DEVO_REV, 3176 .devo_refcnt = 0, 3177 .devo_getinfo = zen_umc_getinfo, 3178 .devo_identify = nulldev, 3179 .devo_probe = nulldev, 3180 .devo_attach = zen_umc_attach, 3181 .devo_detach = zen_umc_detach, 3182 .devo_reset = nodev, 3183 .devo_quiesce = ddi_quiesce_not_needed, 3184 .devo_cb_ops = &zen_umc_cb_ops 3185 }; 3186 3187 static struct modldrv zen_umc_modldrv = { 3188 .drv_modops = &mod_driverops, 3189 .drv_linkinfo = "AMD Zen Unified Memory Controller", 3190 .drv_dev_ops = &zen_umc_dev_ops 3191 }; 3192 3193 static struct modlinkage zen_umc_modlinkage = { 3194 .ml_rev = MODREV_1, 3195 .ml_linkage = { &zen_umc_modldrv, NULL } 3196 }; 3197 3198 int 3199 _init(void) 3200 { 3201 return (mod_install(&zen_umc_modlinkage)); 3202 } 3203 3204 int 3205 _info(struct modinfo *modinfop) 3206 { 3207 return (mod_info(&zen_umc_modlinkage, modinfop)); 3208 } 3209 3210 int 3211 _fini(void) 3212 { 3213 return (mod_remove(&zen_umc_modlinkage)); 3214 } 3215